Skip to content

Commit

Permalink
more formatting changes
Browse files Browse the repository at this point in the history
  • Loading branch information
tanghaibao committed Sep 27, 2023
1 parent 7ba7c5f commit ca4e1e3
Show file tree
Hide file tree
Showing 4 changed files with 45 additions and 102 deletions.
63 changes: 20 additions & 43 deletions goatools/anno/annoreader_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,15 +26,12 @@ class AnnoReaderBase(object):
tic = timeit.default_timer()

# Expected values for a Qualifier
exp_qualifiers = set(
[
# Seen in both GAF and gene2go
"not",
"contributes_to",
"colocalizes_with",
]
)

exp_qualifiers = {
# Seen in both GAF and gene2go
"not",
"contributes_to",
"colocalizes_with",
}
valid_formats = {"gpad", "gaf", "gene2go", "id2gos"}
exp_nss = {"BP", "MF", "CC"}

Expand Down Expand Up @@ -123,11 +120,7 @@ def get_id2gos(self, namespace=None, prt=sys.stdout, **kws):
nspc, assoc = self._get_1ns_assn(namespace)
id2gos = self._get_id2gos(assoc, **kws)
if prt:
prt.write(
"{N} IDs in loaded association branch, {NS}\n".format(
N=len(id2gos), NS=nspc
)
)
prt.write(f"{len(id2gos)} IDs in loaded association branch, {nspc}\n")
return id2gos
if prt and self.godag is None:
logging.warning(
Expand All @@ -137,19 +130,15 @@ def get_id2gos(self, namespace=None, prt=sys.stdout, **kws):
)
id2gos = self._get_id2gos(self.associations, **kws)
if prt:
prt.write("{N} IDs in all associations\n".format(N=len(id2gos)))
prt.write(f"{len(id2gos)} IDs in all associations\n")
return id2gos

def _get_1ns_assn(self, namespace_usr):
"""Get one namespace, given a user-provided namespace or a default"""
# If all namespaces were loaded
if self.namespaces is None:
# Return user-specified namespace, if provided. Otherwise BP
nspc = (
self._get_biggest_namespace()
if namespace_usr is None
else namespace_usr
)
nspc = namespace_usr or self._get_biggest_namespace()
# Return one namespace
if nspc in set(NAMESPACE2NS.values()):
return nspc, [nt for nt in self.associations if nt.NS == nspc]
Expand All @@ -159,11 +148,7 @@ def _get_1ns_assn(self, namespace_usr):
if len(self.namespaces) == 1:
nspc = next(iter(self.namespaces))
if namespace_usr is not None and nspc != namespace_usr:
print(
"**WARNING: IGNORING {ns}; ONLY {NS} WAS LOADED".format(
ns=namespace_usr, NS=nspc
)
)
print(f"**WARNING: IGNORING {namespace_usr}; ONLY {nspc} WAS LOADED")
return nspc, self.associations
if namespace_usr is None:
print(
Expand All @@ -180,9 +165,9 @@ def _get_biggest_namespace(self):

def has_ns(self):
    """Return True if namespace field, NS exists on annotation namedtuples.

    Only the first item yielded by iterating ``self.associations`` is
    inspected for an ``NS`` attribute.

    Raises:
        AssertionError: if ``self.associations`` is empty (falsy); the
            message names ``self.filename``.
    """
    # A single emptiness check is enough; it guards the next(iter(...)) below.
    assert (
        self.associations
    ), f"NO ASSOCIATIONS IN file({self.filename}): {self.associations}"
    return hasattr(next(iter(self.associations)), "NS")

def _get_id2gos(
Expand All @@ -191,7 +176,7 @@ def _get_id2gos(
propagate_counts=False,
relationships=None,
prt=sys.stdout,
**kws
**kws,
):
"""Return given ntannos_usr in a dict, id2gos"""
options = AnnoOptions(self.evobj, **kws)
Expand All @@ -205,8 +190,6 @@ def _get_id2gos(
)
if options.b_geneid2gos:
return dbid2goids
# if not a2bs:
# raise RuntimeError('**ERROR: NO ASSOCATIONS FOUND: {FILE}'.format(FILE=self.filename))
return self._get_goid2dbids(dbid2goids)

def _get_anno_in_dag(self, ntsanno):
Expand Down Expand Up @@ -252,7 +235,7 @@ def _prt_qualifiers(associations, prt=sys.stdout):
for fld, cnt in cx.Counter(
q for nt in associations for q in nt.Qualifier
).most_common():
prt.write(" {N:6,} {FLD}\n".format(N=cnt, FLD=fld))
prt.write(f" {cnt:6,} {fld}\n")

def reduce_annotations(self, annotations, options):
"""Reduce annotations to ones used to identify enrichment (normally exclude ND and NOT)."""
Expand All @@ -279,18 +262,12 @@ def _get_go2ancestors(self, goids_assoc_usr, relationships, prt=sys.stdout):
# Get GO IDs in annotations that are in GO DAG
goids_avail = set(_godag)
goids_missing = self._rpt_goids_notfound(goids_assoc_usr, goids_avail)
goids_assoc_cur = goids_assoc_usr.intersection(goids_avail).difference(
goids_missing
)
goids_assoc_cur = goids_assoc_usr & goids_avail - goids_missing
# Get GO Term for each current GO ID in the annotations
_go2obj_assc = {go: _godag[go] for go in goids_assoc_cur}
go2ancestors = get_go2parents_go2obj(_go2obj_assc, relationships, prt)
if prt:
prt.write(
"{N} GO IDs -> {M} go2ancestors\n".format(
N=len(goids_avail), M=len(go2ancestors)
)
)
prt.write(f"{len(goids_avail)} GO IDs -> {len(go2ancestors)} go2ancestors\n")
return go2ancestors

@staticmethod
Expand Down Expand Up @@ -350,7 +327,7 @@ def hms(self, msg, tic=None, prt=sys.stdout):
tic = self.tic
now = timeit.default_timer()
hms = str(datetime.timedelta(seconds=now - tic))
prt.write("{HMS}: {MSG}\n".format(HMS=hms, MSG=msg))
prt.write(f"{hms}: {msg}\n")
return now

def chk_associations(self, fout_err=None):
Expand All @@ -374,7 +351,7 @@ def chk_qualifiers(self):
qual = ntd.Qualifier
assert isinstance(
qual, set
), "{NAME}: QUALIFIER MUST BE A LIST: {NT}".format(NAME=self.name, NT=ntd)
), f"{self.name}: QUALIFIER MUST BE A LIST: {ntd}"
assert qual != set([""]), ntd
assert qual != set(["-"]), ntd
assert "always" not in qual, "SPEC SAID IT WOULD BE THERE"
Expand Down Expand Up @@ -402,7 +379,7 @@ def prt_counts(self, prt=sys.stdout):
"""Print the number of taxids stored."""
num_annos = len(self.associations)
# 792,891 annotations for 3 taxids stored: 10090 7227 9606
prt.write("{A:8,} annotations\n".format(A=num_annos))
prt.write(f"{num_annos:8,} annotations\n")


# Copyright (C) 2016-present, DV Klopfenstein, H Tang. All rights reserved."
6 changes: 2 additions & 4 deletions goatools/anno/idtogos_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,11 @@ def wr_id2gos(fout_txt, id2gos):
prt.write(
"{GENE}\t{GOs}\n".format(GENE=geneid, GOs=";".join(sorted(goset)))
)
print(" {N} annotations WROTE: {TXT}".format(N=len(id2gos), TXT=fout_txt))
print(f" {len(id2gos)} annotations WROTE: {fout_txt}")

def prt_summary_anno2ev(self, prt=sys.stdout):
    """Print a summary of all Evidence Codes seen in annotations.

    This implementation only writes a one-line note, naming
    ``self.filename``, that the associations carry no evidence codes.

    Args:
        prt: writable text stream that receives the note (default: stdout).
    """
    # Emit the note exactly once.
    prt.write(f"**NOTE: No evidence codes in associations: {self.filename}\n")

# pylint: disable=unused-argument
def reduce_annotations(self, associations, options):
Expand Down
54 changes: 13 additions & 41 deletions goatools/cli/find_enrichment.py
Original file line number Diff line number Diff line change
Expand Up @@ -354,7 +354,7 @@ def __init__(self, args):
self.godag = GODag(
obo_file=self.args.obo,
optional_attrs=godag_optional_attrs,
load_obsolete=True,
load_obsolete=self.args.obsolete in ("keep", "replace"),
)
# GET: Gene2GoReader, GafReader, GpadReader, or IdToGosReader
self.objanno = self._get_objanno(self.args.filenames[2])
Expand Down Expand Up @@ -387,10 +387,8 @@ def _get_ns(self):
"""Return namespaces."""
exp_nss = {"BP", "MF", "CC"}
act_nss = set(self.args.ns.split(","))
assert not act_nss.difference(
exp_nss
), "EXPECTED NAMESPACES({E}); GOT({A})".format(
E=",".join(exp_nss), A=",".join(act_nss.difference(exp_nss))
assert not act_nss - exp_nss, "EXPECTED NAMESPACES({E}); GOT({A})".format(
E=",".join(exp_nss), A=",".join(act_nss - exp_nss)
)
return None if act_nss == exp_nss else act_nss

Expand Down Expand Up @@ -437,8 +435,6 @@ def prt_outfiles_flat(self, goea_results, outfiles):
for outfile in outfiles:
if outfile.endswith(".xlsx"):
self.objgoeans.wr_xlsx(outfile, goea_results, **kws)
# elif outfile.endswith(".txt"): # TBD
# pass
else:
self.objgoeans.wr_tsv(outfile, goea_results, **kws)

Expand Down Expand Up @@ -502,13 +498,13 @@ def chk_genes(self, study, pop, ntsassoc=None):
overlap = self.get_overlap(study, pop)
if overlap < 0.95:
sys.stderr.write(
"\nWARNING: only {} fraction of genes/proteins in study are found in "
"the population background.\n\n".format(overlap)
f"\nWARNING: only {overlap} fraction of genes/proteins in study are found in "
"the population background.\n\n"
)
if overlap <= self.args.min_overlap:
exit(
"\nERROR: only {} of genes/proteins in the study are found in the "
"background population. Please check.\n".format(overlap)
f"\nERROR: only {overlap} of genes/proteins in the study are found in the "
"background population. Please check.\n"
)
# Population and associations
if ntsassoc is not None:
Expand Down Expand Up @@ -571,7 +567,7 @@ def get_pval_field(self):
def rd_files(self, study_fn, pop_fn):
    """Read files and return study and population.

    Delegates the reading to ``self._read_geneset`` and prints a one-line
    summary with the sizes of the two collections.

    Args:
        study_fn: study file name, passed through to ``_read_geneset``.
        pop_fn: population file name, passed through to ``_read_geneset``.

    Returns:
        The ``(study, pop)`` pair produced by ``_read_geneset``.
    """
    study, pop = self._read_geneset(study_fn, pop_fn)
    # Report the sizes once.
    print(f"Study: {len(study)} vs. Population {len(pop)}\n")
    return study, pop

def _read_geneset(self, study_fn, pop_fn):
Expand All @@ -597,17 +593,11 @@ def _read_geneset(self, study_fn, pop_fn):
def _get_optional_attrs(self):
    """Given keyword args, return optional_attributes to be loaded into the GODag.

    Returns:
        ``{"relationship"}`` when ``self.args.relationship`` is truthy,
        ``self.args.relationships`` is not None, or ``self.sections`` is
        truthy; otherwise ``{"replaced_by", "consider"}`` when
        ``self.args.obsolete == "replace"``; otherwise ``None``.
    """
    # The three triggers all request the same attribute, so they are tested
    # together, in the original order.
    needs_relationship = (
        self.args.relationship
        or self.args.relationships is not None
        or self.sections
    )
    if needs_relationship:
        return {"relationship"}
    if self.args.obsolete == "replace":
        return {"replaced_by", "consider"}
    return None
Expand Down Expand Up @@ -649,9 +639,8 @@ def __init__(self, objcli, godag_version):
self.ver_list = [
godag_version,
self.grprdflt.ver_goslims,
"Sections: {S}".format(S=objcli.args.sections),
f"Sections: {objcli.args.sections}",
]
# self.objaartall = self._init_objaartall()

def get_objgrpwr(self, goea_results):
"""Get a GrpWr object to write grouped GOEA results."""
Expand All @@ -671,17 +660,10 @@ def get_sortobj(self, goea_results, **kws):
sortobj = Sorter(grprobj, section_sortby=lambda nt: getattr(nt, self.pval_fld))
return sortobj

# @staticmethod
# def get_objaart(goea_results, **kws):
# """Return a AArtGeneProductSetsOne object."""
# nts_goea = MgrNtGOEAs(goea_results).get_goea_nts_prt(**kws)
# # objaart = AArtGeneProductSetsOne(name, goea_nts, self)

def _init_objaartall(self):
"""Get background database info for making ASCII art."""
kws = {
"sortgo": lambda nt: [nt.NS, nt.dcnt],
# fmtgo=('{p_fdr_bh:8.2e} {GO} '
# Formatting for GO terms in grouped GO list
"fmtgo": (
"{hdr1usr01:2} {NS} {GO} {s_fdr_bh:8} "
Expand All @@ -694,7 +676,6 @@ def _init_objaartall(self):
"{dcnt:5} R{reldepth:02} "
"{GO_name} ({study_count} study genes)\n"
),
# itemid2name=ensmusg2symbol}
}
return AArtGeneProductSetsAll(self.grprdflt, self.hdrobj, **kws)

Expand Down Expand Up @@ -725,18 +706,9 @@ def prt_outfiles_grouped(self, outfiles):
def wr_xlsx(self, fout_xlsx):
"""Print grouped GOEA results into an xlsx file."""
objwr = WrXlsxSortedGos("GOEA", self.sortobj)
#### fld2fmt['ratio_in_study'] = '{:>8}'
#### fld2fmt['ratio_in_pop'] = '{:>12}'
#### ntfld2wbfmtdict = {
# ntfld_wbfmt = {
# 'ratio_in_study': {'align':'right'},
# 'ratio_in_pop':{'align':'right'}}
kws_xlsx = {
"title": self.ver_list,
"fld2fmt": {f: "{:8.2e}" for f in self.flds_cur if f[:2] == "p_"},
#'ntfld_wbfmt': ntfld_wbfmt,
#### 'ntval2wbfmtdict': ntval2wbfmtdict,
#'hdrs': [],
"prt_flds": self.flds_cur,
}
objwr.wr_xlsx_nts(fout_xlsx, self.desc2nts, **kws_xlsx)
Expand All @@ -749,7 +721,7 @@ def wr_tsv(self, fout_tsv):
"prt_flds": self.flds_cur,
}
prt_tsv_sections(prt, self.desc2nts["sections"], **kws_tsv)
print(" WROTE: {TSV}".format(TSV=fout_tsv))
print(f" WROTE: {fout_tsv}")

def wr_txt(self, fout_txt):
"""Write to a file GOEA results in an ASCII text format."""
Expand Down
24 changes: 10 additions & 14 deletions goatools/obo_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,11 +53,11 @@ def __init__(self, obo_file="go-basic.obo", optional_attrs=None):
self.obo_file = obo_file
# GOTerm attributes that are necessary for any operations:
else:
raise Exception(
"COULD NOT READ({OBO})\n"
raise ValueError(
f"COULD NOT READ({obo_file})\n"
"download obo file first\n "
"[http://geneontology.org/ontology/"
"go-basic.obo]".format(OBO=obo_file)
"go-basic.obo]"
)

def __iter__(self):
Expand Down Expand Up @@ -186,27 +186,23 @@ def __str__(self):

def __repr__(self):
    """Print GO ID and all attributes in GOTerm class.

    Walks ``self.__dict__`` in insertion order and builds one line per
    attribute:

    * ``int``/``str`` values are shown as ``key:value``;
    * ``None`` is shown as ``key: None``;
    * any other value is shown as ``key: N items`` and, when it holds fewer
      than 10 items, its elements are listed underneath (for a dict, each
      key with its item count followed by that key's items).

    Returns:
        The lines joined with a newline plus one space.
    """
    ret = [f"GOTerm('{self.item_id}'):"]
    for key, val in self.__dict__.items():
        if isinstance(val, (int, str)):
            ret.append(f"{key}:{val}")
        elif val is not None:
            ret.append(f"{key}: {len(val)} items")
            # Only expand small containers to keep the repr readable.
            if len(val) < 10:
                if not isinstance(val, dict):
                    for elem in val:
                        ret.append(f" {elem}")
                else:
                    for typedef, terms in val.items():
                        ret.append(f" {typedef}: {len(terms)} items")
                        for term in terms:
                            ret.append(f" {term}")
        else:
            ret.append(f"{key}: None")
    return "\n ".join(ret)

def has_parent(self, term):
Expand Down

0 comments on commit ca4e1e3

Please sign in to comment.