Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[MRG] refactor argparse.FileType out of sourmash argument handling. #853

Merged
merged 17 commits into from
Jan 21, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion sourmash/cli/categorize.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def subparser(subparsers):
add_moltype_args(subparser)

# TODO: help messages in these
subparser.add_argument('--csv', type=argparse.FileType('at'))
subparser.add_argument('--csv', help='output summary CSV to this file')
subparser.add_argument('--load-csv', default=None)


Expand Down
4 changes: 1 addition & 3 deletions sourmash/cli/compare.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
"""compare genomes"""

from argparse import FileType

from sourmash.cli.utils import add_ksize_arg, add_moltype_args


Expand Down Expand Up @@ -29,7 +27,7 @@ def subparser(subparsers):
help='compare all signatures underneath directories'
)
subparser.add_argument(
'--csv', metavar='F', type=FileType('w'),
'--csv', metavar='F',
help='write matrix to specified file in CSV format (with column '
'headers)'
)
Expand Down
8 changes: 3 additions & 5 deletions sourmash/cli/gather.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
"""search a metagenome signature for multiple non-
overlapping matches"""

from argparse import FileType

from sourmash.cli.utils import add_ksize_arg, add_moltype_args


Expand All @@ -25,11 +23,11 @@ def subparser(subparsers):
help='search all signatures underneath directories'
)
subparser.add_argument(
'-o', '--output', metavar='FILE', type=FileType('wt'),
'-o', '--output', metavar='FILE',
help='output CSV containing matches to this file'
)
subparser.add_argument(
'--save-matches', metavar='FILE', type=FileType('wt'),
'--save-matches', metavar='FILE',
help='save the matched signatures from the database to the '
'specified file'
)
Expand All @@ -38,7 +36,7 @@ def subparser(subparsers):
help='threshold (in bp) for reporting results (default=50,000)'
)
subparser.add_argument(
'--output-unassigned', metavar='FILE', type=FileType('wt'),
'--output-unassigned', metavar='FILE',
help='output unassigned portions of the query as a signature to the '
'specified file'
)
Expand Down
6 changes: 2 additions & 4 deletions sourmash/cli/import_csv.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,14 @@
"""'sourmash import_csv' description goes here"""

from argparse import FileType
import sys


def subparser(subparsers):
subparser = subparsers.add_parser('import_csv')
subparser.add_argument('mash_csvfile', help='CSV file with mash sketches')
subparser.add_argument(
'-o', '--output', type=FileType('wt'),
default=sys.stdout,
help='save signature generated from data here'
'-o', '--output',
help='save signature generated from data to this file (default stdout)'
)


Expand Down
7 changes: 2 additions & 5 deletions sourmash/cli/lca/classify.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
"""classify genomes"""

from argparse import FileType


def subparser(subparsers):
subparser = subparsers.add_parser('classify')
Expand All @@ -17,9 +15,8 @@ def subparser(subparsers):
help='output debugging output'
)
subparser.add_argument(
'-o', '--output', metavar='FILE', type=FileType('wt'),
help='output CSV to the specified file; by default output to terminal '
'(standard output)'
'-o', '--output', metavar='FILE', default='-',
help='output CSV to the specified file; by default output to stdout'
)
subparser.add_argument('--scaled', type=float)
subparser.add_argument(
Expand Down
6 changes: 2 additions & 4 deletions sourmash/cli/lca/gather.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
"""classify metagenomes"""

from argparse import FileType


def subparser(subparsers):
subparser = subparsers.add_parser('gather')
Expand All @@ -15,11 +13,11 @@ def subparser(subparsers):
help='output debugging output'
)
subparser.add_argument(
'-o', '--output', metavar='FILE', type=FileType('wt'),
'-o', '--output', metavar='FILE',
help='output CSV containing matches to this file'
)
subparser.add_argument(
'--output-unassigned', metavar='FILE', type=FileType('wt'),
'--output-unassigned', metavar='FILE',
help='output unassigned portions of the query as a signature to this '
'file'
)
Expand Down
4 changes: 1 addition & 3 deletions sourmash/cli/lca/summarize.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
"""summarize mixture"""

from argparse import FileType


def subparser(subparsers):
subparser = subparsers.add_parser('summarize')
Expand All @@ -13,7 +11,7 @@ def subparser(subparsers):
help='load all signatures underneath directories'
)
subparser.add_argument(
'-o', '--output', metavar='FILE', type=FileType('wt'),
'-o', '--output', metavar='FILE',
help='file to which CSV output will be written'
)
subparser.add_argument('--scaled', metavar='FLOAT', type=float)
Expand Down
6 changes: 2 additions & 4 deletions sourmash/cli/search.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
"""search a signature against a list of signatures"""

from argparse import FileType

from sourmash.cli.utils import add_ksize_arg, add_moltype_args


Expand All @@ -27,7 +25,7 @@ def subparser(subparsers):
help='minimum threshold for reporting matches; default=0.08'
)
subparser.add_argument(
'--save-matches', metavar='FILE', type=FileType('wt'),
'--save-matches', metavar='FILE',
help='output matching signatures to the specified file'
)
subparser.add_argument(
Expand All @@ -52,7 +50,7 @@ def subparser(subparsers):
help='downsample query to this scaled factor (yields greater speed)'
)
subparser.add_argument(
'-o', '--output', metavar='FILE', type=FileType('wt'),
'-o', '--output', metavar='FILE',
help='output CSV containing matches to this file'
)
add_ksize_arg(subparser, 31)
Expand Down
75 changes: 3 additions & 72 deletions sourmash/cli/sig/describe.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
"""show details of signature"""

from argparse import FileType
import csv

import sourmash
Expand All @@ -15,79 +14,11 @@ def subparser(subparsers):
help='suppress non-error output'
)
subparser.add_argument(
'--csv', metavar='FILE', type=FileType('wt'),
'--csv', metavar='FILE',
help='output information to a CSV file'
)


def describe(signatures, quiet=True, csvout=None):
    """Print a summary record for every signature in the given files.

    Each entry of ``signatures`` is handed to ``sourmash.load_signatures``
    (with ``do_raise=True``). If loading a file raises, the error is
    reported through ``error`` and processing continues with the next file;
    signatures already read from that file are kept.

    For every loaded signature a text record is emitted via
    ``print_results``. When ``csvout`` is truthy, the same fields are also
    written to it as one CSV row per signature, preceded by a header row.

    Parameters:
        signatures: iterable of signature file locations.
        quiet: forwarded unchanged to ``sourmash.load_signatures``.
        csvout: object passed to ``csv.DictWriter`` as its output file, or
            a falsy value to skip CSV output.
    """
    fieldnames = [
        'signature_file', 'md5', 'ksize', 'moltype', 'num', 'scaled',
        'n_hashes', 'seed', 'with_abundance', 'name', 'filename',
        'license',
    ]

    siglist = []
    for sigfile in signatures:
        try:
            loaded = sourmash.load_signatures(sigfile, quiet=quiet,
                                              do_raise=True)
            # Append one at a time so that signatures read before a
            # mid-file failure are still reported.
            for k in loaded:
                siglist.append((k, sigfile))
        except Exception as exc:
            # Deliberate best-effort: report the failure and move on.
            error('\nError while reading signatures from {}:'.format(sigfile))
            error(str(exc))
            error('(continuing)')

        notify('loaded {} signatures from {}...', len(siglist), sigfile,
               end='\r')

    notify('loaded {} signatures total.', len(siglist))

    w = None
    if csvout:
        w = csv.DictWriter(csvout, fieldnames, extrasaction='ignore')
        w.writeheader()

    # extract info, write as appropriate.
    for (sig, signature_file) in siglist:
        mh = sig.minhash

        moltype = 'DNA'
        if mh.is_protein:
            if mh.dayhoff:
                moltype = 'dayhoff'
            elif mh.hp:
                moltype = 'hp'
            else:
                moltype = 'protein'

        # Build the record explicitly instead of passing locals(): only the
        # named fields reach the CSV writer and the text template, and the
        # output no longer depends on local variable names.
        record = {
            'signature_file': signature_file,
            'ksize': mh.ksize,
            'moltype': moltype,
            'scaled': mh.scaled,
            'num': mh.num,
            'seed': mh.seed,
            'n_hashes': len(mh),
            'with_abundance': 1 if mh.track_abundance else 0,
            'md5': sig.md5sum(),
            'name': sig.name(),
            'filename': sig.filename,
            'license': sig.license,
        }

        if w:
            w.writerow(record)

        print_results('''\
---
signature filename: {signature_file}
signature: {name}
source file: {filename}
md5: {md5}
k={ksize} molecule={moltype} num={num} scaled={scaled} seed={seed} track_abundance={with_abundance}
size: {n_hashes}
signature license: {license}
''', **record)


def main(args):
describe(args.signatures, quiet=args.quiet, csvout=args.csv)
import sourmash
return sourmash.sig.__main__.describe(args)
6 changes: 2 additions & 4 deletions sourmash/cli/sig/downsample.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
"""downsample one or more signatures"""

from argparse import FileType
import sys

from sourmash.cli.utils import add_moltype_args, add_ksize_arg
Expand All @@ -22,9 +21,8 @@ def subparser(subparsers):
help='suppress non-error output'
)
subparser.add_argument(
'-o', '--output', metavar='FILE', type=FileType('wt'),
default=sys.stdout,
help='output signature to this file'
'-o', '--output', metavar='FILE',
help='output signature to this file (default stdout)'
)
add_ksize_arg(subparser, 31)
add_moltype_args(subparser)
Expand Down
6 changes: 2 additions & 4 deletions sourmash/cli/sig/export.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
"""export a signature, e.g. to mash"""

from argparse import FileType
import sys

from sourmash.cli.utils import add_ksize_arg, add_moltype_args
Expand All @@ -14,9 +13,8 @@ def subparser(subparsers):
help='suppress non-error output'
)
subparser.add_argument(
'-o', '--output', metavar='FILE', type=FileType('wt'),
default=sys.stdout,
help='output signature to this file'
'-o', '--output', metavar='FILE',
help='output signature to this file (default stdout)'
)
add_ksize_arg(subparser, 31)
add_moltype_args(subparser)
Expand Down
6 changes: 2 additions & 4 deletions sourmash/cli/sig/extract.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
"""extract one or more signatures"""

from argparse import FileType
import sys

from sourmash.cli.utils import add_moltype_args, add_ksize_arg
Expand All @@ -14,9 +13,8 @@ def subparser(subparsers):
help='suppress non-error output'
)
subparser.add_argument(
'-o', '--output', metavar='FILE', type=FileType('wt'),
default=sys.stdout,
help='output signature to this file'
'-o', '--output', metavar='FILE',
help='output signature to this file (default stdout)'
)
subparser.add_argument(
'--md5', default=None,
Expand Down
6 changes: 2 additions & 4 deletions sourmash/cli/sig/filter.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
"""filter k-mers on abundance"""

from argparse import FileType
import sys

from sourmash.cli.utils import add_moltype_args, add_ksize_arg
Expand All @@ -14,9 +13,8 @@ def subparser(subparsers):
help='suppress non-error output'
)
subparser.add_argument(
'-o', '--output', metavar='FILE', type=FileType('wt'),
default=sys.stdout,
help='output signature to this file'
'-o', '--output', metavar='FILE',
help='output signature to this file (default stdout)'
)
subparser.add_argument(
'--md5', type=str, default=None,
Expand Down
6 changes: 2 additions & 4 deletions sourmash/cli/sig/flatten.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
"""remove abundances"""

from argparse import FileType
import sys

from sourmash.cli.utils import add_moltype_args, add_ksize_arg
Expand All @@ -14,9 +13,8 @@ def subparser(subparsers):
help='suppress non-error output'
)
subparser.add_argument(
'-o', '--output', metavar='FILE', type=FileType('wt'),
default=sys.stdout,
help='output signature to this file'
'-o', '--output', metavar='FILE',
help='output signature to this file (default stdout)'
)
subparser.add_argument(
'--md5', default=None,
Expand Down
6 changes: 2 additions & 4 deletions sourmash/cli/sig/ingest.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
"""import a mash or other signature"""

from argparse import FileType
import sys


Expand All @@ -15,9 +14,8 @@ def subparser(subparsers):
help='suppress non-error output'
)
subparser.add_argument(
'-o', '--output', metavar='FILE', type=FileType('wt'),
default=sys.stdout,
help='output signature to this file'
'-o', '--output', metavar='FILE',
help='output signature to this file (default stdout)'
)


Expand Down
6 changes: 2 additions & 4 deletions sourmash/cli/sig/intersect.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
"""intersect one or more signatures"""

from argparse import FileType
import sys

from sourmash.cli.utils import add_moltype_args, add_ksize_arg
Expand All @@ -14,9 +13,8 @@ def subparser(subparsers):
help='suppress non-error output'
)
subparser.add_argument(
'-o', '--output', metavar='FILE', type=FileType('wt'),
default=sys.stdout,
help='output signature to this file'
'-o', '--output', metavar='FILE',
help='output signature to this file (default stdout)'
)
subparser.add_argument(
'-A', '--abundances-from', metavar='FILE',
Expand Down
Loading