-
Notifications
You must be signed in to change notification settings - Fork 80
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[MRG] Explore addition of a `sketch` command. (#1159)
* add sourmash sketch command * it's alive! sourmash sketch dna works! * refactor make_minhashes code to allow more flexibility * refactor internal compute code to better reflect reality * update sketch dna as well * pluralize signature factory and handling to get ready for sketch * initial implementation with parameter strings * add a variety of tests * add sketch protein and sketch translate * add --hp and --dayhoff to translate and protein * add aa|prot and rna aliases for sketch command * hacked and slashed ksize*3 :) * refactor and simplify sketch code * clean up command_sketch a bit * provide better defaults for protein, translate * allow param strings to set moltype and use moltype defaults * add some tests for default parameters, etc * refactoring of names * check bad param string * fix scaled/num checking and (some) associated tests * do tests for multiple output sigs * update protein/dayhoff/hp defaults * add invalid parameter checking on dna/override protein, etc * remove 10x tests, rename compute -> sketch for remaining * add tests for incompatible sketch commands * multiple ksizes within one param string * cleanup of comments
- Loading branch information
Showing
9 changed files
with
1,407 additions
and
55 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
"""Define the command line interface for sourmash sketch | ||
The top level CLI is defined in ../__init__.py. This module defines the CLI for | ||
`sourmash sketch` operations. | ||
""" | ||
|
||
from . import dna | ||
from . import dna as rna | ||
from . import protein | ||
from . import protein as aa | ||
from . import protein as prot | ||
from . import translate | ||
from ..utils import command_list | ||
from argparse import SUPPRESS, RawDescriptionHelpFormatter | ||
import os | ||
import sys | ||
|
||
|
||
def subparser(subparsers):
    """Register the ``sketch`` command and every sketch sub-command.

    The names of the sub-commands come from ``command_list`` applied to the
    directory holding this module. For each name, the attribute of the same
    name on this module supplies the one-line description (its ``__doc__``)
    and the ``subparser`` hook that registers its arguments.

    :param subparsers: the sub-parsers action to add ``sketch`` to.
    """
    this_module = sys.modules[__name__]
    sketch_parser = subparsers.add_parser(
        'sketch', formatter_class=RawDescriptionHelpFormatter, usage=SUPPRESS
    )
    operations = command_list(os.path.dirname(__file__))

    # Build the "Operations" overview: one line per sub-command, showing how
    # to reach its help next to its module docstring.
    overview = ['Operations\n']
    for name in operations:
        summary = getattr(this_module, name).__doc__
        usage_hint = 'sourmash sketch {op:s} --help'.format(op=name)
        overview.append(
            ' {hs:33s} {ds:s}\n'.format(hs=usage_hint, ds=summary)
        )

    sub_actions = sketch_parser.add_subparsers(
        title='Create signatures',
        dest='subcmd',
        metavar='subcmd',
        help=SUPPRESS,
        description=''.join(overview),
    )
    for name in operations:
        getattr(this_module, name).subparser(sub_actions)

    # Flip the order of argparse's (private) action groups and retitle the
    # optional-arguments group for the help output.
    sketch_parser._action_groups.reverse()
    sketch_parser._optionals.title = 'Options'
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
"""create DNA signatures""" | ||
|
||
import csv | ||
|
||
import sourmash | ||
from sourmash.logging import notify, print_results, error | ||
|
||
|
||
def subparser(subparsers):
    """Attach the ``dna`` sub-command (alias ``rna``) to *subparsers*.

    Declares the general options (``--license``, ``--check-sequence``,
    ``-p/--param-string``), the positional ``filenames`` and a
    'File handling options' argument group.

    :param subparsers: the sub-parsers action to register the command on.
    """
    dna_parser = subparsers.add_parser('dna', aliases=['rna'])

    # (flags, keyword settings) pairs; registration order is kept so the
    # generated --help lists the options in the same sequence.
    general_options = (
        (('--license',), {
            'default': 'CC0', 'type': str,
            'help': 'signature license. Currently only CC0 is supported.',
        }),
        (('--check-sequence',), {
            'action': 'store_true',
            'help': 'complain if input sequence is invalid',
        }),
        (('-p', '--param-string'), {
            'default': [], 'action': 'append',
            'help': 'signature parameters to use.',
        }),
        (('filenames',), {
            'nargs': '+', 'help': 'file(s) of sequences',
        }),
    )
    for flags, settings in general_options:
        dna_parser.add_argument(*flags, **settings)

    file_group = dna_parser.add_argument_group('File handling options')
    file_options = (
        (('-f', '--force'), {
            'action': 'store_true',
            'help': 'recompute signatures even if the file exists',
        }),
        (('-o', '--output'), {
            'help': 'output computed signatures to this file',
        }),
        (('--merge', '--name'), {
            'type': str, 'default': '', 'metavar': "FILE",
            'help': 'merge all input files into one signature file with the '
                    'specified name',
        }),
        (('--outdir',), {
            'help': 'output computed signatures to this directory',
        }),
        (('--singleton',), {
            'action': 'store_true',
            'help': 'compute a signature for each sequence record individually',
        }),
        (('--name-from-first',), {
            'action': 'store_true',
            'help': 'name the signature generated from each file after the first '
                    'record in the file',
        }),
        (('--randomize',), {
            'action': 'store_true',
            'help': 'shuffle the list of input filenames randomly',
        }),
    )
    for flags, settings in file_options:
        file_group.add_argument(*flags, **settings)
|
||
|
||
def main(args):
    """Run ``sourmash sketch dna`` on the parsed command-line arguments.

    Delegates to ``sourmash.command_sketch.dna`` and returns its result.
    The implementation module is imported only when the command runs.
    """
    from sourmash.command_sketch import dna as sketch_dna

    return sketch_dna(args)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
"""create protein signatures""" | ||
|
||
import csv | ||
|
||
import sourmash | ||
from sourmash.logging import notify, print_results, error | ||
|
||
|
||
def subparser(subparsers):
    """Attach the ``protein`` sub-command (aliases ``aa``, ``prot``).

    Declares the general options (``--license``, ``--check-sequence``,
    ``-p/--param-string``), the positional ``filenames`` and a
    'File handling options' group that also carries the ``--dayhoff`` and
    ``--hp`` alphabet switches.

    :param subparsers: the sub-parsers action to register the command on.
    """
    subparser = subparsers.add_parser('protein', aliases=['aa', 'prot'])
    subparser.add_argument(
        '--license', default='CC0', type=str,
        help='signature license. Currently only CC0 is supported.'
    )
    subparser.add_argument(
        '--check-sequence', action='store_true',
        help='complain if input sequence is invalid'
    )
    subparser.add_argument(
        '-p', '--param-string', default=[],
        help='signature parameters to use.', action='append',
    )

    subparser.add_argument(
        'filenames', nargs='+', help='file(s) of sequences'
    )
    file_args = subparser.add_argument_group('File handling options')
    file_args.add_argument(
        '-f', '--force', action='store_true',
        help='recompute signatures even if the file exists'
    )
    file_args.add_argument(
        '-o', '--output',
        help='output computed signatures to this file'
    )
    file_args.add_argument(
        '--merge', '--name', type=str, default='', metavar="FILE",
        help='merge all input files into one signature file with the '
        'specified name'
    )
    file_args.add_argument(
        '--outdir', help='output computed signatures to this directory'
    )
    file_args.add_argument(
        '--singleton', action='store_true',
        help='compute a signature for each sequence record individually'
    )
    file_args.add_argument(
        '--name-from-first', action='store_true',
        help='name the signature generated from each file after the first '
        'record in the file'
    )
    file_args.add_argument(
        '--randomize', action='store_true',
        help='shuffle the list of input filenames randomly'
    )
    file_args.add_argument(
        '--dayhoff', action='store_true',
        help='compute sketches using the dayhoff alphabet instead'
    )
    # The help text previously repeated the --dayhoff description verbatim;
    # it now names the alphabet this flag actually selects.
    file_args.add_argument(
        '--hp', action='store_true',
        help='compute sketches using the hp alphabet instead'
    )
|
||
|
||
def main(args):
    """Run ``sourmash sketch protein`` on the parsed command-line arguments.

    Delegates to ``sourmash.command_sketch.protein`` and returns its result.
    The implementation module is imported only when the command runs.
    """
    from sourmash.command_sketch import protein as sketch_protein

    return sketch_protein(args)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
"""create protein signature from DNA/RNA sequence""" | ||
|
||
import csv | ||
|
||
import sourmash | ||
from sourmash.logging import notify, print_results, error | ||
|
||
|
||
def subparser(subparsers):
    """Attach the ``translate`` sub-command to *subparsers*.

    Declares the general options (``--license``, ``--check-sequence``,
    ``-p/--param-string``), the positional ``filenames`` and a
    'File handling options' group that also carries the ``--dayhoff`` and
    ``--hp`` alphabet switches.

    :param subparsers: the sub-parsers action to register the command on.
    """
    subparser = subparsers.add_parser('translate')
    subparser.add_argument(
        '--license', default='CC0', type=str,
        help='signature license. Currently only CC0 is supported.'
    )
    subparser.add_argument(
        '--check-sequence', action='store_true',
        help='complain if input sequence is invalid'
    )
    subparser.add_argument(
        '-p', '--param-string', default=[],
        help='signature parameters to use.', action='append',
    )

    subparser.add_argument(
        'filenames', nargs='+', help='file(s) of sequences'
    )
    file_args = subparser.add_argument_group('File handling options')
    file_args.add_argument(
        '-f', '--force', action='store_true',
        help='recompute signatures even if the file exists'
    )
    file_args.add_argument(
        '-o', '--output',
        help='output computed signatures to this file'
    )
    file_args.add_argument(
        '--merge', '--name', type=str, default='', metavar="FILE",
        help='merge all input files into one signature file with the '
        'specified name'
    )
    file_args.add_argument(
        '--outdir', help='output computed signatures to this directory'
    )
    file_args.add_argument(
        '--singleton', action='store_true',
        help='compute a signature for each sequence record individually'
    )
    file_args.add_argument(
        '--name-from-first', action='store_true',
        help='name the signature generated from each file after the first '
        'record in the file'
    )
    file_args.add_argument(
        '--randomize', action='store_true',
        help='shuffle the list of input filenames randomly'
    )
    file_args.add_argument(
        '--dayhoff', action='store_true',
        help='compute sketches using the dayhoff alphabet instead'
    )
    # The help text previously repeated the --dayhoff description verbatim;
    # it now names the alphabet this flag actually selects.
    file_args.add_argument(
        '--hp', action='store_true',
        help='compute sketches using the hp alphabet instead'
    )
|
||
|
||
def main(args):
    """Run ``sourmash sketch translate`` on the parsed command-line arguments.

    Delegates to ``sourmash.command_sketch.translate`` and returns its
    result. The implementation module is imported only when the command runs.
    """
    from sourmash.command_sketch import translate as sketch_translate

    return sketch_translate(args)
Oops, something went wrong.