Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[MRG] Deprecate sourmash.load_signatures as public API; refactor a bit. #1279

Merged
merged 12 commits into from
Feb 4, 2021
55 changes: 53 additions & 2 deletions src/sourmash/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import re
import math
import os
from deprecation import deprecated

from ._lowlevel import ffi, lib

Expand All @@ -30,13 +31,63 @@
MAX_HASH = get_minhash_max_hash()

from .signature import (
load_signatures,
load_signatures as load_signatures_private,
load_one_signature,
SourmashSignature,
save_signatures,
)

from .sbtmh import load_sbt_index, search_sbt_index, create_sbt_index
@deprecated(
    deprecated_in="3.5.1",
    removed_in="5.0",
    current_version=VERSION,
    details='Use load_file_as_signatures instead.',
)
def load_signatures(*args, **kwargs):
    """Deprecated public entry point for loading signatures from JSON.

    Every positional and keyword argument is handed, unchanged, to the
    private loader imported from the signature module, and that loader's
    result is returned as-is (SourmashSignature objects, per the upstream
    documentation).

    Note, the signatures are not necessarily produced in the same order
    as they appear in the source file.

    Deprecated as of 3.5.1; please use 'load_file_as_signatures' instead.
    In 4.0 the 'quiet' argument has been removed and the loader no longer
    writes to stderr. The upstream note for 4.0 also says that do_raise
    defaults to True; check the signature module for the current default.
    """
    # Pure pass-through: the wrapper exists only to attach the deprecation
    # warning to the public name.
    loaded = load_signatures_private(*args, **kwargs)
    return loaded

from .sbtmh import load_sbt_index as load_sbt_index_private
from .sbtmh import search_sbt_index as search_sbt_index_private

@deprecated(
    deprecated_in="3.5.1",
    removed_in="5.0",
    current_version=VERSION,
    details='Use load_file_as_index instead.',
)
def load_sbt_index(*args, **kwargs):
    """Deprecated public entry point for loading an SBT index.

    All arguments are forwarded unchanged to the private loader imported
    from the sbtmh module, and whatever it returns is handed back to the
    caller.

    Deprecated as of 3.5.1; please use 'load_file_as_index' instead.
    """
    # Pure pass-through: the wrapper exists only to attach the deprecation
    # warning to the public name.
    index = load_sbt_index_private(*args, **kwargs)
    return index


@deprecated(deprecated_in="3.5.1", removed_in="5.0",
            current_version=VERSION,
            details='Use the new Index API instead.')
def search_sbt_index(*args, **kwargs):
    """\
    Search an SBT index `tree` with signature `query` for matches above
    `threshold`.

    All arguments are forwarded unchanged to the private search function
    imported from the sbtmh module, and its result is returned as-is.

    Usage:

        for match_sig, similarity in search_sbt_index(tree, query, threshold):
            ...

    This function has been deprecated as of 3.5.1; please use
    'idx = load_file_as_index(...); idx.search(query, threshold=...)' instead.
    """
    # Delegate to the *search* implementation. The previous body called
    # load_sbt_index_private here, which would have tried to load an index
    # from the search arguments instead of running the search.
    return search_sbt_index_private(*args, **kwargs)

from .sbtmh import create_sbt_index
from . import lca
from . import sbt
from . import sbtmh
Expand Down
3 changes: 2 additions & 1 deletion src/sourmash/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@

import screed
from .compare import compare_all_pairs, compare_serial_containment
from . import MinHash, load_sbt_index, create_sbt_index
from . import MinHash
from .sbtmh import load_sbt_index, create_sbt_index
from . import signature as sig
from . import sourmash_args
from .logging import notify, error, print_results, set_quiet
Expand Down
2 changes: 1 addition & 1 deletion src/sourmash/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ def save(self, path):
@classmethod
def load(cls, location):
    """Build a LinearIndex from the signatures found at `location`.

    `location` is passed to `load_signatures` and is also recorded on the
    resulting index as its `filename`.

    Returns the newly constructed LinearIndex.
    """
    # Imported locally, as in the original code.
    from .signature import load_signatures

    # Load exactly once, with do_raise=True so that parsing problems
    # propagate as exceptions. The earlier duplicate call without
    # do_raise was immediately overwritten and has been dropped.
    si = load_signatures(location, do_raise=True)

    lidx = LinearIndex(si, filename=location)
    return lidx
Expand Down
2 changes: 1 addition & 1 deletion src/sourmash/lca/command_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import csv
from collections import defaultdict

from sourmash import sourmash_args, load_signatures
from sourmash import sourmash_args
from sourmash.sourmash_args import load_file_as_signatures
from sourmash.logging import notify, error, debug, set_quiet
from . import lca_utils
Expand Down
6 changes: 0 additions & 6 deletions src/sourmash/signature.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,6 @@ def _detect_input_type(data):

def load_signatures(
data, ksize=None, select_moltype=None, ignore_md5sum=False, do_raise=False,
quiet=False
):
"""Load a JSON string with signatures into classes.

Expand All @@ -248,8 +247,6 @@ def load_signatures(

input_type = _detect_input_type(data)
if input_type == SigInput.UNKNOWN:
if not quiet:
error("Error in parsing signature; quitting. Cannot open file or invalid signature")
if do_raise:
raise Exception("Error in parsing signature; quitting. Cannot open file or invalid signature")
return
Expand Down Expand Up @@ -301,9 +298,6 @@ def load_signatures(
yield sig

except Exception as e:
if not quiet:
error("Error in parsing signature; quitting.")
error("Exception: {}", str(e))
if do_raise:
raise

Expand Down
10 changes: 5 additions & 5 deletions src/sourmash/sourmash_args.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""
Utility functions for dealing with input args to the sourmash command line.
Utility functions for sourmash CLI commands.
"""
import sys
import os
Expand All @@ -9,7 +9,7 @@

import screed

from sourmash import load_sbt_index
from sourmash.sbtmh import load_sbt_index
from sourmash.lca.lca_db import load_single_database
import sourmash.exceptions

Expand Down Expand Up @@ -365,7 +365,7 @@ def _load_database(filename, traverse_yield_all, *, cache_size=None):

# special case stdin
if not loaded and filename == '-':
db = sourmash.load_signatures(sys.stdin, quiet=True, do_raise=True)
db = signature.load_signatures(sys.stdin, do_raise=True)
db = list(db)
loaded = True
dbtype = DatabaseType.SIGLIST
Expand All @@ -376,7 +376,7 @@ def _load_database(filename, traverse_yield_all, *, cache_size=None):
for thisfile in traverse_find_sigs([filename], traverse_yield_all):
try:
with open(thisfile, 'rt') as fp:
x = sourmash.load_signatures(fp, quiet=True, do_raise=True)
x = signature.load_signatures(fp, do_raise=True)
siglist = list(x)
all_sigs.extend(siglist)
except (IOError, sourmash.exceptions.SourmashError):
Expand All @@ -394,7 +394,7 @@ def _load_database(filename, traverse_yield_all, *, cache_size=None):
# CTB: could make this a generator, with some trickery; but for
# now, just force into list.
with open(filename, 'rt') as fp:
db = sourmash.load_signatures(fp, quiet=True, do_raise=True)
db = signature.load_signatures(fp, do_raise=True)
db = list(db)

loaded = True
Expand Down
8 changes: 4 additions & 4 deletions tests/test_sbt.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from sourmash.exceptions import IndexNotSupported
from sourmash.sbt import SBT, GraphFactory, Leaf, Node
from sourmash.sbtmh import (SigLeaf, search_minhashes,
search_minhashes_containment)
search_minhashes_containment, load_sbt_index)
from sourmash.sbt_storage import (FSStorage, RedisStorage,
IPFSStorage, ZipStorage)

Expand Down Expand Up @@ -775,7 +775,7 @@ def test_sbt_protein_command_index(c):
c.run_sourmash('index', db_out, sigfile1, sigfile2,
'--scaled', '100', '-k', '19', '--protein')

db2 = sourmash.load_sbt_index(db_out)
db2 = load_sbt_index(db_out)

sig1 = sourmash.load_one_signature(sigfile1)
sig2 = sourmash.load_one_signature(sigfile2)
Expand Down Expand Up @@ -821,7 +821,7 @@ def test_sbt_hp_command_index(c):
c.run_sourmash('index', db_out, sigfile1, sigfile2,
'--scaled', '100', '-k', '19', '--hp')

db2 = sourmash.load_sbt_index(db_out)
db2 = load_sbt_index(db_out)

sig1 = sourmash.load_one_signature(sigfile1)
sig2 = sourmash.load_one_signature(sigfile2)
Expand Down Expand Up @@ -867,7 +867,7 @@ def test_sbt_dayhoff_command_index(c):
c.run_sourmash('index', db_out, sigfile1, sigfile2,
'--scaled', '100', '-k', '19', '--dayhoff')

db2 = sourmash.load_sbt_index(db_out)
db2 = load_sbt_index(db_out)

sig1 = sourmash.load_one_signature(sigfile1)
sig2 = sourmash.load_one_signature(sigfile2)
Expand Down
9 changes: 8 additions & 1 deletion tests/test_sourmash.py
Original file line number Diff line number Diff line change
Expand Up @@ -4121,6 +4121,12 @@ def test_do_sourmash_index_zipfile_append(c):
first_half = testdata_sigs[:half_point]
second_half = testdata_sigs[half_point:]

print(first_half)
print(second_half)

# should be no overlap
assert not set(first_half).intersection(set(second_half))

with pytest.warns(None) as record:
c.run_sourmash('index', '-k', '31', 'zzz.sbt.zip',
*first_half)
Expand All @@ -4138,7 +4144,8 @@ def test_do_sourmash_index_zipfile_append(c):
c.run_sourmash('index', "--append", '-k', '31', 'zzz.sbt.zip',
*second_half)
# UserWarning is raised when there are duplicated entries in the zipfile
assert not record
print(record)
assert not record, record

print(c)
assert c.last_result.status == 0
Expand Down
2 changes: 1 addition & 1 deletion utils/check-tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ def main():
p.add_argument('sbt')
args = p.parse_args()

db = sourmash.load_sbt_index(args.sbt)
db = sourmash.sbtmh.load_sbt_index(args.sbt)
threshold = THRESHOLD

for leaf in db.leaves():
Expand Down