Skip to content

Commit

Permalink
improve CSV output for search, marginally
Browse files (browse the repository at this point in the history)
  • Loading branch information
ctb committed Mar 12, 2021
1 parent 356e934 commit cbd2503
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 3 deletions.
4 changes: 3 additions & 1 deletion src/sourmash/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -497,7 +497,8 @@ def search(args):
notify("** reporting only one match because --best-only was set")

if args.output:
fieldnames = ['similarity', 'name', 'filename', 'md5']
fieldnames = ['similarity', 'name', 'filename', 'md5',
'query_filename', 'query_name', 'query_md5']

with FileOutput(args.output, 'wt') as fp:
w = csv.DictWriter(fp, fieldnames=fieldnames)
Expand All @@ -506,6 +507,7 @@ def search(args):
for sr in results:
d = dict(sr._asdict())
del d['match']
del d['query']
w.writerow(d)

# save matching signatures upon request
Expand Down
9 changes: 7 additions & 2 deletions src/sourmash/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

# generic SearchResult.
SearchResult = namedtuple('SearchResult',
'similarity, match, md5, filename, name')
'similarity, match, md5, filename, name, query, query_filename, query_name, query_md5')


def format_bp(bp):
Expand Down Expand Up @@ -45,7 +45,12 @@ def search_databases(query, databases, **kwargs):
match=match,
md5=match.md5sum(),
filename=filename,
name=match.name))
name=match.name,
query=query,
query_filename=query.filename,
query_name=query.name,
query_md5=query.md5sum()[:8]
))
return x

###
Expand Down
4 changes: 4 additions & 0 deletions tests/test_sourmash.py
Original file line number Diff line number Diff line change
Expand Up @@ -807,9 +807,13 @@ def test_search_csv(c):
with open(csv_file) as fp:
reader = csv.DictReader(fp)
row = next(reader)
print(row)
assert float(row['similarity']) == 0.93
assert row['filename'].endswith('short2.fa.sig')
assert row['md5'] == '914591cd1130aa915fe0c0c63db8f19d'
assert row['query_filename'].endswith('short.fa')
assert row['query_name'] == ''
assert row['query_md5'] == 'e26a306d'


@utils.in_tempdir
Expand Down

0 comments on commit cbd2503

Please sign in to comment.