Skip to content

Commit

Permalink
serovar list feature unit tested and new tests written, changelog upd…
Browse files Browse the repository at this point in the history
…ated
  • Loading branch information
kbessonov1984 committed Sep 25, 2024
1 parent 7aeb28b commit f1f5c33
Show file tree
Hide file tree
Showing 4 changed files with 324 additions and 16 deletions.
27 changes: 15 additions & 12 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -91,21 +91,24 @@ Removed the following entries
9. Chichiri,"6,14,24","z4,z24",-,,H,TRUE,enterica
10. II 4:a:z39,"1,4,12,[27]",a,z39,,B,FALSE,salamae

Modified the following entries
1. Sundsvall,"[1],6,14,[25]",z,"e,n,x",,H,FALSE,enterica --> Sundsvall,"6,14",z,"e,n,x",,H,FALSE,enterica
2. Finkenwerder,"[1],6,14,[25]",d,"1,5",,H,FALSE,enterica --> Finkenwerder,"6,14",d,"1,5",,H,FALSE,enterica
3. Florida,"[1],6,14,[25]",d,"1,7",,H,FALSE,enterica --> Florida,"6,14",d,"1,7",,H,FALSE,enterica
4. Charity,"[1],6,14,[25]",d,"e,n,x",,H,FALSE,enterica --> Charity,"6,14",d,"e,n,x",,H,FALSE,enterica
5. Onderstepoort,"1,6,14,[25]","e,h","1,5",,H,FALSE,enterica --> Onderstepoort,"6,14","e,h","1,5",,H,FALSE,enterica
6. Schalkwijk,"6,14,[24]",i,"e,n,z15",,H,FALSE,enterica --> Schalkwijk,"6,14",i,"e,n,z15",,H,FALSE,enterica
7. Boecker,"[1],6,14,[25]","l,v","1,7",,H,FALSE,enterica --> Boecker,"6,14","l,v","1,7",,H,FALSE,enterica
8. Carrau,"6,14,[24]",y,"1,7",,H,FALSE,enterica --> Carrau,"6,14",y,"1,7",,H,FALSE,enterica
9. Uzaramo,"1,6,14,25","z4,z24",-,,H,TRUE,enterica --> Uzaramo,"6,14","z4,z24",-,,H,TRUE,enterica
10. Poano,"[1],6,14,[25]",z,"l,z13,z28",,H,FALSE,enterica --> Poano,"6,14",z,"l,z13,z28",,H,FALSE,enterica
The following entries were modified in the `O_antigen` field as follows

| Before | After |
|--------|-------|
|Sundsvall,"[1],6,14,[25]",z,"e,n,x",,H,FALSE,enterica| Sundsvall,"6,14",z,"e,n,x",,H,FALSE,enterica |
|Finkenwerder,"[1],6,14,[25]",d,"1,5",,H,FALSE,enterica | Finkenwerder,"6,14",d,"1,5",,H,FALSE,enterica |
|Florida,"[1],6,14,[25]",d,"1,7",,H,FALSE,enterica | Florida,"6,14",d,"1,7",,H,FALSE,enterica |
| Charity,"[1],6,14,[25]",d,"e,n,x",,H,FALSE,enterica | Charity,"6,14",d,"e,n,x",,H,FALSE,enterica |
| Onderstepoort,"1,6,14,[25]","e,h","1,5",,H,FALSE,enterica | Onderstepoort,"6,14","e,h","1,5",,H,FALSE,enterica |
| Schalkwijk,"6,14,[24]",i,"e,n,z15",,H,FALSE,enterica | Schalkwijk,"6,14",i,"e,n,z15",,H,FALSE,enterica |
| Boecker,"[1],6,14,[25]","l,v","1,7",,H,FALSE,enterica |Boecker,"6,14","l,v","1,7",,H,FALSE,enterica |
| Carrau,"6,14,[24]",y,"1,7",,H,FALSE,enterica | Carrau,"6,14",y,"1,7",,H,FALSE,enterica |
| Uzaramo,"1,6,14,25","z4,z24",-,,H,TRUE,enterica | Uzaramo,"6,14","z4,z24",-,,H,TRUE,enterica |
| Poano,"[1],6,14,[25]",z,"l,z13,z28",,H,FALSE,enterica | Poano,"6,14",z,"l,z13,z28",,H,FALSE,enterica |

### New fields
- Added `antigenic_formula` field that aggregates the O, H1 and H2 antigen values in a single location for convenience
- Added `--list-of-serovars` option allowing user to provide a single column text file listing all serovars of interest to match against the SISTR prediction. This could be useful for cases when only a certain list of serovars could be reported
- Added `--list-of-serovars` option allowing user to provide a single column text file listing all serovars of interest to match against the SISTR prediction. The result will be reported in the `serovar_in_list` field as `Y` if there is a match or `N` otherwise. This could be useful for cases when only a certain list of serovars could be reported

# 1.1.1

Expand Down
282 changes: 282 additions & 0 deletions sistr/data/serovar-list.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,282 @@
Abaetetuba
Aberdeen
Abony
Adelaide
Adjame
Agama
Agbeni
Ago
Agona
Alachua
Albany
Altona
Amager
Amsterdam
Anatum
Anderlecht
Apapa
Apeyeme
Arechavaleta
Augustenborg
Baildon
Bangkok
Bareilly
Barranquilla
Beaudesert
Berta
Birkenhead
Blockley
Bovismorbificans
Braenderup
Brancaster
Brandenburg
Bredeney
Butantan
Buzu
Canada
Cannstatt
Carmel
Carno
Carrau
Cerro
Charity
Chester
Chicago
Coeln
Coleypark
Colindale
Colobane
Concord
Corvallis
Cotham
Cubana
Curacao
Daytona
Denver
Derby
Dublin
Durban
Ealing
Eastbourne
Eko
Elisabethville
Enteritidis
Florida
Fluntern
Freetown
Fresno
Galiema
Gallinarum
Gaminara
Gatuni
Give
Goldcoast
Grumpensis
Guinea
Hadar
Haifa
Hartford
Havana
Heidelberg
Herston
Holcomb
Hvittingfoss
I 1,4,[5],12:b:-
I 1,4,[5],12:d:-
I 1,4,[5],12:i:-
I 1,4,[5],12:r:-
Ibadan
Idikan
II 42:b:e,n,x,z15
II 42:r:-
II 42:z:1,5
II 47:b:e,n,x,z15
II 48:d:z6
II 50:b:z6
II 58:d:z6
II 58:l,z13,z28:z6
IIIa 1,13,23:g,z51:-
IIIa 18:z4,z23:-
IIIa 40:z4,z23:-
IIIa 41:z4,z23:-
IIIa 44:z4,z23,z32:-
IIIa 44:z4,z23:-
IIIa 48:z4,z24:-
IIIa 56:z4,z23,z32:-
IIIb 16:z10:e,n,x,z15
IIIb 38:k:z
IIIb 38:k:z35
IIIb 48:l,v:1,5,7
IIIb 48:z52:z
IIIb 50:k:z
IIIb 50:r:z
IIIb 53:z10:z35
IIIb 60:r:e,n,x,z15
IIIb 60:r:z
IIIb 61:i:z53
IIIb 61:k:1,5,7
IIIb 61:k:z35
IIIb 61:l,v:1,5,7
IIIb 61:l,v:z35
IIIb 61:r:z
IIIb 61:z52:z53
IIIb 65:k:z
IIIb 65:k:z53
IIIb 65:z10:e,n,x,z15
Indiana
Infantis
Inganda
Inverness
Irumu
Isangi
Ituri
IV 11:g,z51:-
IV 16:z4,z32:-
IV 40:z4,z24:-
IV 43:z4,z23:-
IV 44:z36,z38:-
IV 44:z4,z23:-
IV 44:z4,z32:-
IV 45:g,z51:-
IV 48:g,z51:-
IV 48:z4,z32:-
IV 50:g,z51:-
IV 50:z4,z23:-
Jangwani
Javiana
Johannesburg
Kambole
Kandla
Kedougou
Kentucky
Kenya
Kiambu
Kimuenza
Kingston
Kirkee
Kisarawe
Kottbus
Kumasi
Lagos
Lansing
Larochelle
Lattenkamp
Lexington
Lille
Limete
Litchfield
Liverpool
Livingstone
Lomalinda
Lome
London
Louga
Lubbock
Luciana
Luke
Malstatt
Manchester
Manhattan
Marseille
Matadi
Matopeni
Mbandaka
Meleagridis
Mgulani
Miami
Michigan
Mikawasima
Milwaukee
Minnesota
Mishmarhaemek
Mississippi
Molade
Monschaui
Montevideo
Mountpleasant
Muenchen
Muenster
Napoli
Nchanga
Newlands
Newport
Nigeria
Norwich
Nottingham
Obogu
Offa
Ohio
Onderstepoort
Onireke
Oranienburg
Ordonez
Orientalis
Orion
Oslo
Ouakam
Overschie
Panama
Paratyphi A
Paratyphi B
Paratyphi B var. Java
Pensacola
Perth
Plymouth
Poano
Pomona
Poona
Potsdam
Putten
Reading
Redlands
Richmond
Ridge
Rissen
Roodepoort
Rubislaw
Saintpaul
Sandiego
Sanjuan
Schwarzengrund
Sendai
Senegal
Senftenberg
Shubra
Singapore
Sinstorf
Soerenga
Soumbedioune
Stanley
Stanleyville
Stockholm
Stourbridge
Suelldorf
Sundsvall
Szentes
Tafo
Takoradi
Tallahassee
Tamberma
Tanger
Tarshyne
Teddington
Tees
Telaviv
Telelkebir
Tennessee
Thompson
Tudu
Typhi
Typhimurium
Uganda
Umbilo
Urbana
Uzaramo
Virchow
Vitkin
Wandsworth
Wangata
Weltevreden
Wien
Worthington
Zaiman
8 changes: 4 additions & 4 deletions sistr/sistr_cmd.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,9 +93,9 @@ def init_parser():
type=int,
default=1,
help='Number of parallel threads to run sistr_cmd analysis.')
parser.add_argument('-l', '--list-of-serovars',
type=str, required=False,
help='A path to a single column text file containing list of serovar(s) to check serovar prediction against. Report predicted serovar is Y (present) and N (absent) in the list')
parser.add_argument('-l', '--list-of-serovars', nargs='?',
required=False, const=os.path.join(os.path.dirname(os.path.abspath(__file__)), "data/serovar-list.txt"),
help='A path to a single column text file containing list of serovar(s) to check serovar prediction against. Report predicted serovar in "serovar_in_list" field as Y (present) and N (absent) in the list. The default list will be used if not file specified.')
parser.add_argument('-v',
'--verbose',
action='count',
Expand Down Expand Up @@ -256,7 +256,7 @@ def sistr_predict(input_fasta, genome_name, tmp_dir, keep_tmp, args):
if os.path.exists(args.list_of_serovars):
with open(args.list_of_serovars) as fp:
serovars_selected_list = [l.rstrip() for l in fp.readlines()]
logging.info(f"Selected serovars list to check predictions against from {args.list_of_serovars} is {serovars_selected_list}")
logging.info(f"Selected serovars list to check predictions against from {args.list_of_serovars} containing {len(serovars_selected_list)} serovars")
else:
logging.warning(f"File {args.list_of_serovars} does not exist in path specified. Would not check against list of provided serovars ...")

Expand Down
23 changes: 23 additions & 0 deletions tests/test_serotyping.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import sys, os, json
from sistr.sistr_cmd import main



def test_serotyping(fasta_path):
    """Run SISTR end-to-end on a genome and verify the reported serovar.

    ``-l`` is passed without a value, so no explicit serovar list file is
    given on the command line; the test expects the predicted serovar to be
    flagged as present (``serovar_in_list == "Y"``).

    Args:
        fasta_path: Path to the input genome FASTA (supplied by a fixture
            defined outside this file).
    """
    output_prefix = "sistr_test_results"
    results_path = output_prefix + ".json"

    # Remove leftovers from earlier runs so that the existence check below
    # proves that *this* invocation produced the results file.
    try:
        os.remove(results_path)
    except FileNotFoundError:
        pass

    # main() reads its options from sys.argv; restore the original arguments
    # afterwards so the mutation does not leak into other tests.
    saved_argv = sys.argv[:]
    sys.argv[1:] = [
        "-i", fasta_path, "test",
        "--more-results",
        "--run-mash",
        "--qc",
        "-l",
        "-vvv",
        "-o", output_prefix,
    ]
    try:
        main()
    finally:
        sys.argv[:] = saved_argv

    assert os.path.exists(results_path), f"Results SISTR file for {fasta_path} not found"
    with open(results_path) as fp:
        sistr_json_results = json.load(fp)[0]

    assert sistr_json_results['antigenic_formula'] == "58:l,z13,z28:z6"
    assert sistr_json_results['cgmlst_matching_alleles'] == 330
    assert sistr_json_results['cgmlst_found_loci'] == 330
    assert sistr_json_results['serovar_in_list'] == "Y"
    assert sistr_json_results['serovar'] == "II 58:l,z13,z28:z6"

def test_noserovarlist_file(fasta_path):
    """Verify that a missing serovar list file yields no ``serovar_in_list`` field.

    ``-l`` points at a path that does not exist; the results are expected to
    be written anyway, just without the ``serovar_in_list`` key.

    Args:
        fasta_path: Path to the input genome FASTA (supplied by a fixture
            defined outside this file).
    """
    output_prefix = "sistr_test_results"
    results_path = output_prefix + ".json"

    # The same output name is used by other tests; delete any stale file so
    # the JSON read below can only come from this invocation.
    try:
        os.remove(results_path)
    except FileNotFoundError:
        pass

    # main() reads its options from sys.argv; restore the original arguments
    # afterwards so the mutation does not leak into other tests.
    saved_argv = sys.argv[:]
    sys.argv[1:] = [
        "-i", fasta_path, "test",
        "--more-results",
        "--run-mash",
        "--qc",
        "-l", "no_file_exists.txt",
        "-vvv",
        "-o", output_prefix,
    ]
    try:
        main()
    finally:
        sys.argv[:] = saved_argv

    with open(results_path) as fp:
        sistr_json_results = json.load(fp)[0]

    assert 'serovar_in_list' not in sistr_json_results

0 comments on commit f1f5c33

Please sign in to comment.