From 5f73c0f29497b0bfba7b83a36fd4474ae42705bb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marie=20Joss=C3=A9?=
<84919248+Marie59@users.noreply.github.com>
Date: Fri, 26 Jul 2024 15:19:22 +0200
Subject: [PATCH 01/10] Create sanntis.xml
---
tools/marine_omics/sanntis.xml | 53 ++++++++++++++++++++++++++++++++++
1 file changed, 53 insertions(+)
create mode 100644 tools/marine_omics/sanntis.xml
diff --git a/tools/marine_omics/sanntis.xml b/tools/marine_omics/sanntis.xml
new file mode 100644
index 000000000..a175d7144
--- /dev/null
+++ b/tools/marine_omics/sanntis.xml
@@ -0,0 +1,53 @@
+
+ in genomic and metagenomic data
+
+ 0.1.0
+ 0
+
+
+ topic_3387
+
+
+ sanntis
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 10.1101/2023.05.23.540769
+
+
From 28eb3ae595b45720fa441d7eab097dd285772d68 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marie=20Joss=C3=A9?=
<84919248+Marie59@users.noreply.github.com>
Date: Fri, 26 Jul 2024 15:20:49 +0200
Subject: [PATCH 02/10] Add BGC0001472.fna.prodigal.faa.gb
---
.../test-data/BGC0001472.fna.prodigal.faa.gb | 155 ++++++++++++++++++
1 file changed, 155 insertions(+)
create mode 100644 tools/marine_omics/test-data/BGC0001472.fna.prodigal.faa.gb
diff --git a/tools/marine_omics/test-data/BGC0001472.fna.prodigal.faa.gb b/tools/marine_omics/test-data/BGC0001472.fna.prodigal.faa.gb
new file mode 100644
index 000000000..0f290eb81
--- /dev/null
+++ b/tools/marine_omics/test-data/BGC0001472.fna.prodigal.faa.gb
@@ -0,0 +1,155 @@
+LOCUS BGC0001472 32 bp DNA UNK 01-JAN-1980
+DEFINITION BGC0001472.
+ACCESSION BGC0001472
+VERSION BGC0001472
+KEYWORDS .
+SOURCE .
+ ORGANISM .
+ .
+FEATURES Location/Qualifiers
+ CDS 312..683
+ /translation="MPTIQQLVRKGRQDKVEKNKTPALEGSPQRRGVCTRVFTTTPKKP
+ NSALRKVARVRLTSGIEVTAYIPGEGHNLQEHSIVLVRGGRVKDLPGVRYKIIRGSLDT
+ QGVKNRKQARSRYGAKKEK"
+ /protein_id="BGC0001472_1"
+ CDS 686..1156
+ /translation="MPRKGPAPKRPVIIDPVYSSPLVTSLINKILLDGKRSTAERIVYG
+ AMEGLREKTGADPVITLKRALENVKPSLEVKSRRVGGATYQVPIEVKPGRAATLALRWV
+ VGYSRARREKTMTERLMNELLDASNGLGAAVKKREDTHKMAESNKAFAHYRW"
+ /protein_id="BGC0001472_2"
+ CDS 1195..3324
+ /translation="MATTSLDLAKVRNIGIMAHIDAGKTTTTERILFYTGVSYKIGEVH
+ DGAATMDWMEQEQERGITITSAATTCHWPLNDVDHTINIIDTPGHVDFTVEVERSLRVL
+ DGAVTVFDGVAGVEPQSETVWRQADRYGVPRICFVNKLDRTGADFLRCVDMIVQRLGAV
+ PIVMQLPIGAEADFRGVVDLVSMKAFVYPEEAVKGEMYDTVEIPDNLKEAAEEWRGKLL
+ EAVSENDDQMMELYLEGEEPTEEQLHEAIRRITLASKGSADSVTVTPVFCGTAFKNKGV
+ QPLLDAVVRYLPSPLDVEAIEGHDVKDPEKVVQRKPSDDEPFSGLAFKIASDPHLGKLT
+ FVRIYSGRLEAGTAVLNSVKGKKERIGKIYRMHANKREEIPSVGAGDIVAVMGLKQTTT
+ GETLCDDKNPVILESMDFPAPVIQVAIEPKSKGDQEKLGVAIQRLSEEDPSFQVHSDEE
+ TGQTIIGGMGELHLEVLVDRMKREFRVEANVGKPQVAYRETIRKAVERIDYTHKKQTGG
+ TGQFAKVQIAIEPIEGGDASYEFVNKVTGGRIPREYIPSVDAGAQEAMQFGILAGYEMV
+ GVRVTLLDGGYHEVDSSELAFKIAGSQAFKEGARKASPVLLEPMMAVEVTTPEDYMGEV
+ VGDINSRRGQIQAMEERHGARVVKGLVPLSEMFGYVGDLRSKTSGRASYSMQFDSYAEV
+ PRNVAEEIIAKAKGE"
+ /protein_id="BGC0001472_3"
+ CDS 3472..4665
+ /translation="MAKAKFERTKPHVNIGTIGHIDHGKTTLTAAITKVLHDAYPDLNE
+ ASAFDQIDKAPEERQRGITISIAHVEYQTESRHYAHVDCPGHADYIKNMITGAAQMDGA
+ ILVVAATDGPMPQTKEHVLLARQVGVPYIVVALNKADMVDDEEILELVELEVRELLSEY
+ EFPGDDLPVVKVSALKALEGDAEWGQTVLDLMKAVDESIPQPERDVEKPFLMPIEDVFT
+ ITGRGTVVTGRIERGVLKVNETVDIVGIKTEKTTTTVTGIEMFRKLLDEGQAGENVGLL
+ LRGIKREDVERGQVIIKPGSVTPHTEFQAQAYILSKDEGGRHTPFFNNYRPQFYFRTTD
+ VTGVVTLPEGTEMVMPGDNTLMDVALIQPVAMEEGLKFAIREGGRTVGAGQVTKITK"
+ /protein_id="BGC0001472_4"
+ CDS 4869..5570
+ /translation="MRNDVTSMTAVLEGFTSRTPTSDGLAAERRPVPFADSVPVEPQPS
+ AEDLRPVHDLRGTLERRRSSLHYAPLPVRTDVILSLLRDVLRRDRDDWGLDASAGALEG
+ FVFAFRSEGAEPGLYRVTAEETCYLAGLDEIGPAENLGVQREFSTGAGIVALYASLDRA
+ DTWAGSHGYRISALRASMATYDLNLRCQALGLVGTLFGGFVPSSVHHLVHSDGATRHSL
+ LATTYARPPES"
+ /protein_id="BGC0001472_5"
+ CDS 5567..7195
+ /translation="MVAEMKAEQIGRAARTDMQLTVPARPVLRRGVRLRRAGESVVLDG
+ ADRAQVFSGAFAREGLVPLTEACDGTRDHTELALKTGFDEATVYKCLALLSTAGAVEEA
+ MSGEEPDVTPEWAVFLSRLGNSTGSNPSWADAAARLVSRSVRLEGDAALVAGARRSLRE
+ VCPVVTEPAGPPGPGDELTVFFETPASAPLLAATEERCRQDGRPLLRVRADARTITIGP
+ YADLSITPCLDCGRHGEADLSGEPPEYLHDLVVGLASHHVTALLARATISHLPGDFTVI
+ DTATLSTVYRPVAVRPGCPRCSYARGPVAPQAPAGAVYEASVAMPPRAFLAPKDHQAHY
+ YASNLRLQSQFKDWPSRPHTPLPALDISVLAGSERHDPSHGDTPLTLSSLGLLLKVAFG
+ VKEDETTPERVKRWTAASGNIGSTTAYAVVRDDRIMPPGVYAYAQGSHTLVTVSGEVPP
+ GDSPCDIIITGDLKKVMTKYGTFGFRLVFLDAGCNLASLRELAQHLGLGFTPRSDWDDD
+ ALARLLGTSPADEPVAAFASLGGTA"
+ /protein_id="BGC0001472_6"
+ CDS 7210..7821
+ /translation="MSHDPRPQCLYLVGDTFSRRLTEHRGVPPELQVSFEDFLNDTAPH
+ ADVVVPVHAGGDPGLRDETDRICAERSTPSVGLQLLPTKVLCGPVVVPGRTACYACYRK
+ RAAQHAGTARPYDMDAALSGLPEGFGRQHLSVASGLLDLALTEIATGVTGIGGTVRTFN
+ LVSGAVSSAVTVSVNRCPRCGGRFSQARADSAMPVPELLR"
+ /protein_id="BGC0001472_7"
+ CDS 7845..9191
+ /translation="MHLNRPQEHISAELRGLEELVSPYGLVSRTAPLPVREGEPPFAVQ
+ LAYLGVPSRALPNLRTWAHDEDTGNSDGAGTGLTPERAKLVSIAEALERYSTCAWDDDE
+ MVVAAENDLTEEFVSPSRWPSCSPTELARDDCSLSAYDPSVPIRWVRAWSLTRRIPVLV
+ PAISVYLHMPYQSKSEEFIRGITTGAAVHSDVRSAVLGGLLEVVERDAIALVWLQQLRL
+ PELVVDPARLDAGVRELHRVGTSTDLRVRLFDATTDFGVPVIYAVQLSDADPALAQIVA
+ ATCDVHPEQALGKIYRELASLRVALRGYLSAYAGREPDPAKVSVVGGAVHNATRDRRDV
+ FGFLLDGERPAYGLEGMPGLPAGADPLDTVVARLAARGAEVLVTDITTDEARQVGMRAV
+ KVLVPEAMPVSFVHGERYLGTPRLYDAPRAMGHTSHAEDAVNPVQQPFA"
+ /protein_id="BGC0001472_8"
+ CDS 9238..10437
+ /translation="MTQITLEPGFLLLISLSYGRLQDHVTARLAPAEISGVSFVHLFAT
+ IPQPVGSKYNDTFAPLIRELFAPERVGGAGGHGPYYFVRTQDAQLGTDTLQISIEGVSD
+ EDSTRADLHRTAERYGCAAQVDATPLDSVPSPLWNAGFTGTGFSASSKRLFQEAAPTLV
+ SFLNRAAETPQSPPPALGAIRLMAAHTRATLLRSPQREIDGYEFRELLSLRLLSYRSHF
+ EAIYLRTKDPQSFDAACARFYEQVGAGVREFITACGDPDDDPADEMVRLWTKSITSESS
+ HLAENFSDGSVVNAGHTLEDLVRKRGAPVEPTRFHTPPSPELDRLMHRDADFLAFRLQT
+ SLLYSCLYTLGFSLAERYVFCYVVARANEDVCGKSMKELQDELDGLARSMASGSTKTAE
+ "
+ /protein_id="BGC0001472_9"
+ CDS 10511..10654
+ /translation="MEQQIELDVLEISDLIAGAGENDDLAQVMAASCTTTSVSTSSSSS
+ SS"
+ /protein_id="BGC0001472_10"
+ CDS 10977..13634
+ /translation="MGVNISPYVVYRRSRLPLGELGGMSFTTAWSRIDELHALRDEIGK
+ NAVGLADRLGELVPTLGDDVRADLIRLRRDVHNLRHDRAVARLEPLRPHLGREVVDEVE
+ TWCALGVRAEQCERAGREELESEKARAADGFGALFEHDAMARSIQLSGDRLYRGLRDLV
+ AGDEASALKPSKARLRESSLVNFAYRASLKPSPFGRFTEIGAFPPDDPRPADPGGRHGG
+ TQESVTTLNRLLVNWGPPGLPLVPGGMEPGHLVLNSTLRAGTEYVEYVGVAPGSREDGR
+ MATERVLRVRREGLFDALLAAMPEGSAPAATVLRDLTAVTGKAETSRKVVQGLIRAGIL
+ FFRPEIDDHDPDYSMKLDRVLAAGGTPETAALRGHFSELRRLETDFSEAAADERQKLLD
+ SAYAAIGGIAELCKVSPPPEEVLKSPVFEDTPASTAPQAWNLPTVEGSIPALTGLWRLA
+ SMMDNGQVKRLGLYSFATRVLGDRSTMPFLEFFQAFSSLTDQEQVDVFMGRDVEEAERY
+ TRQRAEALRTIRQRLVPGDGTVHLDPSVIEKACEGVEDLLDTESVTFRAQFAQGVLPDR
+ DRTLVVNGLLTGYGVYFSRFGSFVEGTDEWSLPAAQREHLARRFPGQVDLNSVLGFNFN
+ LHPSVTRRVVNYPGAVSLGAERTVYGLARLEVRADQATRSLRLWDPEAQETLDLVPMNF
+ MTPIGVPLLYRLLEALSPSNRYLWKPLDDIRDAGGPTVYGETAPRLVVGDVVADRRSWN
+ VAAAEIPMLQDLSRDVPEALVAFDAWRLTRGLPRHAFVLCQTPEERDVMAGRSRKVTRQ
+ WADYAHLRRASVHKPMYVDFRNPFLVRSFAKSALSRGDVVASIRECLPSVDDYGPDTGW
+ TAAEEFFVELCTDN"
+ /protein_id="BGC0001472_11"
+ CDS 13612..14571
+ /translation="MNCVPTTSGQTGTREWRTVHIHVPHSLHTPFLCDVVEPLLRSEGL
+ QDHFFFLRYWQGGPHLRLRMLCGPGAGSAEAAERVVAGLARAMPEFGAQAREEYALGLT
+ LQDELARLEKETSEEGRPIGALDRVAYEPEYRKYGGTEGLQIAETVFRKSSVAVLGLLG
+ GQPRAWVDERRAPIGEAARIMAMFLHGAGLDPRAAGLFLREYEDWWRTYAPDDMQRAWP
+ KLFGGVSAQMTNLCAAVWRDGATDVFHDISAEAAARARSVCGAEPGGDVRDLRLDGTPY
+ PGCLSNYVHTTNNRLGLVPAAEGLVAYLVRRGLEAMDG"
+ /protein_id="BGC0001472_12"
+ CDS 14692..15894
+ /translation="MTDRQDSAYPYPRTCPLHPPKEYASLRAEQPITKVTLASGRTAWL
+ LTRHEHIRQLLADPHVSSNLAHPGYPLHFDAPPEVMEQMRPVLLAMDPPVHTAQRKMVI
+ PEFTVKRVLQLRPRVEEIVDECISSMLAGEGPADLVEALALPVPSLVICELLGVPRSDR
+ AFFQDRTNKLVSVDADPQERNSAHQELHAYFSELVTAQEADPGDDLLGRLVVKNRETGT
+ FDHGELVGMANVLLVGGHETTANMISLGVVGLLENPDQLAKLRADPGLAPQAVDELLRY
+ FSIADQVTSRVATADLEIGGVLIRAGEGVIGLSASGNHDEAVFPDPDRLDIERGGRHHL
+ AFGHGIHQCIGQNLAKLELEVVFNALLARIPGLKLATPVAELPFKDSMGVYGLHKLPVS
+ W"
+ /protein_id="BGC0001472_13"
+ CDS 16220..16564
+ /translation="MYLSIVMWDLKKSEATVESLREYLRDYAVDAYSALDGMRLKAWFS
+ DSARQLWGAVYLWDSPEQMPGLYKVSRVIDLIGYPPTSVGGFTLEATAEGKSVHETLAG
+ LGIALEGGTQ"
+ /protein_id="BGC0001472_14"
+ CDS 17019..17729
+ /translation="MLIEDIEPLLQSIRAGVEFIEIYGLDTVPVPDSLLAECERRRIPV
+ RLLAASVANQVFKTEKKPKVFGIAKVPRPRRLSDLSDMTGDLILLDGVKIVGNIGAIVR
+ TSFALGASGIVLVDSDLGSIADRRLIRASRGYVFSLPIVLASRAEALQYFQDNAMRPVV
+ FEADGDLGVADLDGMDERLVLMFGSERIGPSGEFSDIAAKSVSIPMNPAAESLNVSVSA
+ GIALHARARRNLSR"
+ /protein_id="BGC0001472_15"
+ CDS 17815..19485
+ /translation="ALLGLRPFTPWEVSVAELGPDHRAEVNVLAADGRRVELIFLNTAM
+ HTGRHRLGLPSLWQDRRLVLRTVVADGSPLRRAGSYTYDGLVGVLTGLMESYRPTVVHT
+ LDPDPDIQHSTEAVRRRDSEQPGYSDHADHTAAACFAWAAMIRWVARATADGGRIPGFV
+ TVAYRGYYNRHWPKNLPQGVLARKAAHLVPYGGSPDWDCGNPSGCGDYNVGGDRPLTNR
+ KGWVRSTHHRYPGTRTVLTAEPDGRLAAYAVLGLRVVRWQETGPGSGAWGPPHDLGGGP
+ LAPALGSATTRDGRLLLFGLRFAALGGHGADNEREIVVLEQSAPGRGFRPWRGLGSPSP
+ GRDEVRRTGVPVAVAAPDGQIHLFVRDAEKGVSTRVRDGAGRWSAWRDMGGGEVQDGLH
+ TAVDEGGRVHVFGAGHHAVHHWTQDTPSAGLTARTQLTAAPVPAHAPAALPAPDGSVSL
+ YYRAAAGSGLTTARAGTAVPGARFDGYXXVDAAPSPRGPVLLGRTAEGLVQLLMGGGLH
+ VRTDGPAALDGASLRLGPDGRPSVAGLGPDAAPWMWRPR"
+ /protein_id="BGC0001472_16"
+ORIGIN
+ 1 gatcgatcga tcgatcgatc gatcgatcga tc
+//
From b77c1c27889d5335dc81772482b0e244da3ec0d0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marie=20Joss=C3=A9?=
<84919248+Marie59@users.noreply.github.com>
Date: Fri, 26 Jul 2024 15:21:32 +0200
Subject: [PATCH 03/10] Add test files
---
.../BGC0001472.fna.prodigal.faa.ip.tsv | 81 +++++++++++++++++++
.../test-data/Sanntis_output_data.gff3 | 2 +
2 files changed, 83 insertions(+)
create mode 100644 tools/marine_omics/test-data/BGC0001472.fna.prodigal.faa.ip.tsv
create mode 100644 tools/marine_omics/test-data/Sanntis_output_data.gff3
diff --git a/tools/marine_omics/test-data/BGC0001472.fna.prodigal.faa.ip.tsv b/tools/marine_omics/test-data/BGC0001472.fna.prodigal.faa.ip.tsv
new file mode 100644
index 000000000..7cf4f5eb6
--- /dev/null
+++ b/tools/marine_omics/test-data/BGC0001472.fna.prodigal.faa.ip.tsv
@@ -0,0 +1,81 @@
+BGC0001472_13 874c0f534839f521f055a275c391567a 400 ProSitePatterns PS00086 Cytochrome P450 cysteine heme-iron ligand signature. 342 351 - T 13-08-2021 IPR017972 Cytochrome P450, conserved site
+BGC0001472_13 874c0f534839f521f055a275c391567a 400 PRINTS PR00385 P450 superfamily signature 340 349 3.1E-7 T 13-08-2021 IPR001128 Cytochrome P450
+BGC0001472_13 874c0f534839f521f055a275c391567a 400 PRINTS PR00385 P450 superfamily signature 238 255 3.1E-7 T 13-08-2021 IPR001128 Cytochrome P450
+BGC0001472_13 874c0f534839f521f055a275c391567a 400 PRINTS PR00385 P450 superfamily signature 273 284 3.1E-7 T 13-08-2021 IPR001128 Cytochrome P450
+BGC0001472_13 874c0f534839f521f055a275c391567a 400 PRINTS PR00385 P450 superfamily signature 349 360 3.1E-7 T 13-08-2021 IPR001128 Cytochrome P450
+BGC0001472_13 874c0f534839f521f055a275c391567a 400 Pfam PF00067 Cytochrome P450 272 368 4.0E-18 T 13-08-2021 IPR001128 Cytochrome P450
+BGC0001472_13 874c0f534839f521f055a275c391567a 400 PRINTS PR00359 B-class P450 signature 273 284 8.3E-58 T 13-08-2021 IPR002397 Cytochrome P450, B-class
+BGC0001472_13 874c0f534839f521f055a275c391567a 400 PRINTS PR00359 B-class P450 signature 319 334 8.3E-58 T 13-08-2021 IPR002397 Cytochrome P450, B-class
+BGC0001472_13 874c0f534839f521f055a275c391567a 400 PRINTS PR00359 B-class P450 signature 291 318 8.3E-58 T 13-08-2021 IPR002397 Cytochrome P450, B-class
+BGC0001472_13 874c0f534839f521f055a275c391567a 400 PRINTS PR00359 B-class P450 signature 340 349 8.3E-58 T 13-08-2021 IPR002397 Cytochrome P450, B-class
+BGC0001472_13 874c0f534839f521f055a275c391567a 400 PRINTS PR00359 B-class P450 signature 349 360 8.3E-58 T 13-08-2021 IPR002397 Cytochrome P450, B-class
+BGC0001472_13 874c0f534839f521f055a275c391567a 400 PRINTS PR00359 B-class P450 signature 138 154 8.3E-58 T 13-08-2021 IPR002397 Cytochrome P450, B-class
+BGC0001472_13 874c0f534839f521f055a275c391567a 400 PRINTS PR00359 B-class P450 signature 192 214 8.3E-58 T 13-08-2021 IPR002397 Cytochrome P450, B-class
+BGC0001472_13 874c0f534839f521f055a275c391567a 400 PRINTS PR00359 B-class P450 signature 155 170 8.3E-58 T 13-08-2021 IPR002397 Cytochrome P450, B-class
+BGC0001472_13 874c0f534839f521f055a275c391567a 400 PRINTS PR00359 B-class P450 signature 91 102 8.3E-58 T 13-08-2021 IPR002397 Cytochrome P450, B-class
+BGC0001472_13 874c0f534839f521f055a275c391567a 400 Gene3D G3DSA:1.10.630.10 Cytochrome P450 2 400 7.0E-113 T 13-08-2021 IPR036396 Cytochrome P450 superfamily
+BGC0001472_11 67b7792659aca4f0747f903233e4f593 885 Pfam PF04738 Lantibiotic dehydratase, N terminus 141 791 2.4E-20 T 13-08-2021 IPR006827 Lantibiotic dehydratase, N-terminal
+BGC0001472_6 76d1387ac73417cb91ccfb11c2c5229e 542 Gene3D G3DSA:3.40.50.720 - 132 304 8.2E-16 T 13-08-2021 - -
+BGC0001472_6 76d1387ac73417cb91ccfb11c2c5229e 542 Gene3D G3DSA:3.40.109.10 NADH Oxidase 348 542 3.1E-35 T 13-08-2021 IPR000415 Nitroreductase-like
+BGC0001472_3 4b28e769738231bbe9f69d4979528f4d 709 Gene3D G3DSA:3.40.50.300 - 3 304 1.4E-121 T 13-08-2021 IPR027417 P-loop containing nucleoside triphosphate hydrolase
+BGC0001472_3 4b28e769738231bbe9f69d4979528f4d 709 Pfam PF00009 Elongation factor Tu GTP binding domain 10 294 1.2E-65 T 13-08-2021 IPR000795 Translational (tr)-type GTP-binding domain
+BGC0001472_3 4b28e769738231bbe9f69d4979528f4d 709 TIGRFAM TIGR00231 small_GTP: small GTP-binding protein domain 11 184 1.5E-33 T 13-08-2021 IPR005225 Small GTP-binding protein domain
+BGC0001472_3 4b28e769738231bbe9f69d4979528f4d 709 Pfam PF03144 Elongation factor Tu domain 2 337 404 9.3E-16 T 13-08-2021 IPR004161 Translation elongation factor EFTu-like, domain 2
+BGC0001472_3 4b28e769738231bbe9f69d4979528f4d 709 Pfam PF00679 Elongation factor G C-terminus 615 701 2.7E-29 T 13-08-2021 IPR000640 Elongation factor EFG, domain V-like
+BGC0001472_3 4b28e769738231bbe9f69d4979528f4d 709 Pfam PF14492 Elongation Factor G, domain III 417 491 2.5E-33 T 13-08-2021 IPR041095 Elongation Factor G, domain II
+BGC0001472_3 4b28e769738231bbe9f69d4979528f4d 709 Gene3D G3DSA:3.30.70.870 Elongation Factor G (Translational Gtpase), domain 3 421 497 1.0E-34 T 13-08-2021 - -
+BGC0001472_3 4b28e769738231bbe9f69d4979528f4d 709 TIGRFAM TIGR00484 EF-G: translation elongation factor G 5 707 0.0 T 13-08-2021 IPR004540 Translation elongation factor EFG/EF2
+BGC0001472_3 4b28e769738231bbe9f69d4979528f4d 709 Gene3D G3DSA:3.30.230.10 - 498 703 4.6E-92 T 13-08-2021 IPR014721 Ribosomal protein S5 domain 2-type fold, subgroup
+BGC0001472_3 4b28e769738231bbe9f69d4979528f4d 709 PRINTS PR00315 GTP-binding elongation factor signature 13 26 4.2E-16 T 13-08-2021 IPR000795 Translational (tr)-type GTP-binding domain
+BGC0001472_3 4b28e769738231bbe9f69d4979528f4d 709 PRINTS PR00315 GTP-binding elongation factor signature 59 67 4.2E-16 T 13-08-2021 IPR000795 Translational (tr)-type GTP-binding domain
+BGC0001472_3 4b28e769738231bbe9f69d4979528f4d 709 PRINTS PR00315 GTP-binding elongation factor signature 83 93 4.2E-16 T 13-08-2021 IPR000795 Translational (tr)-type GTP-binding domain
+BGC0001472_3 4b28e769738231bbe9f69d4979528f4d 709 PRINTS PR00315 GTP-binding elongation factor signature 99 110 4.2E-16 T 13-08-2021 IPR000795 Translational (tr)-type GTP-binding domain
+BGC0001472_3 4b28e769738231bbe9f69d4979528f4d 709 PRINTS PR00315 GTP-binding elongation factor signature 135 144 4.2E-16 T 13-08-2021 IPR000795 Translational (tr)-type GTP-binding domain
+BGC0001472_3 4b28e769738231bbe9f69d4979528f4d 709 Pfam PF03764 Elongation factor G, domain IV 492 613 2.5E-47 T 13-08-2021 IPR005517 Translation elongation factor EFG/EF2, domain IV
+BGC0001472_3 4b28e769738231bbe9f69d4979528f4d 709 ProSitePatterns PS00301 Translational (tr)-type guanine nucleotide-binding (G) domain signature. 52 67 - T 13-08-2021 IPR031157 Tr-type G domain, conserved site
+BGC0001472_3 4b28e769738231bbe9f69d4979528f4d 709 Gene3D G3DSA:2.40.30.10 Translation factors 305 420 6.0E-44 T 13-08-2021 - -
+BGC0001472_3 4b28e769738231bbe9f69d4979528f4d 709 Gene3D G3DSA:3.30.70.240 - 619 689 4.6E-92 T 13-08-2021 - -
+BGC0001472_4 d768d0b7047f823230c36d45b2e27c7f 397 Gene3D G3DSA:3.40.50.300 - 1 205 3.7E-74 T 13-08-2021 IPR027417 P-loop containing nucleoside triphosphate hydrolase
+BGC0001472_4 d768d0b7047f823230c36d45b2e27c7f 397 TIGRFAM TIGR00485 EF-Tu: translation elongation factor Tu 1 396 0.0 T 13-08-2021 IPR004541 Translation elongation factor EFTu/EF1A, bacterial/organelle
+BGC0001472_4 d768d0b7047f823230c36d45b2e27c7f 397 Pfam PF03144 Elongation factor Tu domain 2 227 296 3.0E-17 T 13-08-2021 IPR004161 Translation elongation factor EFTu-like, domain 2
+BGC0001472_4 d768d0b7047f823230c36d45b2e27c7f 397 Pfam PF03143 Elongation factor Tu C-terminal domain 301 395 1.4E-38 T 13-08-2021 IPR004160 Translation elongation factor EFTu/EF1A, C-terminal
+BGC0001472_4 d768d0b7047f823230c36d45b2e27c7f 397 Gene3D G3DSA:2.40.30.10 Translation factors 208 337 2.9E-57 T 13-08-2021 - -
+BGC0001472_4 d768d0b7047f823230c36d45b2e27c7f 397 TIGRFAM TIGR00231 small_GTP: small GTP-binding protein domain 13 147 1.9E-13 T 13-08-2021 IPR005225 Small GTP-binding protein domain
+BGC0001472_4 d768d0b7047f823230c36d45b2e27c7f 397 PRINTS PR00315 GTP-binding elongation factor signature 14 27 2.3E-24 T 13-08-2021 IPR000795 Translational (tr)-type GTP-binding domain
+BGC0001472_4 d768d0b7047f823230c36d45b2e27c7f 397 PRINTS PR00315 GTP-binding elongation factor signature 60 68 2.3E-24 T 13-08-2021 IPR000795 Translational (tr)-type GTP-binding domain
+BGC0001472_4 d768d0b7047f823230c36d45b2e27c7f 397 PRINTS PR00315 GTP-binding elongation factor signature 80 90 2.3E-24 T 13-08-2021 IPR000795 Translational (tr)-type GTP-binding domain
+BGC0001472_4 d768d0b7047f823230c36d45b2e27c7f 397 PRINTS PR00315 GTP-binding elongation factor signature 96 107 2.3E-24 T 13-08-2021 IPR000795 Translational (tr)-type GTP-binding domain
+BGC0001472_4 d768d0b7047f823230c36d45b2e27c7f 397 PRINTS PR00315 GTP-binding elongation factor signature 133 142 2.3E-24 T 13-08-2021 IPR000795 Translational (tr)-type GTP-binding domain
+BGC0001472_4 d768d0b7047f823230c36d45b2e27c7f 397 Pfam PF00009 Elongation factor Tu GTP binding domain 10 203 5.6E-57 T 13-08-2021 IPR000795 Translational (tr)-type GTP-binding domain
+BGC0001472_4 d768d0b7047f823230c36d45b2e27c7f 397 Gene3D G3DSA:2.40.30.10 Translation factors 341 395 1.0E-25 T 13-08-2021 - -
+BGC0001472_4 d768d0b7047f823230c36d45b2e27c7f 397 ProSitePatterns PS00301 Translational (tr)-type guanine nucleotide-binding (G) domain signature. 53 68 - T 13-08-2021 IPR031157 Tr-type G domain, conserved site
+BGC0001472_12 80b32fd90b93d2d340ddcffd78658c6b 319 Pfam PF14028 Lantibiotic biosynthesis dehydratase C-term 16 314 2.0E-49 T 13-08-2021 IPR023809 Thiopeptide-type bacteriocin biosynthesis domain
+BGC0001472_16 c1b339f48f233f90c5ac174024b991af 556 Gene3D G3DSA:3.40.50.10320 - 14 220 1.0E-8 T 13-08-2021 IPR024078 Putative deacetylase LmbE-like domain superfamily
+BGC0001472_16 c1b339f48f233f90c5ac174024b991af 556 Pfam PF02585 GlcNAc-PI de-N-acetylase 52 145 9.7E-10 T 13-08-2021 IPR003737 N-acetylglucosaminyl phosphatidylinositol deacetylase-related
+BGC0001472_16 c1b339f48f233f90c5ac174024b991af 556 Gene3D G3DSA:2.120.10.70 - 318 498 6.7E-7 T 13-08-2021 - -
+BGC0001472_2 fc82bb58d52c83068b7ca785129b2384 156 TIGRFAM TIGR01029 rpsG_bact: ribosomal protein uS7 3 156 4.4E-64 T 13-08-2021 IPR005717 Ribosomal protein S7, bacterial/organellar-type
+BGC0001472_2 fc82bb58d52c83068b7ca785129b2384 156 Pfam PF00177 Ribosomal protein S7p/S5e 1 149 4.0E-59 T 13-08-2021 IPR023798 Ribosomal protein S7 domain
+BGC0001472_2 fc82bb58d52c83068b7ca785129b2384 156 Gene3D G3DSA:1.10.455.10 Ribosomal protein S7 domain 1 155 7.0E-60 T 13-08-2021 IPR036823 Ribosomal protein S7 domain superfamily
+BGC0001472_2 fc82bb58d52c83068b7ca785129b2384 156 ProSitePatterns PS00052 Ribosomal protein S7 signature. 20 46 - T 13-08-2021 IPR020606 Ribosomal protein S7, conserved site
+BGC0001472_15 206c74fd5c80ef02123ab090a4b6cfa4 236 Pfam PF04705 Thiostrepton-resistance methylase, N terminus 1 82 5.8E-30 T 13-08-2021 IPR006795 Thiostrepton-resistance methylase, N-terminal
+BGC0001472_15 206c74fd5c80ef02123ab090a4b6cfa4 236 Gene3D G3DSA:3.40.1280.10 - 75 235 1.3E-37 T 13-08-2021 IPR029026 tRNA (guanine-N1-)-methyltransferase, N-terminal
+BGC0001472_15 206c74fd5c80ef02123ab090a4b6cfa4 236 Gene3D G3DSA:3.30.1330.30 - 1 73 2.3E-26 T 13-08-2021 IPR029064 50S ribosomal protein L30e-like
+BGC0001472_15 206c74fd5c80ef02123ab090a4b6cfa4 236 Pfam PF00588 SpoU rRNA Methylase family 88 227 1.8E-26 T 13-08-2021 IPR001537 tRNA/rRNA methyltransferase, SpoU type
+BGC0001472_8 2149eda482fc77a076bb0eb91c55bd5d 448 Gene3D G3DSA:3.30.40.250 - 104 186 3.5E-36 T 13-08-2021 - -
+BGC0001472_8 2149eda482fc77a076bb0eb91c55bd5d 448 TIGRFAM TIGR03604 TOMM_cyclo_SagD: thiazole/oxazole-forming peptide maturase, SagD family component 75 448 1.4E-100 T 13-08-2021 IPR027624 Thiazole/oxazole-forming peptide maturase, SagD family component
+BGC0001472_8 2149eda482fc77a076bb0eb91c55bd5d 448 Pfam PF02624 YcaO cyclodehydratase, ATP-ad Mg2+-binding 75 406 8.3E-62 T 13-08-2021 IPR003776 YcaO-like domain
+BGC0001472_8 2149eda482fc77a076bb0eb91c55bd5d 448 Gene3D G3DSA:3.30.1330.230 - 82 405 3.5E-36 T 13-08-2021 - -
+BGC0001472_8 2149eda482fc77a076bb0eb91c55bd5d 448 Gene3D G3DSA:3.30.160.660 - 223 357 3.5E-36 T 13-08-2021 - -
+BGC0001472_1 f4269c94863705a842e7252b96e5f27d 123 TIGRFAM TIGR00981 rpsL_bact: ribosomal protein uS12 1 123 5.4E-69 T 13-08-2021 IPR005679 Ribosomal protein S12, bacterial-type
+BGC0001472_1 f4269c94863705a842e7252b96e5f27d 123 ProSitePatterns PS00055 Ribosomal protein S12 signature. 43 50 - T 13-08-2021 IPR006032 Ribosomal protein S12/S23
+BGC0001472_1 f4269c94863705a842e7252b96e5f27d 123 Pfam PF00164 Ribosomal protein S12/S23 12 123 8.3E-44 T 13-08-2021 IPR006032 Ribosomal protein S12/S23
+BGC0001472_1 f4269c94863705a842e7252b96e5f27d 123 PRINTS PR01034 Ribosomal protein S12 signature 27 42 8.4E-59 T 13-08-2021 IPR006032 Ribosomal protein S12/S23
+BGC0001472_1 f4269c94863705a842e7252b96e5f27d 123 PRINTS PR01034 Ribosomal protein S12 signature 42 57 8.4E-59 T 13-08-2021 IPR006032 Ribosomal protein S12/S23
+BGC0001472_1 f4269c94863705a842e7252b96e5f27d 123 PRINTS PR01034 Ribosomal protein S12 signature 58 77 8.4E-59 T 13-08-2021 IPR006032 Ribosomal protein S12/S23
+BGC0001472_1 f4269c94863705a842e7252b96e5f27d 123 PRINTS PR01034 Ribosomal protein S12 signature 77 94 8.4E-59 T 13-08-2021 IPR006032 Ribosomal protein S12/S23
+BGC0001472_1 f4269c94863705a842e7252b96e5f27d 123 PRINTS PR01034 Ribosomal protein S12 signature 94 110 8.4E-59 T 13-08-2021 IPR006032 Ribosomal protein S12/S23
+BGC0001472_1 f4269c94863705a842e7252b96e5f27d 123 PRINTS PR01034 Ribosomal protein S12 signature 110 122 8.4E-59 T 13-08-2021 IPR006032 Ribosomal protein S12/S23
+BGC0001472_1 f4269c94863705a842e7252b96e5f27d 123 Gene3D G3DSA:2.40.50.140 - 1 123 1.6E-66 T 13-08-2021 - -
+BGC0001472_7 80ec0c524f263f553a78952ff4408537 203 Gene3D G3DSA:3.40.50.720 - 16 185 5.5E-22 T 13-08-2021 - -
+BGC0001472_14 b47c649341e9af373f88df5f17e9dc46 114 Gene3D G3DSA:3.30.70.100 - 1 92 1.6E-30 T 13-08-2021 - -
+BGC0001472_5 8eb61811b90411be4123c98a64e16860 233 Gene3D G3DSA:3.40.109.10 NADH Oxidase 12 230 5.1E-18 T 13-08-2021 IPR000415 Nitroreductase-like
diff --git a/tools/marine_omics/test-data/Sanntis_output_data.gff3 b/tools/marine_omics/test-data/Sanntis_output_data.gff3
new file mode 100644
index 000000000..d781ad52f
--- /dev/null
+++ b/tools/marine_omics/test-data/Sanntis_output_data.gff3
@@ -0,0 +1,2 @@
+##gff-version 3
+BGC0001472 SanntiSv0.9.3.5 CLUSTER 312 19485 . . . ID=BGC0001472_sanntis_1;nearest_MiBIG=BGC0001472;nearest_MiBIG_class=RiPP;nearest_MiBIG_diceDistance=0.037;score=0.900;partial=11
From fe37aa2dbd34ab9636fc068babd6ca0fecf7ad92 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marie=20Joss=C3=A9?=
<84919248+Marie59@users.noreply.github.com>
Date: Fri, 26 Jul 2024 15:24:01 +0200
Subject: [PATCH 04/10] Create .shed.yml
---
tools/marine_omics/.shed.yml | 15 +++++++++++++++
1 file changed, 15 insertions(+)
create mode 100644 tools/marine_omics/.shed.yml
diff --git a/tools/marine_omics/.shed.yml b/tools/marine_omics/.shed.yml
new file mode 100644
index 000000000..d9f76d1d2
--- /dev/null
+++ b/tools/marine_omics/.shed.yml
@@ -0,0 +1,15 @@
+categories:
+ - Ecology
+owner: ecology
+remote_repository_url: https://github.com/galaxyecology/tools-ecology/tree/master/tools/marine_omics
+homepage_url: https://github.com/fair-ease/Marine-Omics-Galaxy
+long_description: |
+ CTool for identifying biosynthetic gene clusters (BGCs) in genomic & metagenomic data
+type: unrestricted
+auto_tool_repositories:
+ name_template: "{{ tool_id }}"
+ description_template: "Wrapper for marine omics tool: {{ tool_name }}."
+suite:
+ name: "marine_omics_suite"
+ description: "A suite of tools for marine omics data"
+ type: unrestricted
From 1b62ae08d3c98ec4f7b4afbf32731f80c4151d0a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marie=20Joss=C3=A9?=
<84919248+Marie59@users.noreply.github.com>
Date: Fri, 26 Jul 2024 15:57:54 +0200
Subject: [PATCH 05/10] Update tools/marine_omics/sanntis.xml
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Co-authored-by: Björn Grüning
---
tools/marine_omics/sanntis.xml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tools/marine_omics/sanntis.xml b/tools/marine_omics/sanntis.xml
index a175d7144..0210e2eb4 100644
--- a/tools/marine_omics/sanntis.xml
+++ b/tools/marine_omics/sanntis.xml
@@ -14,7 +14,7 @@
sanntis --ip-file "$input_interpro" --outfile "output_sanntis.gff" "$input_genbank"
]]>
-
+
From 95ebefa995d3a088ec070e6bae5d412841e4d1e1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marie=20Joss=C3=A9?=
<84919248+Marie59@users.noreply.github.com>
Date: Fri, 26 Jul 2024 15:58:32 +0200
Subject: [PATCH 06/10] fix typo
---
tools/marine_omics/.shed.yml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tools/marine_omics/.shed.yml b/tools/marine_omics/.shed.yml
index d9f76d1d2..e6a566bae 100644
--- a/tools/marine_omics/.shed.yml
+++ b/tools/marine_omics/.shed.yml
@@ -4,7 +4,7 @@ owner: ecology
remote_repository_url: https://github.com/galaxyecology/tools-ecology/tree/master/tools/marine_omics
homepage_url: https://github.com/fair-ease/Marine-Omics-Galaxy
long_description: |
- CTool for identifying biosynthetic gene clusters (BGCs) in genomic & metagenomic data
+ Tool for identifying biosynthetic gene clusters (BGCs) in genomic & metagenomic data
type: unrestricted
auto_tool_repositories:
name_template: "{{ tool_id }}"
From fa951043ec3faeb1a3c4dea897e1a866533179aa Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marie=20Joss=C3=A9?=
<84919248+Marie59@users.noreply.github.com>
Date: Fri, 26 Jul 2024 15:59:10 +0200
Subject: [PATCH 07/10] fix single quotes
---
tools/marine_omics/sanntis.xml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tools/marine_omics/sanntis.xml b/tools/marine_omics/sanntis.xml
index 0210e2eb4..bbe6c8575 100644
--- a/tools/marine_omics/sanntis.xml
+++ b/tools/marine_omics/sanntis.xml
@@ -11,7 +11,7 @@
sanntis
From 7caf0f96dd699af2518494e26c0bf0784a23c785 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marie=20Joss=C3=A9?=
<84919248+Marie59@users.noreply.github.com>
Date: Fri, 26 Jul 2024 15:59:45 +0200
Subject: [PATCH 08/10] Update tools/marine_omics/sanntis.xml
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Co-authored-by: Björn Grüning
---
tools/marine_omics/sanntis.xml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tools/marine_omics/sanntis.xml b/tools/marine_omics/sanntis.xml
index bbe6c8575..7a90210e5 100644
--- a/tools/marine_omics/sanntis.xml
+++ b/tools/marine_omics/sanntis.xml
@@ -1,7 +1,7 @@
in genomic and metagenomic data
- 0.1.0
+ 0.9.3.5
0
From e9c6c3923d16593e6817bf9694fa4d79a0b62fa8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marie=20Joss=C3=A9?=
<84919248+Marie59@users.noreply.github.com>
Date: Fri, 26 Jul 2024 15:59:57 +0200
Subject: [PATCH 09/10] Update tools/marine_omics/sanntis.xml
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Co-authored-by: Björn Grüning
---
tools/marine_omics/sanntis.xml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tools/marine_omics/sanntis.xml b/tools/marine_omics/sanntis.xml
index 7a90210e5..952dbf616 100644
--- a/tools/marine_omics/sanntis.xml
+++ b/tools/marine_omics/sanntis.xml
@@ -8,7 +8,7 @@
topic_3387
- sanntis
+ sanntis
Date: Fri, 26 Jul 2024 16:06:49 +0200
Subject: [PATCH 10/10] Update .shed.yml
---
tools/marine_omics/.shed.yml | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/tools/marine_omics/.shed.yml b/tools/marine_omics/.shed.yml
index e6a566bae..e079fcd17 100644
--- a/tools/marine_omics/.shed.yml
+++ b/tools/marine_omics/.shed.yml
@@ -2,13 +2,13 @@ categories:
- Ecology
owner: ecology
remote_repository_url: https://github.com/galaxyecology/tools-ecology/tree/master/tools/marine_omics
-homepage_url: https://github.com/fair-ease/Marine-Omics-Galaxy
+homepage_url: https://github.com/Finn-Lab/SanntiS
long_description: |
- Tool for identifying biosynthetic gene clusters (BGCs) in genomic & metagenomic data
+ The Sanntis tool identifies biosynthetic gene clusters (BGCs) in genomic & metagenomic data
type: unrestricted
auto_tool_repositories:
name_template: "{{ tool_id }}"
- description_template: "Wrapper for marine omics tool: {{ tool_name }}."
+ description_template: "Wrapper for Sanntis tool: {{ tool_name }}."
suite:
name: "marine_omics_suite"
description: "A suite of tools for marine omics data"