diff --git a/tools/marine_omics/.shed.yml b/tools/marine_omics/.shed.yml new file mode 100644 index 000000000..e079fcd17 --- /dev/null +++ b/tools/marine_omics/.shed.yml @@ -0,0 +1,15 @@ +categories: + - Ecology +owner: ecology +remote_repository_url: https://github.com/galaxyecology/tools-ecology/tree/master/tools/marine_omics +homepage_url: https://github.com/Finn-Lab/SanntiS +long_description: | + The Sanntis tool identifies biosynthetic gene clusters (BGCs) in genomic & metagenomic data +type: unrestricted +auto_tool_repositories: + name_template: "{{ tool_id }}" + description_template: "Wrapper for Sanntis tool: {{ tool_name }}." +suite: + name: "marine_omics_suite" + description: "A suite of tools for marine omics data" + type: unrestricted diff --git a/tools/marine_omics/sanntis.xml b/tools/marine_omics/sanntis.xml new file mode 100644 index 000000000..952dbf616 --- /dev/null +++ b/tools/marine_omics/sanntis.xml @@ -0,0 +1,53 @@ + + in genomic and metagenomic data + + 0.9.3.5 + 0 + + + topic_3387 + + + sanntis + + + + + + + + + + + + + + + + + + + 10.1101/2023.05.23.540769 + + diff --git a/tools/marine_omics/test-data/BGC0001472.fna.prodigal.faa.gb b/tools/marine_omics/test-data/BGC0001472.fna.prodigal.faa.gb new file mode 100644 index 000000000..0f290eb81 --- /dev/null +++ b/tools/marine_omics/test-data/BGC0001472.fna.prodigal.faa.gb @@ -0,0 +1,155 @@ +LOCUS BGC0001472 32 bp DNA UNK 01-JAN-1980 +DEFINITION BGC0001472. +ACCESSION BGC0001472 +VERSION BGC0001472 +KEYWORDS . +SOURCE . + ORGANISM . + . 
+FEATURES Location/Qualifiers + CDS 312..683 + /translation="MPTIQQLVRKGRQDKVEKNKTPALEGSPQRRGVCTRVFTTTPKKP + NSALRKVARVRLTSGIEVTAYIPGEGHNLQEHSIVLVRGGRVKDLPGVRYKIIRGSLDT + QGVKNRKQARSRYGAKKEK" + /protein_id="BGC0001472_1" + CDS 686..1156 + /translation="MPRKGPAPKRPVIIDPVYSSPLVTSLINKILLDGKRSTAERIVYG + AMEGLREKTGADPVITLKRALENVKPSLEVKSRRVGGATYQVPIEVKPGRAATLALRWV + VGYSRARREKTMTERLMNELLDASNGLGAAVKKREDTHKMAESNKAFAHYRW" + /protein_id="BGC0001472_2" + CDS 1195..3324 + /translation="MATTSLDLAKVRNIGIMAHIDAGKTTTTERILFYTGVSYKIGEVH + DGAATMDWMEQEQERGITITSAATTCHWPLNDVDHTINIIDTPGHVDFTVEVERSLRVL + DGAVTVFDGVAGVEPQSETVWRQADRYGVPRICFVNKLDRTGADFLRCVDMIVQRLGAV + PIVMQLPIGAEADFRGVVDLVSMKAFVYPEEAVKGEMYDTVEIPDNLKEAAEEWRGKLL + EAVSENDDQMMELYLEGEEPTEEQLHEAIRRITLASKGSADSVTVTPVFCGTAFKNKGV + QPLLDAVVRYLPSPLDVEAIEGHDVKDPEKVVQRKPSDDEPFSGLAFKIASDPHLGKLT + FVRIYSGRLEAGTAVLNSVKGKKERIGKIYRMHANKREEIPSVGAGDIVAVMGLKQTTT + GETLCDDKNPVILESMDFPAPVIQVAIEPKSKGDQEKLGVAIQRLSEEDPSFQVHSDEE + TGQTIIGGMGELHLEVLVDRMKREFRVEANVGKPQVAYRETIRKAVERIDYTHKKQTGG + TGQFAKVQIAIEPIEGGDASYEFVNKVTGGRIPREYIPSVDAGAQEAMQFGILAGYEMV + GVRVTLLDGGYHEVDSSELAFKIAGSQAFKEGARKASPVLLEPMMAVEVTTPEDYMGEV + VGDINSRRGQIQAMEERHGARVVKGLVPLSEMFGYVGDLRSKTSGRASYSMQFDSYAEV + PRNVAEEIIAKAKGE" + /protein_id="BGC0001472_3" + CDS 3472..4665 + /translation="MAKAKFERTKPHVNIGTIGHIDHGKTTLTAAITKVLHDAYPDLNE + ASAFDQIDKAPEERQRGITISIAHVEYQTESRHYAHVDCPGHADYIKNMITGAAQMDGA + ILVVAATDGPMPQTKEHVLLARQVGVPYIVVALNKADMVDDEEILELVELEVRELLSEY + EFPGDDLPVVKVSALKALEGDAEWGQTVLDLMKAVDESIPQPERDVEKPFLMPIEDVFT + ITGRGTVVTGRIERGVLKVNETVDIVGIKTEKTTTTVTGIEMFRKLLDEGQAGENVGLL + LRGIKREDVERGQVIIKPGSVTPHTEFQAQAYILSKDEGGRHTPFFNNYRPQFYFRTTD + VTGVVTLPEGTEMVMPGDNTLMDVALIQPVAMEEGLKFAIREGGRTVGAGQVTKITK" + /protein_id="BGC0001472_4" + CDS 4869..5570 + /translation="MRNDVTSMTAVLEGFTSRTPTSDGLAAERRPVPFADSVPVEPQPS + AEDLRPVHDLRGTLERRRSSLHYAPLPVRTDVILSLLRDVLRRDRDDWGLDASAGALEG + FVFAFRSEGAEPGLYRVTAEETCYLAGLDEIGPAENLGVQREFSTGAGIVALYASLDRA + DTWAGSHGYRISALRASMATYDLNLRCQALGLVGTLFGGFVPSSVHHLVHSDGATRHSL 
+ LATTYARPPES" + /protein_id="BGC0001472_5" + CDS 5567..7195 + /translation="MVAEMKAEQIGRAARTDMQLTVPARPVLRRGVRLRRAGESVVLDG + ADRAQVFSGAFAREGLVPLTEACDGTRDHTELALKTGFDEATVYKCLALLSTAGAVEEA + MSGEEPDVTPEWAVFLSRLGNSTGSNPSWADAAARLVSRSVRLEGDAALVAGARRSLRE + VCPVVTEPAGPPGPGDELTVFFETPASAPLLAATEERCRQDGRPLLRVRADARTITIGP + YADLSITPCLDCGRHGEADLSGEPPEYLHDLVVGLASHHVTALLARATISHLPGDFTVI + DTATLSTVYRPVAVRPGCPRCSYARGPVAPQAPAGAVYEASVAMPPRAFLAPKDHQAHY + YASNLRLQSQFKDWPSRPHTPLPALDISVLAGSERHDPSHGDTPLTLSSLGLLLKVAFG + VKEDETTPERVKRWTAASGNIGSTTAYAVVRDDRIMPPGVYAYAQGSHTLVTVSGEVPP + GDSPCDIIITGDLKKVMTKYGTFGFRLVFLDAGCNLASLRELAQHLGLGFTPRSDWDDD + ALARLLGTSPADEPVAAFASLGGTA" + /protein_id="BGC0001472_6" + CDS 7210..7821 + /translation="MSHDPRPQCLYLVGDTFSRRLTEHRGVPPELQVSFEDFLNDTAPH + ADVVVPVHAGGDPGLRDETDRICAERSTPSVGLQLLPTKVLCGPVVVPGRTACYACYRK + RAAQHAGTARPYDMDAALSGLPEGFGRQHLSVASGLLDLALTEIATGVTGIGGTVRTFN + LVSGAVSSAVTVSVNRCPRCGGRFSQARADSAMPVPELLR" + /protein_id="BGC0001472_7" + CDS 7845..9191 + /translation="MHLNRPQEHISAELRGLEELVSPYGLVSRTAPLPVREGEPPFAVQ + LAYLGVPSRALPNLRTWAHDEDTGNSDGAGTGLTPERAKLVSIAEALERYSTCAWDDDE + MVVAAENDLTEEFVSPSRWPSCSPTELARDDCSLSAYDPSVPIRWVRAWSLTRRIPVLV + PAISVYLHMPYQSKSEEFIRGITTGAAVHSDVRSAVLGGLLEVVERDAIALVWLQQLRL + PELVVDPARLDAGVRELHRVGTSTDLRVRLFDATTDFGVPVIYAVQLSDADPALAQIVA + ATCDVHPEQALGKIYRELASLRVALRGYLSAYAGREPDPAKVSVVGGAVHNATRDRRDV + FGFLLDGERPAYGLEGMPGLPAGADPLDTVVARLAARGAEVLVTDITTDEARQVGMRAV + KVLVPEAMPVSFVHGERYLGTPRLYDAPRAMGHTSHAEDAVNPVQQPFA" + /protein_id="BGC0001472_8" + CDS 9238..10437 + /translation="MTQITLEPGFLLLISLSYGRLQDHVTARLAPAEISGVSFVHLFAT + IPQPVGSKYNDTFAPLIRELFAPERVGGAGGHGPYYFVRTQDAQLGTDTLQISIEGVSD + EDSTRADLHRTAERYGCAAQVDATPLDSVPSPLWNAGFTGTGFSASSKRLFQEAAPTLV + SFLNRAAETPQSPPPALGAIRLMAAHTRATLLRSPQREIDGYEFRELLSLRLLSYRSHF + EAIYLRTKDPQSFDAACARFYEQVGAGVREFITACGDPDDDPADEMVRLWTKSITSESS + HLAENFSDGSVVNAGHTLEDLVRKRGAPVEPTRFHTPPSPELDRLMHRDADFLAFRLQT + SLLYSCLYTLGFSLAERYVFCYVVARANEDVCGKSMKELQDELDGLARSMASGSTKTAE + " + /protein_id="BGC0001472_9" + CDS 10511..10654 + 
/translation="MEQQIELDVLEISDLIAGAGENDDLAQVMAASCTTTSVSTSSSSS + SS" + /protein_id="BGC0001472_10" + CDS 10977..13634 + /translation="MGVNISPYVVYRRSRLPLGELGGMSFTTAWSRIDELHALRDEIGK + NAVGLADRLGELVPTLGDDVRADLIRLRRDVHNLRHDRAVARLEPLRPHLGREVVDEVE + TWCALGVRAEQCERAGREELESEKARAADGFGALFEHDAMARSIQLSGDRLYRGLRDLV + AGDEASALKPSKARLRESSLVNFAYRASLKPSPFGRFTEIGAFPPDDPRPADPGGRHGG + TQESVTTLNRLLVNWGPPGLPLVPGGMEPGHLVLNSTLRAGTEYVEYVGVAPGSREDGR + MATERVLRVRREGLFDALLAAMPEGSAPAATVLRDLTAVTGKAETSRKVVQGLIRAGIL + FFRPEIDDHDPDYSMKLDRVLAAGGTPETAALRGHFSELRRLETDFSEAAADERQKLLD + SAYAAIGGIAELCKVSPPPEEVLKSPVFEDTPASTAPQAWNLPTVEGSIPALTGLWRLA + SMMDNGQVKRLGLYSFATRVLGDRSTMPFLEFFQAFSSLTDQEQVDVFMGRDVEEAERY + TRQRAEALRTIRQRLVPGDGTVHLDPSVIEKACEGVEDLLDTESVTFRAQFAQGVLPDR + DRTLVVNGLLTGYGVYFSRFGSFVEGTDEWSLPAAQREHLARRFPGQVDLNSVLGFNFN + LHPSVTRRVVNYPGAVSLGAERTVYGLARLEVRADQATRSLRLWDPEAQETLDLVPMNF + MTPIGVPLLYRLLEALSPSNRYLWKPLDDIRDAGGPTVYGETAPRLVVGDVVADRRSWN + VAAAEIPMLQDLSRDVPEALVAFDAWRLTRGLPRHAFVLCQTPEERDVMAGRSRKVTRQ + WADYAHLRRASVHKPMYVDFRNPFLVRSFAKSALSRGDVVASIRECLPSVDDYGPDTGW + TAAEEFFVELCTDN" + /protein_id="BGC0001472_11" + CDS 13612..14571 + /translation="MNCVPTTSGQTGTREWRTVHIHVPHSLHTPFLCDVVEPLLRSEGL + QDHFFFLRYWQGGPHLRLRMLCGPGAGSAEAAERVVAGLARAMPEFGAQAREEYALGLT + LQDELARLEKETSEEGRPIGALDRVAYEPEYRKYGGTEGLQIAETVFRKSSVAVLGLLG + GQPRAWVDERRAPIGEAARIMAMFLHGAGLDPRAAGLFLREYEDWWRTYAPDDMQRAWP + KLFGGVSAQMTNLCAAVWRDGATDVFHDISAEAAARARSVCGAEPGGDVRDLRLDGTPY + PGCLSNYVHTTNNRLGLVPAAEGLVAYLVRRGLEAMDG" + /protein_id="BGC0001472_12" + CDS 14692..15894 + /translation="MTDRQDSAYPYPRTCPLHPPKEYASLRAEQPITKVTLASGRTAWL + LTRHEHIRQLLADPHVSSNLAHPGYPLHFDAPPEVMEQMRPVLLAMDPPVHTAQRKMVI + PEFTVKRVLQLRPRVEEIVDECISSMLAGEGPADLVEALALPVPSLVICELLGVPRSDR + AFFQDRTNKLVSVDADPQERNSAHQELHAYFSELVTAQEADPGDDLLGRLVVKNRETGT + FDHGELVGMANVLLVGGHETTANMISLGVVGLLENPDQLAKLRADPGLAPQAVDELLRY + FSIADQVTSRVATADLEIGGVLIRAGEGVIGLSASGNHDEAVFPDPDRLDIERGGRHHL + AFGHGIHQCIGQNLAKLELEVVFNALLARIPGLKLATPVAELPFKDSMGVYGLHKLPVS + W" + /protein_id="BGC0001472_13" + CDS 
16220..16564 + /translation="MYLSIVMWDLKKSEATVESLREYLRDYAVDAYSALDGMRLKAWFS + DSARQLWGAVYLWDSPEQMPGLYKVSRVIDLIGYPPTSVGGFTLEATAEGKSVHETLAG + LGIALEGGTQ" + /protein_id="BGC0001472_14" + CDS 17019..17729 + /translation="MLIEDIEPLLQSIRAGVEFIEIYGLDTVPVPDSLLAECERRRIPV + RLLAASVANQVFKTEKKPKVFGIAKVPRPRRLSDLSDMTGDLILLDGVKIVGNIGAIVR + TSFALGASGIVLVDSDLGSIADRRLIRASRGYVFSLPIVLASRAEALQYFQDNAMRPVV + FEADGDLGVADLDGMDERLVLMFGSERIGPSGEFSDIAAKSVSIPMNPAAESLNVSVSA + GIALHARARRNLSR" + /protein_id="BGC0001472_15" + CDS 17815..19485 + /translation="ALLGLRPFTPWEVSVAELGPDHRAEVNVLAADGRRVELIFLNTAM + HTGRHRLGLPSLWQDRRLVLRTVVADGSPLRRAGSYTYDGLVGVLTGLMESYRPTVVHT + LDPDPDIQHSTEAVRRRDSEQPGYSDHADHTAAACFAWAAMIRWVARATADGGRIPGFV + TVAYRGYYNRHWPKNLPQGVLARKAAHLVPYGGSPDWDCGNPSGCGDYNVGGDRPLTNR + KGWVRSTHHRYPGTRTVLTAEPDGRLAAYAVLGLRVVRWQETGPGSGAWGPPHDLGGGP + LAPALGSATTRDGRLLLFGLRFAALGGHGADNEREIVVLEQSAPGRGFRPWRGLGSPSP + GRDEVRRTGVPVAVAAPDGQIHLFVRDAEKGVSTRVRDGAGRWSAWRDMGGGEVQDGLH + TAVDEGGRVHVFGAGHHAVHHWTQDTPSAGLTARTQLTAAPVPAHAPAALPAPDGSVSL + YYRAAAGSGLTTARAGTAVPGARFDGYXXVDAAPSPRGPVLLGRTAEGLVQLLMGGGLH + VRTDGPAALDGASLRLGPDGRPSVAGLGPDAAPWMWRPR" + /protein_id="BGC0001472_16" +ORIGIN + 1 gatcgatcga tcgatcgatc gatcgatcga tc +// diff --git a/tools/marine_omics/test-data/BGC0001472.fna.prodigal.faa.ip.tsv b/tools/marine_omics/test-data/BGC0001472.fna.prodigal.faa.ip.tsv new file mode 100644 index 000000000..7cf4f5eb6 --- /dev/null +++ b/tools/marine_omics/test-data/BGC0001472.fna.prodigal.faa.ip.tsv @@ -0,0 +1,81 @@ +BGC0001472_13 874c0f534839f521f055a275c391567a 400 ProSitePatterns PS00086 Cytochrome P450 cysteine heme-iron ligand signature. 
342 351 - T 13-08-2021 IPR017972 Cytochrome P450, conserved site +BGC0001472_13 874c0f534839f521f055a275c391567a 400 PRINTS PR00385 P450 superfamily signature 340 349 3.1E-7 T 13-08-2021 IPR001128 Cytochrome P450 +BGC0001472_13 874c0f534839f521f055a275c391567a 400 PRINTS PR00385 P450 superfamily signature 238 255 3.1E-7 T 13-08-2021 IPR001128 Cytochrome P450 +BGC0001472_13 874c0f534839f521f055a275c391567a 400 PRINTS PR00385 P450 superfamily signature 273 284 3.1E-7 T 13-08-2021 IPR001128 Cytochrome P450 +BGC0001472_13 874c0f534839f521f055a275c391567a 400 PRINTS PR00385 P450 superfamily signature 349 360 3.1E-7 T 13-08-2021 IPR001128 Cytochrome P450 +BGC0001472_13 874c0f534839f521f055a275c391567a 400 Pfam PF00067 Cytochrome P450 272 368 4.0E-18 T 13-08-2021 IPR001128 Cytochrome P450 +BGC0001472_13 874c0f534839f521f055a275c391567a 400 PRINTS PR00359 B-class P450 signature 273 284 8.3E-58 T 13-08-2021 IPR002397 Cytochrome P450, B-class +BGC0001472_13 874c0f534839f521f055a275c391567a 400 PRINTS PR00359 B-class P450 signature 319 334 8.3E-58 T 13-08-2021 IPR002397 Cytochrome P450, B-class +BGC0001472_13 874c0f534839f521f055a275c391567a 400 PRINTS PR00359 B-class P450 signature 291 318 8.3E-58 T 13-08-2021 IPR002397 Cytochrome P450, B-class +BGC0001472_13 874c0f534839f521f055a275c391567a 400 PRINTS PR00359 B-class P450 signature 340 349 8.3E-58 T 13-08-2021 IPR002397 Cytochrome P450, B-class +BGC0001472_13 874c0f534839f521f055a275c391567a 400 PRINTS PR00359 B-class P450 signature 349 360 8.3E-58 T 13-08-2021 IPR002397 Cytochrome P450, B-class +BGC0001472_13 874c0f534839f521f055a275c391567a 400 PRINTS PR00359 B-class P450 signature 138 154 8.3E-58 T 13-08-2021 IPR002397 Cytochrome P450, B-class +BGC0001472_13 874c0f534839f521f055a275c391567a 400 PRINTS PR00359 B-class P450 signature 192 214 8.3E-58 T 13-08-2021 IPR002397 Cytochrome P450, B-class +BGC0001472_13 874c0f534839f521f055a275c391567a 400 PRINTS PR00359 B-class P450 signature 155 170 8.3E-58 T 13-08-2021 IPR002397 
Cytochrome P450, B-class +BGC0001472_13 874c0f534839f521f055a275c391567a 400 PRINTS PR00359 B-class P450 signature 91 102 8.3E-58 T 13-08-2021 IPR002397 Cytochrome P450, B-class +BGC0001472_13 874c0f534839f521f055a275c391567a 400 Gene3D G3DSA:1.10.630.10 Cytochrome P450 2 400 7.0E-113 T 13-08-2021 IPR036396 Cytochrome P450 superfamily +BGC0001472_11 67b7792659aca4f0747f903233e4f593 885 Pfam PF04738 Lantibiotic dehydratase, N terminus 141 791 2.4E-20 T 13-08-2021 IPR006827 Lantibiotic dehydratase, N-terminal +BGC0001472_6 76d1387ac73417cb91ccfb11c2c5229e 542 Gene3D G3DSA:3.40.50.720 - 132 304 8.2E-16 T 13-08-2021 - - +BGC0001472_6 76d1387ac73417cb91ccfb11c2c5229e 542 Gene3D G3DSA:3.40.109.10 NADH Oxidase 348 542 3.1E-35 T 13-08-2021 IPR000415 Nitroreductase-like +BGC0001472_3 4b28e769738231bbe9f69d4979528f4d 709 Gene3D G3DSA:3.40.50.300 - 3 304 1.4E-121 T 13-08-2021 IPR027417 P-loop containing nucleoside triphosphate hydrolase +BGC0001472_3 4b28e769738231bbe9f69d4979528f4d 709 Pfam PF00009 Elongation factor Tu GTP binding domain 10 294 1.2E-65 T 13-08-2021 IPR000795 Translational (tr)-type GTP-binding domain +BGC0001472_3 4b28e769738231bbe9f69d4979528f4d 709 TIGRFAM TIGR00231 small_GTP: small GTP-binding protein domain 11 184 1.5E-33 T 13-08-2021 IPR005225 Small GTP-binding protein domain +BGC0001472_3 4b28e769738231bbe9f69d4979528f4d 709 Pfam PF03144 Elongation factor Tu domain 2 337 404 9.3E-16 T 13-08-2021 IPR004161 Translation elongation factor EFTu-like, domain 2 +BGC0001472_3 4b28e769738231bbe9f69d4979528f4d 709 Pfam PF00679 Elongation factor G C-terminus 615 701 2.7E-29 T 13-08-2021 IPR000640 Elongation factor EFG, domain V-like +BGC0001472_3 4b28e769738231bbe9f69d4979528f4d 709 Pfam PF14492 Elongation Factor G, domain III 417 491 2.5E-33 T 13-08-2021 IPR041095 Elongation Factor G, domain II +BGC0001472_3 4b28e769738231bbe9f69d4979528f4d 709 Gene3D G3DSA:3.30.70.870 Elongation Factor G (Translational Gtpase), domain 3 421 497 1.0E-34 T 13-08-2021 - - 
+BGC0001472_3 4b28e769738231bbe9f69d4979528f4d 709 TIGRFAM TIGR00484 EF-G: translation elongation factor G 5 707 0.0 T 13-08-2021 IPR004540 Translation elongation factor EFG/EF2 +BGC0001472_3 4b28e769738231bbe9f69d4979528f4d 709 Gene3D G3DSA:3.30.230.10 - 498 703 4.6E-92 T 13-08-2021 IPR014721 Ribosomal protein S5 domain 2-type fold, subgroup +BGC0001472_3 4b28e769738231bbe9f69d4979528f4d 709 PRINTS PR00315 GTP-binding elongation factor signature 13 26 4.2E-16 T 13-08-2021 IPR000795 Translational (tr)-type GTP-binding domain +BGC0001472_3 4b28e769738231bbe9f69d4979528f4d 709 PRINTS PR00315 GTP-binding elongation factor signature 59 67 4.2E-16 T 13-08-2021 IPR000795 Translational (tr)-type GTP-binding domain +BGC0001472_3 4b28e769738231bbe9f69d4979528f4d 709 PRINTS PR00315 GTP-binding elongation factor signature 83 93 4.2E-16 T 13-08-2021 IPR000795 Translational (tr)-type GTP-binding domain +BGC0001472_3 4b28e769738231bbe9f69d4979528f4d 709 PRINTS PR00315 GTP-binding elongation factor signature 99 110 4.2E-16 T 13-08-2021 IPR000795 Translational (tr)-type GTP-binding domain +BGC0001472_3 4b28e769738231bbe9f69d4979528f4d 709 PRINTS PR00315 GTP-binding elongation factor signature 135 144 4.2E-16 T 13-08-2021 IPR000795 Translational (tr)-type GTP-binding domain +BGC0001472_3 4b28e769738231bbe9f69d4979528f4d 709 Pfam PF03764 Elongation factor G, domain IV 492 613 2.5E-47 T 13-08-2021 IPR005517 Translation elongation factor EFG/EF2, domain IV +BGC0001472_3 4b28e769738231bbe9f69d4979528f4d 709 ProSitePatterns PS00301 Translational (tr)-type guanine nucleotide-binding (G) domain signature. 
52 67 - T 13-08-2021 IPR031157 Tr-type G domain, conserved site +BGC0001472_3 4b28e769738231bbe9f69d4979528f4d 709 Gene3D G3DSA:2.40.30.10 Translation factors 305 420 6.0E-44 T 13-08-2021 - - +BGC0001472_3 4b28e769738231bbe9f69d4979528f4d 709 Gene3D G3DSA:3.30.70.240 - 619 689 4.6E-92 T 13-08-2021 - - +BGC0001472_4 d768d0b7047f823230c36d45b2e27c7f 397 Gene3D G3DSA:3.40.50.300 - 1 205 3.7E-74 T 13-08-2021 IPR027417 P-loop containing nucleoside triphosphate hydrolase +BGC0001472_4 d768d0b7047f823230c36d45b2e27c7f 397 TIGRFAM TIGR00485 EF-Tu: translation elongation factor Tu 1 396 0.0 T 13-08-2021 IPR004541 Translation elongation factor EFTu/EF1A, bacterial/organelle +BGC0001472_4 d768d0b7047f823230c36d45b2e27c7f 397 Pfam PF03144 Elongation factor Tu domain 2 227 296 3.0E-17 T 13-08-2021 IPR004161 Translation elongation factor EFTu-like, domain 2 +BGC0001472_4 d768d0b7047f823230c36d45b2e27c7f 397 Pfam PF03143 Elongation factor Tu C-terminal domain 301 395 1.4E-38 T 13-08-2021 IPR004160 Translation elongation factor EFTu/EF1A, C-terminal +BGC0001472_4 d768d0b7047f823230c36d45b2e27c7f 397 Gene3D G3DSA:2.40.30.10 Translation factors 208 337 2.9E-57 T 13-08-2021 - - +BGC0001472_4 d768d0b7047f823230c36d45b2e27c7f 397 TIGRFAM TIGR00231 small_GTP: small GTP-binding protein domain 13 147 1.9E-13 T 13-08-2021 IPR005225 Small GTP-binding protein domain +BGC0001472_4 d768d0b7047f823230c36d45b2e27c7f 397 PRINTS PR00315 GTP-binding elongation factor signature 14 27 2.3E-24 T 13-08-2021 IPR000795 Translational (tr)-type GTP-binding domain +BGC0001472_4 d768d0b7047f823230c36d45b2e27c7f 397 PRINTS PR00315 GTP-binding elongation factor signature 60 68 2.3E-24 T 13-08-2021 IPR000795 Translational (tr)-type GTP-binding domain +BGC0001472_4 d768d0b7047f823230c36d45b2e27c7f 397 PRINTS PR00315 GTP-binding elongation factor signature 80 90 2.3E-24 T 13-08-2021 IPR000795 Translational (tr)-type GTP-binding domain +BGC0001472_4 d768d0b7047f823230c36d45b2e27c7f 397 PRINTS PR00315 GTP-binding 
elongation factor signature 96 107 2.3E-24 T 13-08-2021 IPR000795 Translational (tr)-type GTP-binding domain +BGC0001472_4 d768d0b7047f823230c36d45b2e27c7f 397 PRINTS PR00315 GTP-binding elongation factor signature 133 142 2.3E-24 T 13-08-2021 IPR000795 Translational (tr)-type GTP-binding domain +BGC0001472_4 d768d0b7047f823230c36d45b2e27c7f 397 Pfam PF00009 Elongation factor Tu GTP binding domain 10 203 5.6E-57 T 13-08-2021 IPR000795 Translational (tr)-type GTP-binding domain +BGC0001472_4 d768d0b7047f823230c36d45b2e27c7f 397 Gene3D G3DSA:2.40.30.10 Translation factors 341 395 1.0E-25 T 13-08-2021 - - +BGC0001472_4 d768d0b7047f823230c36d45b2e27c7f 397 ProSitePatterns PS00301 Translational (tr)-type guanine nucleotide-binding (G) domain signature. 53 68 - T 13-08-2021 IPR031157 Tr-type G domain, conserved site +BGC0001472_12 80b32fd90b93d2d340ddcffd78658c6b 319 Pfam PF14028 Lantibiotic biosynthesis dehydratase C-term 16 314 2.0E-49 T 13-08-2021 IPR023809 Thiopeptide-type bacteriocin biosynthesis domain +BGC0001472_16 c1b339f48f233f90c5ac174024b991af 556 Gene3D G3DSA:3.40.50.10320 - 14 220 1.0E-8 T 13-08-2021 IPR024078 Putative deacetylase LmbE-like domain superfamily +BGC0001472_16 c1b339f48f233f90c5ac174024b991af 556 Pfam PF02585 GlcNAc-PI de-N-acetylase 52 145 9.7E-10 T 13-08-2021 IPR003737 N-acetylglucosaminyl phosphatidylinositol deacetylase-related +BGC0001472_16 c1b339f48f233f90c5ac174024b991af 556 Gene3D G3DSA:2.120.10.70 - 318 498 6.7E-7 T 13-08-2021 - - +BGC0001472_2 fc82bb58d52c83068b7ca785129b2384 156 TIGRFAM TIGR01029 rpsG_bact: ribosomal protein uS7 3 156 4.4E-64 T 13-08-2021 IPR005717 Ribosomal protein S7, bacterial/organellar-type +BGC0001472_2 fc82bb58d52c83068b7ca785129b2384 156 Pfam PF00177 Ribosomal protein S7p/S5e 1 149 4.0E-59 T 13-08-2021 IPR023798 Ribosomal protein S7 domain +BGC0001472_2 fc82bb58d52c83068b7ca785129b2384 156 Gene3D G3DSA:1.10.455.10 Ribosomal protein S7 domain 1 155 7.0E-60 T 13-08-2021 IPR036823 Ribosomal protein S7 domain 
superfamily +BGC0001472_2 fc82bb58d52c83068b7ca785129b2384 156 ProSitePatterns PS00052 Ribosomal protein S7 signature. 20 46 - T 13-08-2021 IPR020606 Ribosomal protein S7, conserved site +BGC0001472_15 206c74fd5c80ef02123ab090a4b6cfa4 236 Pfam PF04705 Thiostrepton-resistance methylase, N terminus 1 82 5.8E-30 T 13-08-2021 IPR006795 Thiostrepton-resistance methylase, N-terminal +BGC0001472_15 206c74fd5c80ef02123ab090a4b6cfa4 236 Gene3D G3DSA:3.40.1280.10 - 75 235 1.3E-37 T 13-08-2021 IPR029026 tRNA (guanine-N1-)-methyltransferase, N-terminal +BGC0001472_15 206c74fd5c80ef02123ab090a4b6cfa4 236 Gene3D G3DSA:3.30.1330.30 - 1 73 2.3E-26 T 13-08-2021 IPR029064 50S ribosomal protein L30e-like +BGC0001472_15 206c74fd5c80ef02123ab090a4b6cfa4 236 Pfam PF00588 SpoU rRNA Methylase family 88 227 1.8E-26 T 13-08-2021 IPR001537 tRNA/rRNA methyltransferase, SpoU type +BGC0001472_8 2149eda482fc77a076bb0eb91c55bd5d 448 Gene3D G3DSA:3.30.40.250 - 104 186 3.5E-36 T 13-08-2021 - - +BGC0001472_8 2149eda482fc77a076bb0eb91c55bd5d 448 TIGRFAM TIGR03604 TOMM_cyclo_SagD: thiazole/oxazole-forming peptide maturase, SagD family component 75 448 1.4E-100 T 13-08-2021 IPR027624 Thiazole/oxazole-forming peptide maturase, SagD family component +BGC0001472_8 2149eda482fc77a076bb0eb91c55bd5d 448 Pfam PF02624 YcaO cyclodehydratase, ATP-ad Mg2+-binding 75 406 8.3E-62 T 13-08-2021 IPR003776 YcaO-like domain +BGC0001472_8 2149eda482fc77a076bb0eb91c55bd5d 448 Gene3D G3DSA:3.30.1330.230 - 82 405 3.5E-36 T 13-08-2021 - - +BGC0001472_8 2149eda482fc77a076bb0eb91c55bd5d 448 Gene3D G3DSA:3.30.160.660 - 223 357 3.5E-36 T 13-08-2021 - - +BGC0001472_1 f4269c94863705a842e7252b96e5f27d 123 TIGRFAM TIGR00981 rpsL_bact: ribosomal protein uS12 1 123 5.4E-69 T 13-08-2021 IPR005679 Ribosomal protein S12, bacterial-type +BGC0001472_1 f4269c94863705a842e7252b96e5f27d 123 ProSitePatterns PS00055 Ribosomal protein S12 signature. 
43 50 - T 13-08-2021 IPR006032 Ribosomal protein S12/S23 +BGC0001472_1 f4269c94863705a842e7252b96e5f27d 123 Pfam PF00164 Ribosomal protein S12/S23 12 123 8.3E-44 T 13-08-2021 IPR006032 Ribosomal protein S12/S23 +BGC0001472_1 f4269c94863705a842e7252b96e5f27d 123 PRINTS PR01034 Ribosomal protein S12 signature 27 42 8.4E-59 T 13-08-2021 IPR006032 Ribosomal protein S12/S23 +BGC0001472_1 f4269c94863705a842e7252b96e5f27d 123 PRINTS PR01034 Ribosomal protein S12 signature 42 57 8.4E-59 T 13-08-2021 IPR006032 Ribosomal protein S12/S23 +BGC0001472_1 f4269c94863705a842e7252b96e5f27d 123 PRINTS PR01034 Ribosomal protein S12 signature 58 77 8.4E-59 T 13-08-2021 IPR006032 Ribosomal protein S12/S23 +BGC0001472_1 f4269c94863705a842e7252b96e5f27d 123 PRINTS PR01034 Ribosomal protein S12 signature 77 94 8.4E-59 T 13-08-2021 IPR006032 Ribosomal protein S12/S23 +BGC0001472_1 f4269c94863705a842e7252b96e5f27d 123 PRINTS PR01034 Ribosomal protein S12 signature 94 110 8.4E-59 T 13-08-2021 IPR006032 Ribosomal protein S12/S23 +BGC0001472_1 f4269c94863705a842e7252b96e5f27d 123 PRINTS PR01034 Ribosomal protein S12 signature 110 122 8.4E-59 T 13-08-2021 IPR006032 Ribosomal protein S12/S23 +BGC0001472_1 f4269c94863705a842e7252b96e5f27d 123 Gene3D G3DSA:2.40.50.140 - 1 123 1.6E-66 T 13-08-2021 - - +BGC0001472_7 80ec0c524f263f553a78952ff4408537 203 Gene3D G3DSA:3.40.50.720 - 16 185 5.5E-22 T 13-08-2021 - - +BGC0001472_14 b47c649341e9af373f88df5f17e9dc46 114 Gene3D G3DSA:3.30.70.100 - 1 92 1.6E-30 T 13-08-2021 - - +BGC0001472_5 8eb61811b90411be4123c98a64e16860 233 Gene3D G3DSA:3.40.109.10 NADH Oxidase 12 230 5.1E-18 T 13-08-2021 IPR000415 Nitroreductase-like diff --git a/tools/marine_omics/test-data/Sanntis_output_data.gff3 b/tools/marine_omics/test-data/Sanntis_output_data.gff3 new file mode 100644 index 000000000..d781ad52f --- /dev/null +++ b/tools/marine_omics/test-data/Sanntis_output_data.gff3 @@ -0,0 +1,2 @@ +##gff-version 3 +BGC0001472 SanntiSv0.9.3.5 CLUSTER 312 19485 . . . 
ID=BGC0001472_sanntis_1;nearest_MiBIG=BGC0001472;nearest_MiBIG_class=RiPP;nearest_MiBIG_diceDistance=0.037;score=0.900;partial=11