Skip to content

Commit

Permalink
Sanntis (#121)
Browse files Browse the repository at this point in the history
* Create sanntis.xml

* Add BGC0001472.fna.prodigal.faa.gb

* Add test files

* Create .shed.yml

* Update tools/marine_omics/sanntis.xml

Co-authored-by: Björn Grüning <[email protected]>

* fix typo

* fix single quotes

* Update tools/marine_omics/sanntis.xml

Co-authored-by: Björn Grüning <[email protected]>

* Update tools/marine_omics/sanntis.xml

Co-authored-by: Björn Grüning <[email protected]>

* Update .shed.yml

---------

Co-authored-by: Björn Grüning <[email protected]>
  • Loading branch information
Marie59 and bgruening authored Jul 26, 2024
1 parent 853190e commit 3b5d66e
Show file tree
Hide file tree
Showing 5 changed files with 306 additions and 0 deletions.
15 changes: 15 additions & 0 deletions tools/marine_omics/.shed.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
categories:
- Ecology
owner: ecology
remote_repository_url: https://github.com/galaxyecology/tools-ecology/tree/master/tools/marine_omics
homepage_url: https://github.com/Finn-Lab/SanntiS
long_description: |
The Sanntis tool identify biosynthetic gene clusters (BGCs) in genomic & metagenomic data
type: unrestricted
auto_tool_repositories:
name_template: "{{ tool_id }}"
description_template: "Wrapper for Sanntis tool: {{ tool_name }}."
suite:
name: "marine_omics_suite"
description: "A suite of tools for marine omics data"
type: unrestricted
53 changes: 53 additions & 0 deletions tools/marine_omics/sanntis.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
<tool id="sanntis_marine" name="Sanntis biosynthetic gene clusters" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.01" license="MIT">
<description>in genomic and metagenomic data</description>
<macros>
<token name="@TOOL_VERSION@">0.9.3.5</token>
<token name="@VERSION_SUFFIX@">0</token>
</macros>
<edam_topics>
<edam_topic>topic_3387</edam_topic>
</edam_topics>
<requirements>
<requirement type="package" version="@TOOL_VERSION@">sanntis</requirement>
</requirements>
<command detect_errors="exit_code"><![CDATA[
sanntis --ip-file '$input_interpro' --outfile 'output_sanntis.gff' '$input_genbank'
]]></command>
<inputs>
<param name="input_interpro" type="data" format="tabular" label="Input the TSV file from InterProScan" help="Before using this tool you need to retrieve the right data by using the InterProScan tool"/>
<param name="input_genbank" type="data" format="genbank" label="Input a Genbank .gb file" help="It needs to have the right structure and fit the protein fasta file used in InterProScan"/>
</inputs>
<outputs>
<data name="output_sanntis" from_work_dir="output_sanntis.gff" format="gff3" label="Sanntis output data"/>
</outputs>
<tests>
<test expect_num_outputs="1">
<param name="input_interpro" value="BGC0001472.fna.prodigal.faa.ip.tsv"/>
<param name="input_genbank" value="BGC0001472.fna.prodigal.faa.gb"/>
<output name="output_sanntis" value="Sanntis_output_data.gff3"/>
</test>
</tests>
<help><![CDATA[
.. class:: infomark
**What it does**
SMBGC Annotation using Neural Networks Trained on Interpro Signatures
Tool for identifying biosynthetic gene clusters (BGCs) in genomic & metagenomic data
.....
**Input**
- A tsv file coming from InterProScan tool from which you can retrieve the right data.
- The right Genbank file with the right structure that fits the protein fasta file used to run InterProScan tool
**Output**
- A Gff3 file
]]></help>
<citations>
<citation type="doi">10.1101/2023.05.23.540769</citation>
</citations>
</tool>
155 changes: 155 additions & 0 deletions tools/marine_omics/test-data/BGC0001472.fna.prodigal.faa.gb
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
LOCUS BGC0001472 32 bp DNA UNK 01-JAN-1980
DEFINITION BGC0001472.
ACCESSION BGC0001472
VERSION BGC0001472
KEYWORDS .
SOURCE .
ORGANISM .
.
FEATURES Location/Qualifiers
CDS 312..683
/translation="MPTIQQLVRKGRQDKVEKNKTPALEGSPQRRGVCTRVFTTTPKKP
NSALRKVARVRLTSGIEVTAYIPGEGHNLQEHSIVLVRGGRVKDLPGVRYKIIRGSLDT
QGVKNRKQARSRYGAKKEK"
/protein_id="BGC0001472_1"
CDS 686..1156
/translation="MPRKGPAPKRPVIIDPVYSSPLVTSLINKILLDGKRSTAERIVYG
AMEGLREKTGADPVITLKRALENVKPSLEVKSRRVGGATYQVPIEVKPGRAATLALRWV
VGYSRARREKTMTERLMNELLDASNGLGAAVKKREDTHKMAESNKAFAHYRW"
/protein_id="BGC0001472_2"
CDS 1195..3324
/translation="MATTSLDLAKVRNIGIMAHIDAGKTTTTERILFYTGVSYKIGEVH
DGAATMDWMEQEQERGITITSAATTCHWPLNDVDHTINIIDTPGHVDFTVEVERSLRVL
DGAVTVFDGVAGVEPQSETVWRQADRYGVPRICFVNKLDRTGADFLRCVDMIVQRLGAV
PIVMQLPIGAEADFRGVVDLVSMKAFVYPEEAVKGEMYDTVEIPDNLKEAAEEWRGKLL
EAVSENDDQMMELYLEGEEPTEEQLHEAIRRITLASKGSADSVTVTPVFCGTAFKNKGV
QPLLDAVVRYLPSPLDVEAIEGHDVKDPEKVVQRKPSDDEPFSGLAFKIASDPHLGKLT
FVRIYSGRLEAGTAVLNSVKGKKERIGKIYRMHANKREEIPSVGAGDIVAVMGLKQTTT
GETLCDDKNPVILESMDFPAPVIQVAIEPKSKGDQEKLGVAIQRLSEEDPSFQVHSDEE
TGQTIIGGMGELHLEVLVDRMKREFRVEANVGKPQVAYRETIRKAVERIDYTHKKQTGG
TGQFAKVQIAIEPIEGGDASYEFVNKVTGGRIPREYIPSVDAGAQEAMQFGILAGYEMV
GVRVTLLDGGYHEVDSSELAFKIAGSQAFKEGARKASPVLLEPMMAVEVTTPEDYMGEV
VGDINSRRGQIQAMEERHGARVVKGLVPLSEMFGYVGDLRSKTSGRASYSMQFDSYAEV
PRNVAEEIIAKAKGE"
/protein_id="BGC0001472_3"
CDS 3472..4665
/translation="MAKAKFERTKPHVNIGTIGHIDHGKTTLTAAITKVLHDAYPDLNE
ASAFDQIDKAPEERQRGITISIAHVEYQTESRHYAHVDCPGHADYIKNMITGAAQMDGA
ILVVAATDGPMPQTKEHVLLARQVGVPYIVVALNKADMVDDEEILELVELEVRELLSEY
EFPGDDLPVVKVSALKALEGDAEWGQTVLDLMKAVDESIPQPERDVEKPFLMPIEDVFT
ITGRGTVVTGRIERGVLKVNETVDIVGIKTEKTTTTVTGIEMFRKLLDEGQAGENVGLL
LRGIKREDVERGQVIIKPGSVTPHTEFQAQAYILSKDEGGRHTPFFNNYRPQFYFRTTD
VTGVVTLPEGTEMVMPGDNTLMDVALIQPVAMEEGLKFAIREGGRTVGAGQVTKITK"
/protein_id="BGC0001472_4"
CDS 4869..5570
/translation="MRNDVTSMTAVLEGFTSRTPTSDGLAAERRPVPFADSVPVEPQPS
AEDLRPVHDLRGTLERRRSSLHYAPLPVRTDVILSLLRDVLRRDRDDWGLDASAGALEG
FVFAFRSEGAEPGLYRVTAEETCYLAGLDEIGPAENLGVQREFSTGAGIVALYASLDRA
DTWAGSHGYRISALRASMATYDLNLRCQALGLVGTLFGGFVPSSVHHLVHSDGATRHSL
LATTYARPPES"
/protein_id="BGC0001472_5"
CDS 5567..7195
/translation="MVAEMKAEQIGRAARTDMQLTVPARPVLRRGVRLRRAGESVVLDG
ADRAQVFSGAFAREGLVPLTEACDGTRDHTELALKTGFDEATVYKCLALLSTAGAVEEA
MSGEEPDVTPEWAVFLSRLGNSTGSNPSWADAAARLVSRSVRLEGDAALVAGARRSLRE
VCPVVTEPAGPPGPGDELTVFFETPASAPLLAATEERCRQDGRPLLRVRADARTITIGP
YADLSITPCLDCGRHGEADLSGEPPEYLHDLVVGLASHHVTALLARATISHLPGDFTVI
DTATLSTVYRPVAVRPGCPRCSYARGPVAPQAPAGAVYEASVAMPPRAFLAPKDHQAHY
YASNLRLQSQFKDWPSRPHTPLPALDISVLAGSERHDPSHGDTPLTLSSLGLLLKVAFG
VKEDETTPERVKRWTAASGNIGSTTAYAVVRDDRIMPPGVYAYAQGSHTLVTVSGEVPP
GDSPCDIIITGDLKKVMTKYGTFGFRLVFLDAGCNLASLRELAQHLGLGFTPRSDWDDD
ALARLLGTSPADEPVAAFASLGGTA"
/protein_id="BGC0001472_6"
CDS 7210..7821
/translation="MSHDPRPQCLYLVGDTFSRRLTEHRGVPPELQVSFEDFLNDTAPH
ADVVVPVHAGGDPGLRDETDRICAERSTPSVGLQLLPTKVLCGPVVVPGRTACYACYRK
RAAQHAGTARPYDMDAALSGLPEGFGRQHLSVASGLLDLALTEIATGVTGIGGTVRTFN
LVSGAVSSAVTVSVNRCPRCGGRFSQARADSAMPVPELLR"
/protein_id="BGC0001472_7"
CDS 7845..9191
/translation="MHLNRPQEHISAELRGLEELVSPYGLVSRTAPLPVREGEPPFAVQ
LAYLGVPSRALPNLRTWAHDEDTGNSDGAGTGLTPERAKLVSIAEALERYSTCAWDDDE
MVVAAENDLTEEFVSPSRWPSCSPTELARDDCSLSAYDPSVPIRWVRAWSLTRRIPVLV
PAISVYLHMPYQSKSEEFIRGITTGAAVHSDVRSAVLGGLLEVVERDAIALVWLQQLRL
PELVVDPARLDAGVRELHRVGTSTDLRVRLFDATTDFGVPVIYAVQLSDADPALAQIVA
ATCDVHPEQALGKIYRELASLRVALRGYLSAYAGREPDPAKVSVVGGAVHNATRDRRDV
FGFLLDGERPAYGLEGMPGLPAGADPLDTVVARLAARGAEVLVTDITTDEARQVGMRAV
KVLVPEAMPVSFVHGERYLGTPRLYDAPRAMGHTSHAEDAVNPVQQPFA"
/protein_id="BGC0001472_8"
CDS 9238..10437
/translation="MTQITLEPGFLLLISLSYGRLQDHVTARLAPAEISGVSFVHLFAT
IPQPVGSKYNDTFAPLIRELFAPERVGGAGGHGPYYFVRTQDAQLGTDTLQISIEGVSD
EDSTRADLHRTAERYGCAAQVDATPLDSVPSPLWNAGFTGTGFSASSKRLFQEAAPTLV
SFLNRAAETPQSPPPALGAIRLMAAHTRATLLRSPQREIDGYEFRELLSLRLLSYRSHF
EAIYLRTKDPQSFDAACARFYEQVGAGVREFITACGDPDDDPADEMVRLWTKSITSESS
HLAENFSDGSVVNAGHTLEDLVRKRGAPVEPTRFHTPPSPELDRLMHRDADFLAFRLQT
SLLYSCLYTLGFSLAERYVFCYVVARANEDVCGKSMKELQDELDGLARSMASGSTKTAE
"
/protein_id="BGC0001472_9"
CDS 10511..10654
/translation="MEQQIELDVLEISDLIAGAGENDDLAQVMAASCTTTSVSTSSSSS
SS"
/protein_id="BGC0001472_10"
CDS 10977..13634
/translation="MGVNISPYVVYRRSRLPLGELGGMSFTTAWSRIDELHALRDEIGK
NAVGLADRLGELVPTLGDDVRADLIRLRRDVHNLRHDRAVARLEPLRPHLGREVVDEVE
TWCALGVRAEQCERAGREELESEKARAADGFGALFEHDAMARSIQLSGDRLYRGLRDLV
AGDEASALKPSKARLRESSLVNFAYRASLKPSPFGRFTEIGAFPPDDPRPADPGGRHGG
TQESVTTLNRLLVNWGPPGLPLVPGGMEPGHLVLNSTLRAGTEYVEYVGVAPGSREDGR
MATERVLRVRREGLFDALLAAMPEGSAPAATVLRDLTAVTGKAETSRKVVQGLIRAGIL
FFRPEIDDHDPDYSMKLDRVLAAGGTPETAALRGHFSELRRLETDFSEAAADERQKLLD
SAYAAIGGIAELCKVSPPPEEVLKSPVFEDTPASTAPQAWNLPTVEGSIPALTGLWRLA
SMMDNGQVKRLGLYSFATRVLGDRSTMPFLEFFQAFSSLTDQEQVDVFMGRDVEEAERY
TRQRAEALRTIRQRLVPGDGTVHLDPSVIEKACEGVEDLLDTESVTFRAQFAQGVLPDR
DRTLVVNGLLTGYGVYFSRFGSFVEGTDEWSLPAAQREHLARRFPGQVDLNSVLGFNFN
LHPSVTRRVVNYPGAVSLGAERTVYGLARLEVRADQATRSLRLWDPEAQETLDLVPMNF
MTPIGVPLLYRLLEALSPSNRYLWKPLDDIRDAGGPTVYGETAPRLVVGDVVADRRSWN
VAAAEIPMLQDLSRDVPEALVAFDAWRLTRGLPRHAFVLCQTPEERDVMAGRSRKVTRQ
WADYAHLRRASVHKPMYVDFRNPFLVRSFAKSALSRGDVVASIRECLPSVDDYGPDTGW
TAAEEFFVELCTDN"
/protein_id="BGC0001472_11"
CDS 13612..14571
/translation="MNCVPTTSGQTGTREWRTVHIHVPHSLHTPFLCDVVEPLLRSEGL
QDHFFFLRYWQGGPHLRLRMLCGPGAGSAEAAERVVAGLARAMPEFGAQAREEYALGLT
LQDELARLEKETSEEGRPIGALDRVAYEPEYRKYGGTEGLQIAETVFRKSSVAVLGLLG
GQPRAWVDERRAPIGEAARIMAMFLHGAGLDPRAAGLFLREYEDWWRTYAPDDMQRAWP
KLFGGVSAQMTNLCAAVWRDGATDVFHDISAEAAARARSVCGAEPGGDVRDLRLDGTPY
PGCLSNYVHTTNNRLGLVPAAEGLVAYLVRRGLEAMDG"
/protein_id="BGC0001472_12"
CDS 14692..15894
/translation="MTDRQDSAYPYPRTCPLHPPKEYASLRAEQPITKVTLASGRTAWL
LTRHEHIRQLLADPHVSSNLAHPGYPLHFDAPPEVMEQMRPVLLAMDPPVHTAQRKMVI
PEFTVKRVLQLRPRVEEIVDECISSMLAGEGPADLVEALALPVPSLVICELLGVPRSDR
AFFQDRTNKLVSVDADPQERNSAHQELHAYFSELVTAQEADPGDDLLGRLVVKNRETGT
FDHGELVGMANVLLVGGHETTANMISLGVVGLLENPDQLAKLRADPGLAPQAVDELLRY
FSIADQVTSRVATADLEIGGVLIRAGEGVIGLSASGNHDEAVFPDPDRLDIERGGRHHL
AFGHGIHQCIGQNLAKLELEVVFNALLARIPGLKLATPVAELPFKDSMGVYGLHKLPVS
W"
/protein_id="BGC0001472_13"
CDS 16220..16564
/translation="MYLSIVMWDLKKSEATVESLREYLRDYAVDAYSALDGMRLKAWFS
DSARQLWGAVYLWDSPEQMPGLYKVSRVIDLIGYPPTSVGGFTLEATAEGKSVHETLAG
LGIALEGGTQ"
/protein_id="BGC0001472_14"
CDS 17019..17729
/translation="MLIEDIEPLLQSIRAGVEFIEIYGLDTVPVPDSLLAECERRRIPV
RLLAASVANQVFKTEKKPKVFGIAKVPRPRRLSDLSDMTGDLILLDGVKIVGNIGAIVR
TSFALGASGIVLVDSDLGSIADRRLIRASRGYVFSLPIVLASRAEALQYFQDNAMRPVV
FEADGDLGVADLDGMDERLVLMFGSERIGPSGEFSDIAAKSVSIPMNPAAESLNVSVSA
GIALHARARRNLSR"
/protein_id="BGC0001472_15"
CDS 17815..19485
/translation="ALLGLRPFTPWEVSVAELGPDHRAEVNVLAADGRRVELIFLNTAM
HTGRHRLGLPSLWQDRRLVLRTVVADGSPLRRAGSYTYDGLVGVLTGLMESYRPTVVHT
LDPDPDIQHSTEAVRRRDSEQPGYSDHADHTAAACFAWAAMIRWVARATADGGRIPGFV
TVAYRGYYNRHWPKNLPQGVLARKAAHLVPYGGSPDWDCGNPSGCGDYNVGGDRPLTNR
KGWVRSTHHRYPGTRTVLTAEPDGRLAAYAVLGLRVVRWQETGPGSGAWGPPHDLGGGP
LAPALGSATTRDGRLLLFGLRFAALGGHGADNEREIVVLEQSAPGRGFRPWRGLGSPSP
GRDEVRRTGVPVAVAAPDGQIHLFVRDAEKGVSTRVRDGAGRWSAWRDMGGGEVQDGLH
TAVDEGGRVHVFGAGHHAVHHWTQDTPSAGLTARTQLTAAPVPAHAPAALPAPDGSVSL
YYRAAAGSGLTTARAGTAVPGARFDGYXXVDAAPSPRGPVLLGRTAEGLVQLLMGGGLH
VRTDGPAALDGASLRLGPDGRPSVAGLGPDAAPWMWRPR"
/protein_id="BGC0001472_16"
ORIGIN
1 gatcgatcga tcgatcgatc gatcgatcga tc
//
Loading

0 comments on commit 3b5d66e

Please sign in to comment.