From f473a3b4c7036098d0552ea0923be6eaf29b30b7 Mon Sep 17 00:00:00 2001
From: Usman Rashid
Date: Sun, 30 Jun 2024 13:33:18 +1200
Subject: [PATCH] Added evigene/tr2aacds

---
Review notes (this section is not part of the commit message):
 * The blob ids on the "index" lines below are carried over from the original
   commit and no longer match the edited file contents. Regenerate the patch
   with `git format-patch` before relying on them (e.g. for `git am -3`).

 .../nf-core/evigene/tr2aacds/environment.yml  |   9 ++
 modules/nf-core/evigene/tr2aacds/main.nf      |  87 +++++++++++++
 modules/nf-core/evigene/tr2aacds/meta.yml     |  52 ++++++++
 .../evigene/tr2aacds/tests/main.nf.test       |  65 ++++++++++
 .../evigene/tr2aacds/tests/main.nf.test.snap  | 120 ++++++++++++++++++
 5 files changed, 333 insertions(+)
 create mode 100644 modules/nf-core/evigene/tr2aacds/environment.yml
 create mode 100644 modules/nf-core/evigene/tr2aacds/main.nf
 create mode 100644 modules/nf-core/evigene/tr2aacds/meta.yml
 create mode 100644 modules/nf-core/evigene/tr2aacds/tests/main.nf.test
 create mode 100644 modules/nf-core/evigene/tr2aacds/tests/main.nf.test.snap

diff --git a/modules/nf-core/evigene/tr2aacds/environment.yml b/modules/nf-core/evigene/tr2aacds/environment.yml
new file mode 100644
index 000000000000..e49c50b03d25
--- /dev/null
+++ b/modules/nf-core/evigene/tr2aacds/environment.yml
@@ -0,0 +1,9 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+name: "evigene_tr2aacds"
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:
+  - "bioconda::evigene=23.7.15"
diff --git a/modules/nf-core/evigene/tr2aacds/main.nf b/modules/nf-core/evigene/tr2aacds/main.nf
new file mode 100644
index 000000000000..3e68b31ea896
--- /dev/null
+++ b/modules/nf-core/evigene/tr2aacds/main.nf
@@ -0,0 +1,87 @@
+// Filters a transcript assembly with evigene's tr2aacds.pl and emits the resulting
+// dropset/ and okayset/ directories, with the files inside renamed to the task prefix.
+process EVIGENE_TR2AACDS {
+    tag "$meta.id"
+    label 'process_high'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/evigene:23.7.15--hdfd78af_1':
+        'biocontainers/evigene:23.7.15--hdfd78af_1' }"
+
+    input:
+    tuple val(meta), path(fasta)
+
+    output:
+    tuple val(meta), path("dropset")    , emit: dropset
+    tuple val(meta), path("okayset")    , emit: okayset
+    path "versions.yml"                 , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+
+    // -MAXMEM is passed in megabytes: 80% of the task memory, or 7*1024 (7GB) when none is set
+    def max_memory = 7*1024
+    if (!task.memory) {
+        log.info '[evigene] Available memory not known - defaulting to 7GB. Specify process memory requirements to change this.'
+    } else {
+        max_memory = (task.memory.mega*0.8).intValue()
+    }
+
+    def simple_name = fasta.simpleName
+    """
+    \$EVIGENEHOME/scripts/prot/tr2aacds.pl \\
+        $args \\
+        -NCPU=$task.cpus \\
+        -MAXMEM=$max_memory \\
+        -cdnaseq $fasta
+
+    # Rename outputs from the simple name of the input to the requested prefix.
+    # Only the file name is rewritten, never the dropset/okayset directory part.
+    find dropset okayset -type f -exec sh -c '
+        for path in "\$@"; do
+            dir=\$(dirname "\$path")
+            old=\$(basename "\$path")
+            new=\$(printf "%s\\n" "\$old" | sed "s/$simple_name/$prefix/")
+            [ "\$old" = "\$new" ] || mv "\$path" "\$dir/\$new"
+        done
+    ' sh {} +
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        tr2aacds: \$(cat \$EVIGENEHOME/scripts/prot/tr2aacds.pl | sed -n 's/use constant VERSION => \\([^;]*\\);.*/\\1/p')
+    END_VERSIONS
+    """
+
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    mkdir dropset
+    touch dropset/${prefix}.drop.aa
+    touch dropset/${prefix}.drop.cds
+    touch dropset/${prefix}.drop.tr
+
+    mkdir okayset
+    touch okayset/${prefix}.ann.txt
+    touch okayset/${prefix}.cull.aa
+    touch okayset/${prefix}.cull.cds
+    touch okayset/${prefix}.cull.mrna
+    touch okayset/${prefix}.genesum.txt
+    touch okayset/${prefix}.mainalt.tab
+    touch okayset/${prefix}.okay.aa
+    touch okayset/${prefix}.okay.cds
+    touch okayset/${prefix}.okay.mrna
+    touch okayset/${prefix}.pubids
+    touch okayset/${prefix}.pubids.old
+    touch okayset/${prefix}.pubids.realt.log
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        tr2aacds: \$(cat \$EVIGENEHOME/scripts/prot/tr2aacds.pl | sed -n 's/use constant VERSION => \\([^;]*\\);.*/\\1/p')
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/evigene/tr2aacds/meta.yml b/modules/nf-core/evigene/tr2aacds/meta.yml
new file mode 100644
index 000000000000..0ae318b2e13a
--- /dev/null
+++ b/modules/nf-core/evigene/tr2aacds/meta.yml
@@ -0,0 +1,52 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
+name: "evigene_tr2aacds"
+description: Uses evigene/scripts/prot/tr2aacds.pl to filter a transcript assembly
+keywords:
+  - genomics
+  - transcript
+  - assembly
+  - filter
+tools:
+  - "evigene":
+      description: |
+        EvidentialGene is a genome informatics project for "Evidence Directed Gene Construction
+        for Eukaryotes", for constructing high quality, accurate gene sets for animals and
+        plants (any eukaryotes), being developed by Don Gilbert at Indiana University, gilbertd at indiana edu.
+      homepage: "http://arthropods.eugenes.org/EvidentialGene/evigene/"
+      documentation: "http://arthropods.eugenes.org/EvidentialGene/evigene/"
+      tool_dev_url: "http://arthropods.eugenes.org/EvidentialGene/evigene/"
+      doi: "10.7490/f1000research.1112594.1"
+      licence: ["Don Gilbert, gilbertd At indiana edu, 2018"]
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. `[ id:'sample1' ]`
+  - fasta:
+      type: file
+      description: Transcript assembly in fasta format
+      pattern: "*.{fsa,fa,fasta,fsa.gz,fa.gz,fasta.gz}"
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. `[ id:'sample1' ]`
+  - dropset:
+      type: directory
+      description: Directory containing dropped transcripts and associated files
+      pattern: "dropset"
+  - okayset:
+      type: directory
+      description: Directory containing included transcripts and associated files
+      pattern: "okayset"
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+authors:
+  - "@GallVp"
+maintainers:
+  - "@GallVp"
diff --git a/modules/nf-core/evigene/tr2aacds/tests/main.nf.test b/modules/nf-core/evigene/tr2aacds/tests/main.nf.test
new file mode 100644
index 000000000000..5ab9812a33ec
--- /dev/null
+++ b/modules/nf-core/evigene/tr2aacds/tests/main.nf.test
@@ -0,0 +1,65 @@
+nextflow_process {
+
+    name "Test Process EVIGENE_TR2AACDS"
+    script "../main.nf"
+    process "EVIGENE_TR2AACDS"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "evigene"
+    tag "evigene/tr2aacds"
+
+    test("homo_sapiens - transcriptome_fasta") {
+        when {
+            process {
+                """
+
+                input[0] = [
+                    [ id:'test', single_end:false ], // meta map
+                    file(params.test_data['homo_sapiens']['genome']['transcriptome_fasta'], checkIfExists: true)
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(
+                    process.out.dropset[0][1],
+                    // listFiles() gives no ordering guarantee; sort so the snapshot is stable
+                    file(process.out.okayset[0][1]).listFiles().collect { it.name }.sort(),
+                    process.out.versions
+                    ).match()
+                }
+            )
+        }
+
+    }
+
+    test("stub") {
+
+        options '-stub'
+
+        when {
+            process {
+                """
+
+                input[0] = [
+                    [ id:'test', single_end:false ], // meta map
+                    file(params.test_data['homo_sapiens']['genome']['transcriptome_fasta'], checkIfExists: true)
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+
+    }
+
+}
diff --git a/modules/nf-core/evigene/tr2aacds/tests/main.nf.test.snap b/modules/nf-core/evigene/tr2aacds/tests/main.nf.test.snap
new file mode 100644
index 000000000000..c77f2a13c6aa
--- /dev/null
+++ b/modules/nf-core/evigene/tr2aacds/tests/main.nf.test.snap
@@ -0,0 +1,120 @@
+{
+    "stub": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        [
+                            "test.drop.aa:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "test.drop.cds:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "test.drop.tr:md5,d41d8cd98f00b204e9800998ecf8427e"
+                        ]
+                    ]
+                ],
+                "1": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        [
+                            "test.ann.txt:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "test.cull.aa:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "test.cull.cds:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "test.cull.mrna:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "test.genesum.txt:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "test.mainalt.tab:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "test.okay.aa:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "test.okay.cds:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "test.okay.mrna:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "test.pubids:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "test.pubids.old:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "test.pubids.realt.log:md5,d41d8cd98f00b204e9800998ecf8427e"
+                        ]
+                    ]
+                ],
+                "2": [
+                    "versions.yml:md5,57e8000ce92f5f8d590e7baffa762e84"
+                ],
+                "dropset": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        [
+                            "test.drop.aa:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "test.drop.cds:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "test.drop.tr:md5,d41d8cd98f00b204e9800998ecf8427e"
+                        ]
+                    ]
+                ],
+                "okayset": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        [
+                            "test.ann.txt:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "test.cull.aa:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "test.cull.cds:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "test.cull.mrna:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "test.genesum.txt:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "test.mainalt.tab:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "test.okay.aa:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "test.okay.cds:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "test.okay.mrna:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "test.pubids:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "test.pubids.old:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "test.pubids.realt.log:md5,d41d8cd98f00b204e9800998ecf8427e"
+                        ]
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,57e8000ce92f5f8d590e7baffa762e84"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "24.04.2"
+        },
+        "timestamp": "2024-06-30T13:24:57.680066"
+    },
+    "homo_sapiens - transcriptome_fasta": {
+        "content": [
+            [
+                "test.drop.aa:md5,b43c660c34a419ec35090c163a4e4a54",
+                "test.drop.cds:md5,5adb810e379fde7d2057a34ead7eaa58",
+                "test.drop.tr:md5,5f38aa1a7e7adb09ce34c0bd74727d67"
+            ],
+            [
+                "test.ann.txt",
+                "test.cull.aa",
+                "test.cull.cds",
+                "test.cull.mrna",
+                "test.genesum.txt",
+                "test.mainalt.tab",
+                "test.okay.aa",
+                "test.okay.cds",
+                "test.okay.mrna",
+                "test.pubids",
+                "test.pubids.old",
+                "test.pubids.realt.log"
+            ],
+            [
+                "versions.yml:md5,57e8000ce92f5f8d590e7baffa762e84"
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "24.04.2"
+        },
+        "timestamp": "2024-06-30T13:32:04.295304"
+    }
+}
\ No newline at end of file