Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added evigene/tr2aacds #5898

Merged
merged 3 commits into from
Jul 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions modules/nf-core/evigene/tr2aacds/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
# Conda environment for the evigene/tr2aacds module; main.nf points its
# `conda` directive at this file.
name: evigene_tr2aacds
channels:
  - conda-forge
  - bioconda
  - defaults
dependencies:
  # Same evigene release as the container images referenced in main.nf.
  - 'bioconda::evigene=23.7.15'
107 changes: 107 additions & 0 deletions modules/nf-core/evigene/tr2aacds/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
// Runs EvidentialGene's tr2aacds.pl on a transcript assembly and publishes the
// resulting `dropset` and `okayset` directories.
//
// Input : tuple val(meta), path(fasta) - meta map and the transcript FASTA.
// Output: dropset  - tuple val(meta), path("dropset")
//         okayset  - tuple val(meta), path("okayset")
//         versions - versions.yml with the tr2aacds.pl VERSION constant.
//
// Files inside both directories are renamed so that the leading input file
// name (fasta.simpleName) is replaced with the configured prefix.
process EVIGENE_TR2AACDS {
    tag "$meta.id"
    label 'process_medium'

    conda "${moduleDir}/environment.yml"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/evigene:23.7.15--hdfd78af_1':
        'biocontainers/evigene:23.7.15--hdfd78af_1' }"

    input:
    tuple val(meta), path(fasta)

    output:
    tuple val(meta), path("dropset")    , emit: dropset
    tuple val(meta), path("okayset")    , emit: okayset
    path "versions.yml"                 , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"

    // -MAXMEM is given in megabytes: 80% of the task memory when it is known,
    // otherwise a 7 GB fallback.
    def max_memory = 7*1024
    if (!task.memory) {
        log.info '[evigene] Available memory not known - defaulting to 7GB. Specify process memory requirements to change this.'
    } else {
        max_memory = (task.memory.mega*0.8).intValue()
    }

    // Output files are renamed only when the input's simple name differs from
    // the prefix. The rename touches the leading part of each file's basename
    // only: substituting on the whole path (as a sed expression would) can hit
    // the directory name first, e.g. input `set.fa` would turn
    // `dropset/set.drop.aa` into `drop<prefix>/set.drop.aa` and make mv fail.
    // Both names are passed to the inner shell as positional arguments so that
    // special characters in them are never interpreted as a pattern or code.
    def simple_name = fasta.simpleName
    def rename_files= ( simple_name != prefix ) ? 'yes' : 'no'
    """
    # Nextflow changes the container --entrypoint to /bin/bash (container default entrypoint: /usr/local/env-execute)
    # Check for container variable initialisation script and source it.
    if [ -f "/usr/local/env-activate.sh" ]; then
        set +u  # Otherwise, errors out because of various unbound variables
        . "/usr/local/env-activate.sh"
        set -u
    fi

    \$EVIGENEHOME/scripts/prot/tr2aacds.pl \\
        $args \\
        -NCPU=$task.cpus \\
        -MAXMEM=$max_memory \\
        -cdnaseq $fasta

    if [ "$rename_files" = "yes" ]; then
        find \\
            dropset okayset \\
            -type f \\
            -name "${simple_name}*" \\
            -exec sh -c '
                dir=\$(dirname "\$1")
                base=\$(basename "\$1")
                rest=\${base#"\$2"}
                mv "\$1" "\$dir/\$3\$rest"
            ' sh {} "$simple_name" "$prefix" \\;
    fi

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        tr2aacds: \$(cat \$EVIGENEHOME/scripts/prot/tr2aacds.pl | sed -n 's/use constant VERSION => \\([^;]*\\);.*/\\1/p')
    END_VERSIONS
    """

    stub:
    // The stub only creates empty placeholder files, so it needs neither the
    // extra arguments nor the memory estimate used by the real script.
    def prefix = task.ext.prefix ?: "${meta.id}"
    """
    # Nextflow changes the container --entrypoint to /bin/bash (container default entrypoint: /usr/local/env-execute)
    # Check for container variable initialisation script and source it.
    if [ -f "/usr/local/env-activate.sh" ]; then
        set +u  # Otherwise, errors out because of various unbound variables
        . "/usr/local/env-activate.sh"
        set -u
    fi

    mkdir dropset
    touch dropset/${prefix}.drop.aa
    touch dropset/${prefix}.drop.cds
    touch dropset/${prefix}.drop.tr

    mkdir okayset
    touch okayset/${prefix}.ann.txt
    touch okayset/${prefix}.cull.aa
    touch okayset/${prefix}.cull.cds
    touch okayset/${prefix}.cull.mrna
    touch okayset/${prefix}.genesum.txt
    touch okayset/${prefix}.mainalt.tab
    touch okayset/${prefix}.okay.aa
    touch okayset/${prefix}.okay.cds
    touch okayset/${prefix}.okay.mrna
    touch okayset/${prefix}.pubids
    touch okayset/${prefix}.pubids.old
    touch okayset/${prefix}.pubids.realt.log

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        tr2aacds: \$(cat \$EVIGENEHOME/scripts/prot/tr2aacds.pl | sed -n 's/use constant VERSION => \\([^;]*\\);.*/\\1/p')
    END_VERSIONS
    """
}
56 changes: 56 additions & 0 deletions modules/nf-core/evigene/tr2aacds/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
# Module metadata for evigene/tr2aacds: describes the tool and the process
# inputs/outputs declared in main.nf.
name: "evigene_tr2aacds"
description: Uses evigene/scripts/prot/tr2aacds.pl to filter a transcript assembly
keywords:
  - genomics
  - transcript
  - assembly
  - clean
  - polish
  - filter
  - redundant
  - duplicate
tools:
  - "evigene":
      description: |
        EvidentialGene is a genome informatics project for "Evidence Directed Gene Construction
        for Eukaryotes", for constructing high quality, accurate gene sets for animals and
        plants (any eukaryotes), being developed by Don Gilbert at Indiana University, gilbertd at indiana edu.
      homepage: "http://arthropods.eugenes.org/EvidentialGene/evigene/"
      documentation: "http://arthropods.eugenes.org/EvidentialGene/evigene/"
      tool_dev_url: "http://arthropods.eugenes.org/EvidentialGene/evigene/"
      # No surrounding whitespace: the value is used verbatim as an identifier.
      doi: "10.7490/f1000research.1112594.1"
      licence: ["Don Gilbert, gilbertd At indiana edu, 2018"]
input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. `[ id:'sample1' ]`
  - fasta:
      type: file
      description: Transcript assembly in fasta format
      pattern: "*.{fsa,fa,fasta,fsa.gz,fa.gz,fasta.gz}"
output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. `[ id:'sample1' ]`
  - dropset:
      type: directory
      description: Directory containing dropped transcripts and associated files
      pattern: "dropset"
  - okayset:
      type: directory
      description: Directory containing included transcripts and associated files
      pattern: "okayset"
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
authors:
  - "@GallVp"
maintainers:
  - "@GallVp"
64 changes: 64 additions & 0 deletions modules/nf-core/evigene/tr2aacds/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
// nf-test suite for the EVIGENE_TR2AACDS process: one run on the human
// transcriptome test FASTA and one stub run, both checked against snapshots.
nextflow_process {

    name "Test Process EVIGENE_TR2AACDS"
    script "../main.nf"
    process "EVIGENE_TR2AACDS"

    tag "modules"
    tag "modules_nfcore"
    tag "evigene"
    tag "evigene/tr2aacds"

    test("homo_sapiens - transcriptome_fasta") {

        when {
            process {
                """

                input[0] = [
                    [ id:'test', single_end:false ], // meta map
                    file(params.test_data['homo_sapiens']['genome']['transcriptome_fasta'], checkIfExists: true)
                ]
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                {
                    // dropset is snapshotted as a whole directory, okayset only
                    // by its sorted file names (presumably because its file
                    // contents are not reproducible - confirm).
                    def dropset_dir   = process.out.dropset[0][1]
                    def okayset_names = file(process.out.okayset[0][1])
                        .listFiles()
                        .collect { entry -> entry.name }
                        .toSorted()

                    assert snapshot(dropset_dir, okayset_names, process.out.versions).match()
                }
            )
        }

    }

    test("stub") {

        // Executes the process's `stub:` section instead of the real script.
        options '-stub'

        when {
            process {
                """

                input[0] = [
                    [ id:'test', single_end:false ], // meta map
                    file(params.test_data['homo_sapiens']['genome']['transcriptome_fasta'], checkIfExists: true)
                ]
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                // Every output channel is compared against the stored snapshot.
                { assert snapshot(process.out).match() }
            )
        }

    }

}
120 changes: 120 additions & 0 deletions modules/nf-core/evigene/tr2aacds/tests/main.nf.test.snap
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
{
"stub": {
"content": [
{
"0": [
[
{
"id": "test",
"single_end": false
},
[
"test.drop.aa:md5,d41d8cd98f00b204e9800998ecf8427e",
"test.drop.cds:md5,d41d8cd98f00b204e9800998ecf8427e",
"test.drop.tr:md5,d41d8cd98f00b204e9800998ecf8427e"
]
]
],
"1": [
[
{
"id": "test",
"single_end": false
},
[
"test.ann.txt:md5,d41d8cd98f00b204e9800998ecf8427e",
"test.cull.aa:md5,d41d8cd98f00b204e9800998ecf8427e",
"test.cull.cds:md5,d41d8cd98f00b204e9800998ecf8427e",
"test.cull.mrna:md5,d41d8cd98f00b204e9800998ecf8427e",
"test.genesum.txt:md5,d41d8cd98f00b204e9800998ecf8427e",
"test.mainalt.tab:md5,d41d8cd98f00b204e9800998ecf8427e",
"test.okay.aa:md5,d41d8cd98f00b204e9800998ecf8427e",
"test.okay.cds:md5,d41d8cd98f00b204e9800998ecf8427e",
"test.okay.mrna:md5,d41d8cd98f00b204e9800998ecf8427e",
"test.pubids:md5,d41d8cd98f00b204e9800998ecf8427e",
"test.pubids.old:md5,d41d8cd98f00b204e9800998ecf8427e",
"test.pubids.realt.log:md5,d41d8cd98f00b204e9800998ecf8427e"
]
]
],
"2": [
"versions.yml:md5,57e8000ce92f5f8d590e7baffa762e84"
],
"dropset": [
[
{
"id": "test",
"single_end": false
},
[
"test.drop.aa:md5,d41d8cd98f00b204e9800998ecf8427e",
"test.drop.cds:md5,d41d8cd98f00b204e9800998ecf8427e",
"test.drop.tr:md5,d41d8cd98f00b204e9800998ecf8427e"
]
]
],
"okayset": [
[
{
"id": "test",
"single_end": false
},
[
"test.ann.txt:md5,d41d8cd98f00b204e9800998ecf8427e",
"test.cull.aa:md5,d41d8cd98f00b204e9800998ecf8427e",
"test.cull.cds:md5,d41d8cd98f00b204e9800998ecf8427e",
"test.cull.mrna:md5,d41d8cd98f00b204e9800998ecf8427e",
"test.genesum.txt:md5,d41d8cd98f00b204e9800998ecf8427e",
"test.mainalt.tab:md5,d41d8cd98f00b204e9800998ecf8427e",
"test.okay.aa:md5,d41d8cd98f00b204e9800998ecf8427e",
"test.okay.cds:md5,d41d8cd98f00b204e9800998ecf8427e",
"test.okay.mrna:md5,d41d8cd98f00b204e9800998ecf8427e",
"test.pubids:md5,d41d8cd98f00b204e9800998ecf8427e",
"test.pubids.old:md5,d41d8cd98f00b204e9800998ecf8427e",
"test.pubids.realt.log:md5,d41d8cd98f00b204e9800998ecf8427e"
]
]
],
"versions": [
"versions.yml:md5,57e8000ce92f5f8d590e7baffa762e84"
]
}
],
"meta": {
"nf-test": "0.8.4",
"nextflow": "24.04.2"
},
"timestamp": "2024-06-30T13:24:57.680066"
},
"homo_sapiens - transcriptome_fasta": {
"content": [
[
"test.drop.aa:md5,b43c660c34a419ec35090c163a4e4a54",
"test.drop.cds:md5,5adb810e379fde7d2057a34ead7eaa58",
"test.drop.tr:md5,5f38aa1a7e7adb09ce34c0bd74727d67"
],
[
"test.ann.txt",
"test.cull.aa",
"test.cull.cds",
"test.cull.mrna",
"test.genesum.txt",
"test.mainalt.tab",
"test.okay.aa",
"test.okay.cds",
"test.okay.mrna",
"test.pubids",
"test.pubids.old",
"test.pubids.realt.log"
],
[
"versions.yml:md5,57e8000ce92f5f8d590e7baffa762e84"
]
],
"meta": {
"nf-test": "0.8.4",
"nextflow": "24.04.2"
},
"timestamp": "2024-06-30T13:46:54.373144"
}
}
Loading