diff --git a/modules/nf-core/seqfu/derep/environment.yml b/modules/nf-core/seqfu/derep/environment.yml
new file mode 100644
index 00000000000..16419e9d721
--- /dev/null
+++ b/modules/nf-core/seqfu/derep/environment.yml
@@ -0,0 +1,9 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+name: "seqfu_derep"
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:
+  - "bioconda::seqfu=1.20.3"
diff --git a/modules/nf-core/seqfu/derep/main.nf b/modules/nf-core/seqfu/derep/main.nf
new file mode 100644
--- /dev/null
+++ b/modules/nf-core/seqfu/derep/main.nf
@@ -0,0 +1,52 @@
+process SEQFU_DEREP {
+    tag "$meta.id"
+    label 'process_low'
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/seqfu:1.20.3--h1eb128b_2':
+        'biocontainers/seqfu:1.20.3--h1eb128b_2' }"
+
+    input:
+    tuple val(meta), path(fastas)
+
+    output:
+    // Staged inputs are excluded from output globs by default (includeInputs: false),
+    // so this matches only the file written below, whatever task.ext.prefix is set to.
+    tuple val(meta), path("*.fasta.gz"), emit: fasta
+    path "versions.yml"                , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}_derep"
+    // Refuse to run when an input file has the same name as the output file.
+    def fasta_files = fastas.collect { it.getName() }
+    if (fasta_files.any { it == "${prefix}.fasta.gz" }) {
+        error "Input file name coincides with the output file name: ${prefix}.fasta.gz. Please set a unique prefix."
+    }
+
+    """
+    seqfu \\
+        derep \\
+        $args \\
+        $fastas | gzip -c > "${prefix}.fasta.gz"
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        seqfu: \$(seqfu version)
+    END_VERSIONS
+    """
+
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}_derep"
+    """
+    echo "" | gzip -c > "${prefix}.fasta.gz"
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        seqfu: \$(seqfu version)
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/seqfu/derep/meta.yml b/modules/nf-core/seqfu/derep/meta.yml
new file mode 100644
--- /dev/null
+++ b/modules/nf-core/seqfu/derep/meta.yml
@@ -0,0 +1,44 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
+name: "seqfu_derep"
+description: Dereplicate FASTX sequences, removing duplicate sequences and printing the number of identical sequences in the sequence header. Can dereplicate already dereplicated FASTA files, summing the numbers found in the headers.
+keywords:
+  - dereplicate
+  - fasta
+  - uniques
+tools:
+  - "seqfu":
+      description: "DNA sequence utilities for FASTX files"
+      homepage: "https://telatin.github.io/seqfu2/"
+      documentation: "https://telatin.github.io/seqfu2/"
+      tool_dev_url: "https://telatin.github.io/seqfu2/tools/derep.html"
+      doi: "10.3390/bioengineering8050059"
+      licence: ["GPL v3"]
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. `[ id:'sample1', single_end:false ]`
+  - fastas:
+      type: file
+      description: Input files (mainly FASTA, FASTQ supported)
+      pattern: "*.{fa,fna,faa,fasta,fq,fastq}{,.gz}"
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. `[ id:'sample1', single_end:false ]`
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - fasta:
+      type: file
+      description: Dereplicated sequences in gzip-compressed FASTA format
+      pattern: "*.fasta.gz"
+authors:
+  - "@telatin"
+maintainers:
+  - "@telatin"
diff --git a/modules/nf-core/seqfu/derep/tests/main.nf.test b/modules/nf-core/seqfu/derep/tests/main.nf.test
new file mode 100644
--- /dev/null
+++ b/modules/nf-core/seqfu/derep/tests/main.nf.test
@@ -0,0 +1,91 @@
+nextflow_process {
+
+    name "Test Process SEQFU_DEREP"
+    script "../main.nf"
+    process "SEQFU_DEREP"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "seqfu"
+    tag "seqfu/derep"
+
+    test("derep - 3 fastas") {
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test' ],
+                    [
+                        file(params.test_data['sarscov2']['genome']['proteome_fasta'], checkIfExists: true),
+                        file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true),
+                        file(params.test_data['sarscov2']['genome']['genome_fasta_gz'], checkIfExists: true)
+                    ]
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() },
+                { assert process.out.fasta.size() == 1 }
+            )
+        }
+
+    }
+
+    test("derep - conflict") {
+
+        // The staged input is named exactly like the default output
+        // (<meta.id>_derep.fasta.gz), so the module must abort instead of running seqfu.
+        when {
+            process {
+                """
+                input[0] = Channel.of(">T1;size=300",
+                                "TTGATCACATA",
+                                ">T2;size=10",
+                                "TTGATCTCATA",
+                                ">T3;size=4",
+                                "TTGATGACATA")
+                            .collectFile(name: "test_derep.fasta.gz", newLine:true, sort:false)
+                            .map { file -> [ [ id:'test', single_end:true ], file ] }
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.failed },
+                { assert snapshot(process.out).match() }
+            )
+        }
+
+    }
+
+    test("derep - stub") {
+
+        options "-stub"
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test', single_end:false ], // meta map
+                    file(params.test_data['sarscov2']['genome']['proteome_fasta'], checkIfExists: true)
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+
+    }
+
+}
diff --git a/modules/nf-core/seqfu/derep/tests/main.nf.test.snap b/modules/nf-core/seqfu/derep/tests/main.nf.test.snap
new file mode 100644
index 00000000000..a8885d36071
--- /dev/null
+++ b/modules/nf-core/seqfu/derep/tests/main.nf.test.snap
@@ -0,0 +1,93 @@
+{
+    "derep - stub": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test_derep.fasta.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+                    ]
+                ],
+                "1": [
+                    "versions.yml:md5,f71fc1ed4ec36bf3a389c4aa28e1f08d"
+                ],
+                "fasta": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test_derep.fasta.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,f71fc1ed4ec36bf3a389c4aa28e1f08d"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "23.04.0"
+        },
+        "timestamp": "2024-07-15T14:53:08.527366698"
+    },
+    "derep - 3 fastas": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test_derep.fasta.gz:md5,db73b7edf590972f275915ffb7810933"
+                    ]
+                ],
+                "1": [
+                    "versions.yml:md5,f71fc1ed4ec36bf3a389c4aa28e1f08d"
+                ],
+                "fasta": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test_derep.fasta.gz:md5,db73b7edf590972f275915ffb7810933"
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,f71fc1ed4ec36bf3a389c4aa28e1f08d"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "23.04.0"
+        },
+        "timestamp": "2024-07-15T14:53:02.934761389"
+    },
+    "derep - conflict": {
+        "content": [
+            {
+                "0": [
+                    
+                ],
+                "1": [
+                    
+                ],
+                "fasta": [
+                    
+                ],
+                "versions": [
+                    
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "23.04.0"
+        },
+        "timestamp": "2024-07-15T15:17:01.598941968"
+    }
+}
\ No newline at end of file
diff --git a/modules/nf-core/seqfu/derep/tests/tags.yml b/modules/nf-core/seqfu/derep/tests/tags.yml
new file mode 100644
index 00000000000..5ed72e1263c
--- /dev/null
+++ b/modules/nf-core/seqfu/derep/tests/tags.yml
@@ -0,0 +1,2 @@
+seqfu/derep:
+  - "modules/nf-core/seqfu/derep/**"