Skip to content

Commit

Permalink
Add Seqfu/derep (#5958)
Browse files Browse the repository at this point in the history
* add-usearch-unoise3

* add seqfu/derep

* Delete modules/nf-core/usearch/unoise3/environment.yml

* Delete modules/nf-core/usearch/unoise3/main.nf

* remove files

* update metadata

* Update modules/nf-core/seqfu/derep/main.nf

Co-authored-by: Simon Pearce <[email protected]>

* Update modules/nf-core/seqfu/derep/main.nf

Co-authored-by: Simon Pearce <[email protected]>

* Update modules/nf-core/seqfu/derep/main.nf

Co-authored-by: Simon Pearce <[email protected]>

* Update modules/nf-core/seqfu/derep/main.nf

Co-authored-by: Simon Pearce <[email protected]>

* Update modules/nf-core/seqfu/derep/main.nf

Co-authored-by: Simon Pearce <[email protected]>

* Update modules/nf-core/seqfu/derep/tests/main.nf.test

Co-authored-by: Simon Pearce <[email protected]>

* Fix dataset in -stub

* Clarify description

* Update modules/nf-core/seqfu/derep/meta.yml

Co-authored-by: Simon Pearce <[email protected]>

* check for pre-existing output file && update test

* Update main.nf

---------

Co-authored-by: Simon Pearce <[email protected]>
  • Loading branch information
telatin and SPPearce authored Jul 18, 2024
1 parent af50683 commit e2e6613
Show file tree
Hide file tree
Showing 6 changed files with 285 additions and 0 deletions.
9 changes: 9 additions & 0 deletions modules/nf-core/seqfu/derep/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
# Conda environment for the seqfu/derep module (referenced from main.nf via
# the `conda` directive).
name: "seqfu_derep"
# Channels to search for packages.
channels:
- conda-forge
- bioconda
- defaults
dependencies:
# seqfu is pinned to 1.20.3, the same version as the container images
# declared in main.nf.
- "bioconda::seqfu=1.20.3"
50 changes: 50 additions & 0 deletions modules/nf-core/seqfu/derep/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
// Dereplicate one or more FASTA/FASTQ files with `seqfu derep` and write the
// result as a single gzip-compressed FASTA named "<prefix>.fasta.gz".
//
// Inputs:
//   meta   - Groovy map with sample information (meta.id is used for tagging
//            and for the default output prefix)
//   fastas - one file or a list of files passed to `seqfu derep`
// Outputs:
//   fasta    - tuple of meta and the dereplicated "<prefix>.fasta.gz"
//   versions - versions.yml recording the seqfu version
// Fails early when an input file has the same name as the output file.
process SEQFU_DEREP {
    tag "$meta.id"
    label 'process_low'

    conda "${moduleDir}/environment.yml"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/seqfu:1.20.3--h1eb128b_2':
        'biocontainers/seqfu:1.20.3--h1eb128b_2' }"

    input:
    tuple val(meta), path(fastas)

    output:
    // Named after the resolved prefix so that a custom `ext.prefix` (which the
    // collision error below asks users to set) is still captured.
    tuple val(meta), path("${prefix}.fasta.gz"), emit: fasta
    path "versions.yml"                        , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    // Deliberately declared without `def`: the output block references it.
    prefix = task.ext.prefix ?: "${meta.id}_derep"
    // `fastas` is a single path when one file is staged and a list otherwise;
    // normalise both cases to a list of file names.
    def fasta_files = fastas instanceof List ? fastas.collect { it.getName() } : [ fastas.getName() ]
    // The output is written by shell redirection into the task directory, so an
    // input staged under the same name would be clobbered while being read.
    if (fasta_files.any { it == "${prefix}.fasta.gz" }) {
        error "Input file name coincides with the output file name: ${prefix}.fasta.gz. Please set a unique prefix."
    }

    """
    seqfu \\
        derep \\
        $args \\
        $fastas | gzip -c > "${prefix}.fasta.gz"

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        seqfu: \$(seqfu version)
    END_VERSIONS
    """

    stub:
    // Same prefix resolution as the real script so the output declaration matches.
    prefix = task.ext.prefix ?: "${meta.id}_derep"
    """
    echo "" | gzip -c > "${prefix}.fasta.gz"

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        seqfu: \$(seqfu version)
    END_VERSIONS
    """
}
44 changes: 44 additions & 0 deletions modules/nf-core/seqfu/derep/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
# Module metadata for seqfu/derep: describes the tool and the process's
# input and output channels.
name: "seqfu_derep"
description: >-
  Dereplicate FASTX sequences, removing duplicate sequences and printing the
  number of identical sequences in the sequence header. Can dereplicate already
  dereplicated FASTA files, summing the numbers found in the headers.
keywords:
  - dereplicate
  - fasta
  - uniques
tools:
  - "seqfu":
      description: "DNA sequence utilities for FASTX files"
      homepage: "https://telatin.github.io/seqfu2/"
      documentation: "https://telatin.github.io/seqfu2/"
      tool_dev_url: "https://telatin.github.io/seqfu2/tools/derep.html"
      doi: "10.3390/bioengineering8050059"
      licence: ["GPL v3"]
input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. `[ id:'sample1', single_end:false ]`
  - fastas:
      type: file
      description: Input files (mainly FASTA, FASTQ supported)
      # `{,.gz}` marks the compression suffix as optional; `[.gz]` would be a
      # single-character class.
      pattern: "*.{fa,fna,faa,fasta,fq,fastq}{,.gz}"
output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. `[ id:'sample1', single_end:false ]`
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - fasta:
      type: file
      description: Dereplicated sequences (gzip-compressed FASTA format)
      pattern: "*.fasta.gz"
authors:
  - "@telatin"
maintainers:
  - "@telatin"
87 changes: 87 additions & 0 deletions modules/nf-core/seqfu/derep/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
// nf-test specification for the SEQFU_DEREP process.
nextflow_process {

    name "Test Process SEQFU_DEREP"
    script "../main.nf"
    process "SEQFU_DEREP"

    tag "modules"
    tag "modules_nfcore"
    tag "seqfu"
    tag "seqfu/derep"

    // Several inputs (plain and gzip-compressed) must yield one output file.
    test("derep - 3 fastas") {

        when {
            process {
                """
                input[0] = [
                    [ id:'test' ],
                    [
                        file(params.test_data['sarscov2']['genome']['proteome_fasta'], checkIfExists: true),
                        file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true),
                        file(params.test_data['sarscov2']['genome']['genome_fasta_gz'], checkIfExists: true)
                    ]
                ]
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                { assert snapshot(process.out).match() },
                { assert process.out.fasta.size() == 1 }
            )
        }

    }

    // The input is named exactly like the default output ("test_derep.fasta.gz"),
    // so the process is expected to abort and emit nothing.
    test("derep - conflict") {

        when {
            process {
                """
                input[0] = Channel.of(">T1;size=300",
                                      "TTGATCACATA",
                                      ">T2;size=10",
                                      "TTGATCTCATA",
                                      ">T3;size=4",
                                      "TTGATGACATA")
                    .collectFile(name: "test_derep.fasta.gz", newLine:true, sort:false)
                    .map { file -> [ [ id:'test', single_end:true ], file ] }
                """
            }
        }

        then {
            assertAll(
                // State the expected failure explicitly rather than relying only
                // on the empty-output snapshot.
                { assert process.failed },
                { assert snapshot(process.out).match() }
            )
        }

    }

    // Stub run: only the declared outputs and versions.yml are checked.
    test("derep - stub") {

        options "-stub"

        when {
            process {
                """
                input[0] = [
                    [ id:'test', single_end:false ], // meta map
                    file(params.test_data['sarscov2']['genome']['proteome_fasta'], checkIfExists: true)
                ]
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                { assert snapshot(process.out).match() }
            )
        }

    }

}
93 changes: 93 additions & 0 deletions modules/nf-core/seqfu/derep/tests/main.nf.test.snap
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
{
"derep - stub": {
"content": [
{
"0": [
[
{
"id": "test",
"single_end": false
},
"test_derep.fasta.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
]
],
"1": [
"versions.yml:md5,f71fc1ed4ec36bf3a389c4aa28e1f08d"
],
"fasta": [
[
{
"id": "test",
"single_end": false
},
"test_derep.fasta.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
]
],
"versions": [
"versions.yml:md5,f71fc1ed4ec36bf3a389c4aa28e1f08d"
]
}
],
"meta": {
"nf-test": "0.8.4",
"nextflow": "23.04.0"
},
"timestamp": "2024-07-15T14:53:08.527366698"
},
"derep - 3 fastas": {
"content": [
{
"0": [
[
{
"id": "test"
},
"test_derep.fasta.gz:md5,db73b7edf590972f275915ffb7810933"
]
],
"1": [
"versions.yml:md5,f71fc1ed4ec36bf3a389c4aa28e1f08d"
],
"fasta": [
[
{
"id": "test"
},
"test_derep.fasta.gz:md5,db73b7edf590972f275915ffb7810933"
]
],
"versions": [
"versions.yml:md5,f71fc1ed4ec36bf3a389c4aa28e1f08d"
]
}
],
"meta": {
"nf-test": "0.8.4",
"nextflow": "23.04.0"
},
"timestamp": "2024-07-15T14:53:02.934761389"
},
"derep - conflict": {
"content": [
{
"0": [

],
"1": [

],
"fasta": [

],
"versions": [

]
}
],
"meta": {
"nf-test": "0.8.4",
"nextflow": "23.04.0"
},
"timestamp": "2024-07-15T15:17:01.598941968"
}
}
2 changes: 2 additions & 0 deletions modules/nf-core/seqfu/derep/tests/tags.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Associates the "seqfu/derep" tag with every path under the module directory.
# NOTE(review): presumably consumed by CI to decide which tests to run when
# these paths change - confirm against the repository's workflow configuration.
seqfu/derep:
- "modules/nf-core/seqfu/derep/**"

0 comments on commit e2e6613

Please sign in to comment.