Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Seqfu/derep #5958

Merged
merged 21 commits into from
Jul 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions modules/nf-core/seqfu/derep/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
# Conda environment definition for the seqfu/derep module.
name: "seqfu_derep"
channels:
- conda-forge
- bioconda
- defaults
dependencies:
# seqfu is pinned to an exact version; the container tags in this module's
# main.nf reference the same 1.20.3 release.
- "bioconda::seqfu=1.20.3"
50 changes: 50 additions & 0 deletions modules/nf-core/seqfu/derep/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
// Dereplicate one or more FASTA/FASTQ files with `seqfu derep` and write the
// result as a single gzip-compressed FASTA named "<prefix>.fasta.gz".
//
// Input:  tuple val(meta), path(fastas)  - meta map plus one file or a list of files
// Output: fasta    - tuple val(meta), path to the dereplicated, gzipped FASTA
//         versions - versions.yml recording the seqfu version
// Fails early (before running seqfu) when an input file has the same name as
// the output file, because the shell redirection would otherwise overwrite it.
process SEQFU_DEREP {
    tag "$meta.id"
    label 'process_low'
    conda "${moduleDir}/environment.yml"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/seqfu:1.20.3--h1eb128b_2':
        'biocontainers/seqfu:1.20.3--h1eb128b_2' }"

    input:
    tuple val(meta), path(fastas)

    output:
    // Match on the extension only: the file name is "<prefix>.fasta.gz" and the
    // prefix can be overridden through task.ext.prefix, so a "*_derep" glob would
    // miss the output for any custom prefix. Staged inputs are not captured by
    // the glob (Nextflow's `includeInputs` option defaults to false).
    tuple val(meta), path("*.fasta.gz"), emit: fasta
    path "versions.yml"                , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args   = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}_derep"
    // A single staged file arrives as a path object rather than a List;
    // normalise so the name check always looks at whole file names.
    def fasta_list  = fastas instanceof List ? fastas : [fastas]
    def fasta_files = fasta_list.collect { it.getName() }
    if (fasta_files.any { it == "${prefix}.fasta.gz" }) {
        error "Input file name coincides with the output file name: ${prefix}.fasta.gz. Please set a unique prefix."
    }

    """
    seqfu \\
        derep \\
        $args \\
        $fastas | gzip -c > "${prefix}.fasta.gz"

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        seqfu: \$(seqfu version)
    END_VERSIONS
    """

    stub:
    def prefix = task.ext.prefix ?: "${meta.id}_derep"
    """
    echo "" | gzip -c > "${prefix}.fasta.gz"

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        seqfu: \$(seqfu version)
    END_VERSIONS
    """
}
44 changes: 44 additions & 0 deletions modules/nf-core/seqfu/derep/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
# Module metadata for seqfu/derep: describes the wrapped tool and the
# input/output channels declared in main.nf.
name: "seqfu_derep"
description: Dereplicate FASTX sequences, removing duplicate sequences and printing the number of identical sequences in the sequence header. Can dereplicate already dereplicated FASTA files, summing the numbers found in the headers.
keywords:
  - dereplicate
  - fasta
  - uniques
tools:
  - "seqfu":
      description: "DNA sequence utilities for FASTX files"
      homepage: "https://telatin.github.io/seqfu2/"
      documentation: "https://telatin.github.io/seqfu2/"
      tool_dev_url: "https://telatin.github.io/seqfu2/tools/derep.html"
      doi: "10.3390/bioengineering8050059"
      licence: ["GPL v3"]
input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. `[ id:'sample1', single_end:false ]`
  - fastas:
      type: file
      description: Input files (mainly FASTA, FASTQ supported)
      pattern: "*.{fa,fna,faa,fasta,fq,fastq}[.gz]"
output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. `[ id:'sample1', single_end:false ]`
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - fasta:
      type: file
      description: Dereplicated sequences, gzip-compressed (FASTA format)
      # The process always writes "<prefix>.fasta.gz".
      pattern: "*.fasta.gz"
authors:
  - "@telatin"
maintainers:
  - "@telatin"
87 changes: 87 additions & 0 deletions modules/nf-core/seqfu/derep/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
// nf-test suite for the SEQFU_DEREP process: a regular multi-file run, an
// input/output file-name collision that must abort the task, and a stub run.
nextflow_process {

    name "Test Process SEQFU_DEREP"
    script "../main.nf"
    process "SEQFU_DEREP"

    tag "modules"
    tag "modules_nfcore"
    tag "seqfu"
    tag "seqfu/derep"

    // Three inputs (plain and gzipped) must be merged into exactly one output file.
    test("derep - 3 fastas") {

        when {
            process {
                """
                input[0] = [
                    [ id:'test' ],
                    [
                        file(params.test_data['sarscov2']['genome']['proteome_fasta'], checkIfExists: true),
                        file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true),
                        file(params.test_data['sarscov2']['genome']['genome_fasta_gz'], checkIfExists: true)
                    ]
                ]
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                { assert snapshot(process.out).match() },
                { assert process.out.fasta.size() == 1 }
            )
        }

    }

    // The input is deliberately named like the default output
    // ("<meta.id>_derep.fasta.gz" with id 'test'), so the process must fail
    // instead of overwriting its own input.
    test("derep - conflict") {

        when {
            process {
                """
                input[0] = Channel.of(">T1;size=300",
                        "TTGATCACATA",
                        ">T2;size=10",
                        "TTGATCTCATA",
                        ">T3;size=4",
                        "TTGATGACATA")
                    .collectFile(name: "test_derep.fasta.gz", newLine:true, sort:false)
                    .map { file -> [ [ id:'test', single_end:true ], file ] }
                """
            }
        }

        then {
            assertAll(
                // Make the expected failure explicit; the snapshot below only
                // records that no outputs were emitted.
                { assert process.failed },
                { assert snapshot(process.out).match() }
            )
        }

    }

    test("derep - stub") {

        options "-stub"

        when {
            process {
                """
                input[0] = [
                    [ id:'test', single_end:false ], // meta map
                    file(params.test_data['sarscov2']['genome']['proteome_fasta'], checkIfExists: true)
                ]
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                { assert snapshot(process.out).match() }
            )
        }

    }

}
93 changes: 93 additions & 0 deletions modules/nf-core/seqfu/derep/tests/main.nf.test.snap
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
{
"derep - stub": {
"content": [
{
"0": [
[
{
"id": "test",
"single_end": false
},
"test_derep.fasta.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
]
],
"1": [
"versions.yml:md5,f71fc1ed4ec36bf3a389c4aa28e1f08d"
],
"fasta": [
[
{
"id": "test",
"single_end": false
},
"test_derep.fasta.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
]
],
"versions": [
"versions.yml:md5,f71fc1ed4ec36bf3a389c4aa28e1f08d"
]
}
],
"meta": {
"nf-test": "0.8.4",
"nextflow": "23.04.0"
},
"timestamp": "2024-07-15T14:53:08.527366698"
},
"derep - 3 fastas": {
"content": [
{
"0": [
[
{
"id": "test"
},
"test_derep.fasta.gz:md5,db73b7edf590972f275915ffb7810933"
]
],
"1": [
"versions.yml:md5,f71fc1ed4ec36bf3a389c4aa28e1f08d"
],
"fasta": [
[
{
"id": "test"
},
"test_derep.fasta.gz:md5,db73b7edf590972f275915ffb7810933"
]
],
"versions": [
"versions.yml:md5,f71fc1ed4ec36bf3a389c4aa28e1f08d"
]
}
],
"meta": {
"nf-test": "0.8.4",
"nextflow": "23.04.0"
},
"timestamp": "2024-07-15T14:53:02.934761389"
},
"derep - conflict": {
"content": [
{
"0": [

],
"1": [

],
"fasta": [

],
"versions": [

]
}
],
"meta": {
"nf-test": "0.8.4",
"nextflow": "23.04.0"
},
"timestamp": "2024-07-15T15:17:01.598941968"
}
}
2 changes: 2 additions & 0 deletions modules/nf-core/seqfu/derep/tests/tags.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Tag "seqfu/derep" mapped to the path glob covering every file of this module.
# NOTE(review): presumably consumed by CI to decide which tests to run when
# these paths change - confirm against the repository's test tooling.
seqfu/derep:
- "modules/nf-core/seqfu/derep/**"
Loading