-
Notifications
You must be signed in to change notification settings - Fork 718
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* add-usearch-unoise3 * add seqfu/derep * Delete modules/nf-core/usearch/unoise3/environment.yml * Delete modules/nf-core/usearch/unoise3/main.nf * remove files * update metadata * Update modules/nf-core/seqfu/derep/main.nf Co-authored-by: Simon Pearce <[email protected]> * Update modules/nf-core/seqfu/derep/main.nf Co-authored-by: Simon Pearce <[email protected]> * Update modules/nf-core/seqfu/derep/main.nf Co-authored-by: Simon Pearce <[email protected]> * Update modules/nf-core/seqfu/derep/main.nf Co-authored-by: Simon Pearce <[email protected]> * Update modules/nf-core/seqfu/derep/main.nf Co-authored-by: Simon Pearce <[email protected]> * Update modules/nf-core/seqfu/derep/tests/main.nf.test Co-authored-by: Simon Pearce <[email protected]> * Fix dataset in -stub * Clarify description * Update modules/nf-core/seqfu/derep/meta.yml Co-authored-by: Simon Pearce <[email protected]> * check for pre-existing output file && update test * Update main.nf --------- Co-authored-by: Simon Pearce <[email protected]>
- Loading branch information
Showing
6 changed files
with
285 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
--- | ||
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json | ||
# Conda environment for the seqfu/derep module; it provides the single `seqfu` dependency. | ||
name: "seqfu_derep" | ||
channels: | ||
- conda-forge | ||
- bioconda | ||
- defaults | ||
dependencies: | ||
# Pinned to the same seqfu release (1.20.3) as the container tags in the module's main.nf. | ||
- "bioconda::seqfu=1.20.3" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
process SEQFU_DEREP { | ||
// Dereplicates one or more FASTA/FASTQ files with `seqfu derep` and writes a single | ||
// gzip-compressed FASTA named "<prefix>.fasta.gz" per input tuple. | ||
tag "$meta.id" | ||
label 'process_low' | ||
conda "${moduleDir}/environment.yml" | ||
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? | ||
'https://depot.galaxyproject.org/singularity/seqfu:1.20.3--h1eb128b_2': | ||
'biocontainers/seqfu:1.20.3--h1eb128b_2' }" | ||
|
||
input: | ||
// meta: sample map (meta.id drives the tag and the default prefix); fastas: one or more sequence files | ||
tuple val(meta), path(fastas) | ||
|
||
output: | ||
// Match on the extension only: the file is named "<prefix>.fasta.gz" and task.ext.prefix may be | ||
// overridden to a value that does not end in "_derep". Staged inputs are not captured by output | ||
// globs unless includeInputs is set, so input *.fasta.gz files are not emitted here. | ||
tuple val(meta), path("*.fasta.gz"), emit: fasta | ||
path "versions.yml" , emit: versions | ||
|
||
when: | ||
task.ext.when == null || task.ext.when | ||
|
||
script: | ||
def args = task.ext.args ?: '' | ||
def prefix = task.ext.prefix ?: "${meta.id}_derep" | ||
// Refuse to run when an input carries the same name as the file the shell redirect below creates. | ||
def fasta_files = fastas.collect { it.getName() } | ||
if (fasta_files.any { it == "${prefix}.fasta.gz" }) { | ||
error "Input file name coincides with the output file name: ${prefix}.fasta.gz. Please set a unique prefix." | ||
} | ||
|
||
""" | ||
seqfu \\ | ||
derep \\ | ||
$args \\ | ||
$fastas | gzip -c > "${prefix}.fasta.gz" | ||
cat <<-END_VERSIONS > versions.yml | ||
"${task.process}": | ||
seqfu: \$(seqfu version) | ||
END_VERSIONS | ||
""" | ||
|
||
stub: | ||
// The stub writes a gzipped empty line as a placeholder output; `args` is not needed here. | ||
def prefix = task.ext.prefix ?: "${meta.id}_derep" | ||
""" | ||
echo "" | gzip -c > "${prefix}.fasta.gz" | ||
cat <<-END_VERSIONS > versions.yml | ||
"${task.process}": | ||
seqfu: \$(seqfu version) | ||
END_VERSIONS | ||
""" | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
--- | ||
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json | ||
# Module metadata for seqfu/derep: tool description plus the documented input and output channels. | ||
name: "seqfu_derep" | ||
description: Dereplicate FASTX sequences, removing duplicate sequences and printing the number of identical sequences in the sequence header. Can dereplicate already dereplicated FASTA files, summing the numbers found in the headers. | ||
keywords: | ||
- dereplicate | ||
- fasta | ||
- uniques | ||
tools: | ||
- "seqfu": | ||
description: "DNA sequence utilities for FASTX files" | ||
homepage: "https://telatin.github.io/seqfu2/" | ||
documentation: "https://telatin.github.io/seqfu2/" | ||
tool_dev_url: "https://telatin.github.io/seqfu2/tools/derep.html" | ||
doi: "10.3390/bioengineering8050059" | ||
licence: ["GPL v3"] | ||
input: | ||
- meta: | ||
type: map | ||
description: | | ||
Groovy Map containing sample information | ||
e.g. `[ id:'sample1', single_end:false ]` | ||
- fastas: | ||
type: file | ||
description: Input files (mainly FASTA, FASTQ supported) | ||
# "{,.gz}" marks the compression suffix as optional; "[.gz]" would be a one-character class. | ||
pattern: "*.{fa,fna,faa,fasta,fq,fastq}{,.gz}" | ||
output: | ||
- meta: | ||
type: map | ||
description: | | ||
Groovy Map containing sample information | ||
e.g. `[ id:'sample1', single_end:false ]` | ||
- versions: | ||
type: file | ||
description: File containing software versions | ||
pattern: "versions.yml" | ||
- fasta: | ||
type: file | ||
description: dereplicated file (FASTA format) | ||
pattern: "*.fasta.gz" | ||
authors: | ||
- "@telatin" | ||
maintainers: | ||
- "@telatin" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,87 @@ | ||
nextflow_process { | ||
|
||
// nf-test specification for SEQFU_DEREP: a real run on three inputs, a name-conflict failure, and a stub run. | ||
name "Test Process SEQFU_DEREP" | ||
script "../main.nf" | ||
process "SEQFU_DEREP" | ||
|
||
tag "modules" | ||
tag "modules_nfcore" | ||
tag "seqfu" | ||
tag "seqfu/derep" | ||
|
||
// Three inputs (two plain, one gzipped) must yield exactly one dereplicated FASTA. | ||
test("derep - 3 fastas") { | ||
|
||
when { | ||
process { | ||
""" | ||
input[0] = [ | ||
[ id:'test' ], | ||
[ | ||
file(params.test_data['sarscov2']['genome']['proteome_fasta'], checkIfExists: true), | ||
file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true), | ||
file(params.test_data['sarscov2']['genome']['genome_fasta_gz'], checkIfExists: true) | ||
] | ||
] | ||
""" | ||
} | ||
} | ||
|
||
then { | ||
assertAll( | ||
{ assert process.success }, | ||
{ assert snapshot(process.out).match() }, | ||
{ assert process.out.fasta.size() == 1 } | ||
) | ||
} | ||
|
||
} | ||
// The input is deliberately named like the default output ("test_derep.fasta.gz"), | ||
// so the process must abort with its name-conflict error and emit nothing. | ||
test("derep - conflict") { | ||
|
||
when { | ||
process { | ||
""" | ||
input[0] = Channel.of(">T1;size=300", | ||
"TTGATCACATA", | ||
">T2;size=10", | ||
"TTGATCTCATA", | ||
">T3;size=4", | ||
"TTGATGACATA") | ||
.collectFile(name: "test_derep.fasta.gz", newLine:true, sort:false) | ||
.map { file -> [ [ id:'test', single_end:true ], file ] } | ||
""" | ||
} | ||
} | ||
|
||
then { | ||
assertAll( | ||
{ assert process.failed }, | ||
{ assert snapshot(process.out).match() } | ||
) | ||
} | ||
|
||
} | ||
|
||
// Stub run: checks the output channel wiring without running the real dereplication command. | ||
test("derep - stub") { | ||
|
||
options "-stub" | ||
|
||
when { | ||
process { | ||
""" | ||
input[0] = [ | ||
[ id:'test', single_end:false ], // meta map | ||
file(params.test_data['sarscov2']['genome']['proteome_fasta'], checkIfExists: true) | ||
] | ||
""" | ||
} | ||
} | ||
|
||
then { | ||
assertAll( | ||
{ assert process.success }, | ||
{ assert snapshot(process.out).match() } | ||
) | ||
} | ||
|
||
} | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,93 @@ | ||
{ | ||
"derep - stub": { | ||
"content": [ | ||
{ | ||
"0": [ | ||
[ | ||
{ | ||
"id": "test", | ||
"single_end": false | ||
}, | ||
"test_derep.fasta.gz:md5,68b329da9893e34099c7d8ad5cb9c940" | ||
] | ||
], | ||
"1": [ | ||
"versions.yml:md5,f71fc1ed4ec36bf3a389c4aa28e1f08d" | ||
], | ||
"fasta": [ | ||
[ | ||
{ | ||
"id": "test", | ||
"single_end": false | ||
}, | ||
"test_derep.fasta.gz:md5,68b329da9893e34099c7d8ad5cb9c940" | ||
] | ||
], | ||
"versions": [ | ||
"versions.yml:md5,f71fc1ed4ec36bf3a389c4aa28e1f08d" | ||
] | ||
} | ||
], | ||
"meta": { | ||
"nf-test": "0.8.4", | ||
"nextflow": "23.04.0" | ||
}, | ||
"timestamp": "2024-07-15T14:53:08.527366698" | ||
}, | ||
"derep - 3 fastas": { | ||
"content": [ | ||
{ | ||
"0": [ | ||
[ | ||
{ | ||
"id": "test" | ||
}, | ||
"test_derep.fasta.gz:md5,db73b7edf590972f275915ffb7810933" | ||
] | ||
], | ||
"1": [ | ||
"versions.yml:md5,f71fc1ed4ec36bf3a389c4aa28e1f08d" | ||
], | ||
"fasta": [ | ||
[ | ||
{ | ||
"id": "test" | ||
}, | ||
"test_derep.fasta.gz:md5,db73b7edf590972f275915ffb7810933" | ||
] | ||
], | ||
"versions": [ | ||
"versions.yml:md5,f71fc1ed4ec36bf3a389c4aa28e1f08d" | ||
] | ||
} | ||
], | ||
"meta": { | ||
"nf-test": "0.8.4", | ||
"nextflow": "23.04.0" | ||
}, | ||
"timestamp": "2024-07-15T14:53:02.934761389" | ||
}, | ||
"derep - conflict": { | ||
"content": [ | ||
{ | ||
"0": [ | ||
|
||
], | ||
"1": [ | ||
|
||
], | ||
"fasta": [ | ||
|
||
], | ||
"versions": [ | ||
|
||
] | ||
} | ||
], | ||
"meta": { | ||
"nf-test": "0.8.4", | ||
"nextflow": "23.04.0" | ||
}, | ||
"timestamp": "2024-07-15T15:17:01.598941968" | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
seqfu/derep: | ||
- "modules/nf-core/seqfu/derep/**" |