get versioning working for more tools
cjfields committed Apr 12, 2024
1 parent dc3cb7e commit 3ac1a18
Showing 6 changed files with 66 additions and 25 deletions.
23 changes: 18 additions & 5 deletions bin/plot_quality_profile.R
@@ -1,7 +1,9 @@
#!/usr/bin/env Rscript
suppressPackageStartupMessages(library(dada2))
suppressPackageStartupMessages(library(tidyverse))
suppressPackageStartupMessages(library(optparse))
packages <- c('dada2', 'tidyverse', 'optparse', 'yaml', 'tibble')

suppressPackageStartupMessages({
lapply(packages, require, character.only = TRUE)
})

option_list = list(
make_option(c("-i","--id"),
@@ -13,8 +15,13 @@ option_list = list(
help="File pattern [default \"%default\"]"),
make_option(c("--session"),
action = "store_true",
help="Generate nf-core compliant YAML file w/ version information",
help="Generate nf-core compliant YAML file w/ version information"
),
# make_option(c("--process"),
# type="character",
# default = "PLOT_QUALITY_PROFILE",
# help="Process call"
# ),
make_option(c("--yaml"),
default = "versions.yml",
help="YAML file name (see --session)")
@@ -28,4 +35,10 @@ pl <- plotQualityProfile(fns)
ggsave(paste0(opt$id,".qualities.pdf"), plot=pl, device="pdf")

# we may revisit the quality scores and other info in this plot for other purposes
saveRDS(pl, paste0(opt$id,".qualities.RDS"))
saveRDS(pl, paste0(opt$id,".qualities.RDS"))

if (opt$session) {
pv <- sapply(packages, function(x) { as.character(packageVersion(x)) }, simplify = FALSE)
pv$R <- paste0(R.Version()[c("major", "minor")], collapse = ".")
write_yaml(list("PLOT_QUALITY_PROFILE"=pv), "versions.yml")
}
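
With --session set, the script now collects packageVersion() for each entry in packages plus the R version and writes them under the process name. The resulting versions.yml would look roughly like this (the version numbers below are illustrative, not taken from this commit):

PLOT_QUALITY_PROFILE:
  dada2: 1.30.0
  tidyverse: 2.0.0
  optparse: 1.7.4
  yaml: 2.3.8
  tibble: 3.2.1
  R: 4.3.2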
4 changes: 2 additions & 2 deletions modules/local/plotqualityprofile.nf
@@ -14,7 +14,7 @@ process PLOT_QUALITY_PROFILE {
tuple val(meta), path("*.RDS"), emit: rds

// TODO nf-core: List additional required output channels/values here
// path "versions.yml" , emit: versions
path "versions.yml" , emit: versions

when:
task.ext.when == null || task.ext.when
@@ -23,7 +23,7 @@ process PLOT_QUALITY_PROFILE {
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
"""
plot_quality_profile.R --id ${meta.id}
plot_quality_profile.R --id ${meta.id} --session
"""

stub:
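
Since versions.yml is now a declared output of this process, a -stub run also needs to create it. The stub block is truncated in this view; a minimal sketch of what it could contain, with the output names assumed from the R script above rather than taken from this commit:

stub:
def prefix = task.ext.prefix ?: "${meta.id}"
"""
touch ${meta.id}.qualities.pdf ${meta.id}.qualities.RDS
touch versions.yml
"""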
15 changes: 14 additions & 1 deletion modules/local/vsearch_eestats.nf
@@ -1,6 +1,7 @@
// TODO: at the moment I'm checking on the feasibility of using
// these for additional QC plots, esp on cum expected errors;
// at the moment they are a bit of a data dump
// TODO: not sure if this is a module or not already in place, need to check

process VSEARCH_EESTATS {
tag "$meta.id"
@@ -15,6 +16,7 @@ process VSEARCH_EESTATS {
path("${meta.id}.{R1,R2}.stats"), emit: stats
path("${meta.id}.{R1,R2}.eestats"), emit: eestats
path("${meta.id}.{R1,R2}.eestats2"), emit: eestats2
path("versions.yml") , emit: versions

when:
task.ext.when == null || task.ext.when
@@ -33,6 +35,12 @@
vsearch --fastq_eestats2 ${reads[0]} \
--ee_cutoffs "0.5,1.0,2.0,4.0,8.0" \
--output ${meta.id}.R1.eestats2
cat <<-END_VERSIONS > versions.yml
"${task.process}":
vsearch: \$(vsearch --version 2>&1 | head -n 1 | sed 's/vsearch //g' | sed 's/,.*//g' | sed 's/^v//' | sed 's/_.*//')
END_VERSIONS
"""
} else {
"""
@@ -55,13 +63,18 @@ process VSEARCH_EESTATS {
vsearch --fastq_eestats2 ${reads[1]} \
--ee_cutoffs "0.5,1.0,2.0,4.0,8.0" \
--output ${meta.id}.R2.eestats2
cat <<-END_VERSIONS > versions.yml
"${task.process}":
vsearch: \$(vsearch --version 2>&1 | head -n 1 | sed 's/vsearch //g' | sed 's/,.*//g' | sed 's/^v//' | sed 's/_.*//')
END_VERSIONS
"""
}

stub:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
"""
touch ${prefix}.PDF
touch ${meta.id}.R1.stats ${meta.id}.R1.eestats ${meta.id}.R1.eestats2
"""
}
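
The heredoc added in both branches records the vsearch version using the standard nf-core pattern. vsearch prints its banner as something like "vsearch v2.21.1_linux_x86_64, 15.8GB RAM, 4 cores" (the exact string varies by build and platform); run against a line of that shape, the sed chain reduces it to the bare version:

echo "vsearch v2.21.1_linux_x86_64, 15.8GB RAM, 4 cores" \
    | sed 's/vsearch //g' | sed 's/,.*//g' | sed 's/^v//' | sed 's/_.*//'
# prints: 2.21.1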
9 changes: 5 additions & 4 deletions nextflow.config
@@ -27,8 +27,10 @@ params {
// precheck = false

// TODO: this needs to be removed or made more specific
// amplicon = '16S'
// platform = 'illumina'
amplicon = "16S"
quality_binning = false // set to true if using binned qualities (NovaSeq, PacBio Revio)
amplicon_type = "overlapping"
platform = "illumina"

// QC
// skip_FASTQC = false // set to run this step by default, this can fail with large sample #'s
@@ -63,8 +65,7 @@ params {
rmPhiX = false

// Error model
quality_binning = false // false, set to true if using binned qualities (NovaSeq)
error_model = 'illumina' // NYI, thinking about best way to implement this
// custom_error_model = 'illumina' // NYI, thinking about best way to implement this

// Merging
// paired_type = "overlapping" // allowed: 'overlapping' (paired reads overlap), 'separate' (paired reads are non-overlapping), or 'mix' (variable length)
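
The newly enabled params (amplicon, amplicon_type, platform, quality_binning) are ordinary pipeline parameters, so they can be overridden at launch rather than by editing this file. A hypothetical invocation, assuming the usual main.nf entry point and a docker profile (both placeholders, not taken from this commit):

nextflow run main.nf -profile docker \
    --platform pacbio \
    --amplicon 16S \
    --amplicon_type overlapping \
    --quality_binning true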
21 changes: 16 additions & 5 deletions nextflow_schema.json
@@ -429,17 +429,28 @@
"default": "md5"
},
"quality_binning": {
"type": "boolean"
},
"error_model": {
"type": "string",
"default": "illumina"
"type": "boolean",
"description": "Are the quality scores binned? This can be used to adjust the error model for DADA2."
},
"rescue_unmerged": {
"type": "boolean"
},
"removeBimeraDenovo_options": {
"type": "string"
},
"amplicon": {
"type": "string",
"description": "Amplicon name",
"default": "16S"
},
"platform": {
"type": "string",
"default": "illumina",
"description": "Sequencing platform"
},
"amplicon_type": {
"type": "string",
"default": "overlapping"
}
}
}
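
The workflow (see workflows/tada.nf below) only accepts 'illumina' or 'pacbio' for --platform. One way to expose that same constraint to schema validation would be an enum on this entry; a sketch, not part of this commit:

"platform": {
    "type": "string",
    "default": "illumina",
    "enum": ["illumina", "pacbio"],
    "description": "Sequencing platform"
}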
19 changes: 11 additions & 8 deletions workflows/tada.nf
@@ -61,12 +61,11 @@ workflow TADA {
ch_versions = Channel.empty()
ch_multiqc_files = Channel.empty()

// def platform = ''
// def platform = params.platform.toLowerCase()
def platform = params.platform.toLowerCase()

// if (!(["illumina","pacbio","pacbio-kinnex"].contains(platform))) {
// exit 1, "Only supported platforms (--platform argument) are currently 'pacbio', 'pacbio-kinnex', or 'illumina'"
// }
if (!(["illumina","pacbio"].contains(platform))) {
exit 1, "Only supported platforms (--platform argument) are currently 'pacbio' or 'illumina'"
}

// TODO: implement seqtable input
// // ${deity} there has to be a better way to check this!
@@ -88,9 +87,9 @@
// }
// }

if (params.aligner == 'infernal' && params.infernalCM == false){
exit 1, "Must set covariance model using --infernalCM when using Infernal"
}
// if (params.aligner == 'infernal' && params.infernalCM == false){
// exit 1, "Must set covariance model using --infernalCM when using Infernal"
// }

if (!(['simple','md5'].contains(params.id_type))) {
exit 1, "--id_type can currently only be set to 'simple' or 'md5', got ${params.id_type}"
@@ -112,10 +111,14 @@
ch_samplesheet
)

ch_versions = ch_versions.mix(PLOT_QUALITY_PROFILE.out.versions.first())

VSEARCH_EESTATS (
ch_samplesheet
)

ch_versions = ch_versions.mix(VSEARCH_EESTATS.out.versions.first())

// TODO: notice aggregation of data for multiqc and for version tracking,
// needs to be added throughout the workflow
// ch_multiqc_files = ch_multiqc_files.mix(PLOTQUALITYPROFILE.out.zip.collect{it[1]})
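
Mixing each module's versions output into ch_versions only gathers the per-process YAML fragments; they still have to be collated into a single file for reporting. In recent nf-core templates this is typically done with the softwareVersionsToYAML helper from the utils_nfcore_pipeline subworkflow, roughly as follows (assuming this pipeline follows that template; the snippet is a sketch, not part of this commit):

softwareVersionsToYAML(ch_versions)
    .collectFile(
        storeDir: "${params.outdir}/pipeline_info",
        name: 'software_versions.yml',
        sort: true,
        newLine: true
    )
    .set { ch_collated_versions }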
