diff --git a/bin/plot_quality_profile.R b/bin/plot_quality_profile.R index d8cb64a..97a0811 100755 --- a/bin/plot_quality_profile.R +++ b/bin/plot_quality_profile.R @@ -1,7 +1,9 @@ #!/usr/bin/env Rscript -suppressPackageStartupMessages(library(dada2)) -suppressPackageStartupMessages(library(tidyverse)) -suppressPackageStartupMessages(library(optparse)) +packages <- c('dada2', 'tidyverse', 'optparse', 'yaml', 'tibble') + +suppressPackageStartupMessages({ + lapply(packages, require, character.only = TRUE) + }) option_list = list( make_option(c("-i","--id"), @@ -13,8 +15,13 @@ option_list = list( help="File pattern [default \"%default\"]"), make_option(c("--session"), action = "store_true", - help="Generate nf-core compliant YAML file w/ version information", + help="Generate nf-core compliant YAML file w/ version information" ), + # make_option(c("--process"), + # type="character", + # default = "PLOT_QUALITY_PROFILE", + # help="Process call" + # ), make_option(c("--yaml"), default = "versions.yml", help="YAML file name (see --session)") @@ -28,4 +35,10 @@ pl <- plotQualityProfile(fns) ggsave(paste0(opt$id,".qualities.pdf"), plot=pl, device="pdf") # we may revisit the quality scores and other info in this plot for other purposes -saveRDS(pl, paste0(opt$id,".qualities.RDS")) \ No newline at end of file +saveRDS(pl, paste0(opt$id,".qualities.RDS")) + +if (opt$session) { + pv <- sapply(packages, function(x) { as.character(packageVersion(x)) }, simplify = FALSE) + pv$R <- paste0(R.Version()[c("major", "minor")], collapse = ".") + write_yaml(list("PLOT_QUALITY_PROFILE"=pv), "versions.yml") +} \ No newline at end of file diff --git a/modules/local/plotqualityprofile.nf b/modules/local/plotqualityprofile.nf index baaac80..9d8b5d0 100644 --- a/modules/local/plotqualityprofile.nf +++ b/modules/local/plotqualityprofile.nf @@ -14,7 +14,7 @@ process PLOT_QUALITY_PROFILE { tuple val(meta), path("*.RDS"), emit: rds // TODO nf-core: List additional required output channels/values here - // path "versions.yml" , emit: versions + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -23,7 +23,7 @@ process PLOT_QUALITY_PROFILE { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" """ - plot_quality_profile.R --id ${meta.id} + plot_quality_profile.R --id ${meta.id} --session """ stub: diff --git a/modules/local/vsearch_eestats.nf b/modules/local/vsearch_eestats.nf index cb82e42..9d5412a 100644 --- a/modules/local/vsearch_eestats.nf +++ b/modules/local/vsearch_eestats.nf @@ -1,6 +1,7 @@ // TODO: at the moment I'm checking on the feasibility of using // these for additional QC plots, esp on cum expected errors; // at the moment they are a bit of a data dump +// TODO: not sure if this is a module or not already in place, need to check process VSEARCH_EESTATS { tag "$meta.id" @@ -15,6 +16,7 @@ process VSEARCH_EESTATS { path("${meta.id}.{R1,R2}.stats"), emit: stats path("${meta.id}.{R1,R2}.eestats"), emit: eestats path("${meta.id}.{R1,R2}.eestats2"), emit: eestats2 + path("versions.yml") , emit: versions when: task.ext.when == null || task.ext.when @@ -33,6 +35,12 @@ process VSEARCH_EESTATS { vsearch --fastq_eestats2 ${reads[0]} \ --ee_cutoffs "0.5,1.0,2.0,4.0,8.0" \ --output ${meta.id}.R1.eestats2 + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + vsearch: \$(vsearch --version 2>&1 | head -n 1 | sed 's/vsearch //g' | sed 's/,.*//g' | sed 's/^v//' | sed 's/_.*//') + END_VERSIONS + """ } else { """ @@ -55,6 +63,11 @@ process VSEARCH_EESTATS { vsearch --fastq_eestats2 ${reads[1]} \ --ee_cutoffs "0.5,1.0,2.0,4.0,8.0" \ --output ${meta.id}.R2.eestats2 + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + vsearch: \$(vsearch --version 2>&1 | head -n 1 | sed 's/vsearch //g' | sed 's/,.*//g' | sed 's/^v//' | sed 's/_.*//') + END_VERSIONS """ } @@ -62,6 +75,6 @@ process VSEARCH_EESTATS { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" """ - touch ${prefix}.PDF + touch ${meta.id}.R1.stats ${meta.id}.R1.eestats ${meta.id}.R1.eestats2 """ } diff --git a/nextflow.config b/nextflow.config index ad7232b..6b6d877 100644 --- a/nextflow.config +++ b/nextflow.config @@ -27,8 +27,10 @@ params { // precheck = false // TODO: this needs to be removed or made more specific - // amplicon = '16S' - // platform = 'illumina' + amplicon = "16S" + quality_binning = false // set to true if using binned qualities (NovaSeq, PacBio Revio) + amplicon_type = "overlapping" + platform = "illumina" // QC // skip_FASTQC = false // set to run this step by default, this can fail with large sample #'s @@ -63,8 +65,7 @@ params { rmPhiX = false // Error model - quality_binning = false // false, set to true if using binned qualities (NovaSeq) - error_model = 'illumina' // NYI, thinking about best way to implement this + // custom_error_model = 'illumina' // NYI, thinking about best way to implement this // Merging // paired_type = "overlapping" // allowed: 'overlapping' (paired reads overlap), 'separate' (paired reads are non-overlapping), or 'mix' (variable length) diff --git a/nextflow_schema.json b/nextflow_schema.json index 7c3cee8..9442aee 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -429,17 +429,28 @@ "default": "md5" }, "quality_binning": { - "type": "boolean" - }, - "error_model": { - "type": "string", - "default": "illumina" + "type": "boolean", + "description": "Are the quality scores binned? This can be used to adjust the error model for DADA2." }, "rescue_unmerged": { "type": "boolean" }, "removeBimeraDenovo_options": { "type": "string" + }, + "amplicon": { + "type": "string", + "description": "Amplicon name", + "default": "16S" + }, + "platform": { + "type": "string", + "default": "illumina", + "description": "Sequencing platform" + }, + "amplicon_type": { + "type": "string", + "default": "overlapping" } } } \ No newline at end of file diff --git a/workflows/tada.nf b/workflows/tada.nf index f15b404..b785444 100644 --- a/workflows/tada.nf +++ b/workflows/tada.nf @@ -61,12 +61,11 @@ workflow TADA { ch_versions = Channel.empty() ch_multiqc_files = Channel.empty() - // def platform = '' - // def platform = params.platform.toLowerCase() + def platform = params.platform.toLowerCase() - // if (!(["illumina","pacbio","pacbio-kinnex"].contains(platform))) { - // exit 1, "Only supported platforms (--platform argument) are currently 'pacbio', 'pacbio-kinnex', or 'illumina'" - // } + if (!(["illumina","pacbio"].contains(platform))) { + exit 1, "Only supported platforms (--platform argument) are currently 'pacbio' or 'illumina'" + } // TODO: implement seqtable input // // ${deity} there has to be a better way to check this! @@ -88,9 +87,9 @@ workflow TADA { // } // } - if (params.aligner == 'infernal' && params.infernalCM == false){ - exit 1, "Must set covariance model using --infernalCM when using Infernal" - } + // if (params.aligner == 'infernal' && params.infernalCM == false){ + // exit 1, "Must set covariance model using --infernalCM when using Infernal" + // } if (!(['simple','md5'].contains(params.id_type))) { exit 1, "--id_type can currently only be set to 'simple' or 'md5', got ${params.id_type}" @@ -112,10 +111,14 @@ workflow TADA { ch_samplesheet ) + ch_versions = ch_versions.mix(PLOT_QUALITY_PROFILE.out.versions.first()) + VSEARCH_EESTATS ( ch_samplesheet ) + ch_versions = ch_versions.mix(VSEARCH_EESTATS.out.versions.first()) + // TODO: notice aggregation of data for multiqc and for version tracking, // needs to be added throughout the workflow // ch_multiqc_files = ch_multiqc_files.mix(PLOTQUALITYPROFILE.out.zip.collect{it[1]})