get versioning working for more tools
cjfields committed Apr 12, 2024
1 parent dc3cb7e commit 3ac1a18
Showing 6 changed files with 66 additions and 25 deletions.
23 changes: 18 additions & 5 deletions bin/plot_quality_profile.R
@@ -1,7 +1,9 @@
#!/usr/bin/env Rscript
suppressPackageStartupMessages(library(dada2))
suppressPackageStartupMessages(library(tidyverse))
suppressPackageStartupMessages(library(optparse))
packages <- c('dada2', 'tidyverse', 'optparse', 'yaml', 'tibble')

suppressPackageStartupMessages({
lapply(packages, require, character.only = TRUE)
})

option_list = list(
make_option(c("-i","--id"),
@@ -13,8 +15,13 @@ option_list = list(
help="File pattern [default \"%default\"]"),
make_option(c("--session"),
action = "store_true",
help="Generate nf-core compliant YAML file w/ version information",
help="Generate nf-core compliant YAML file w/ version information"
),
# make_option(c("--process"),
# type="character",
# default = "PLOT_QUALITY_PROFILE",
# help="Process call"
# ),
make_option(c("--yaml"),
default = "versions.yml",
help="YAML file name (see --session)")
@@ -28,4 +35,10 @@ pl <- plotQualityProfile(fns)
ggsave(paste0(opt$id,".qualities.pdf"), plot=pl, device="pdf")

# we may revisit the quality scores and other info in this plot for other purposes
saveRDS(pl, paste0(opt$id,".qualities.RDS"))
saveRDS(pl, paste0(opt$id,".qualities.RDS"))

if (opt$session) {
pv <- sapply(packages, function(x) { as.character(packageVersion(x)) }, simplify = FALSE)
pv$R <- paste0(R.Version()[c("major", "minor")], collapse = ".")
write_yaml(list("PLOT_QUALITY_PROFILE"=pv), "versions.yml")
}
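
With --session set, the script now collects packageVersion() for each entry in packages plus the R version and writes them under the process name. The resulting versions.yml would look roughly like this (the version numbers below are illustrative, not taken from this commit):

PLOT_QUALITY_PROFILE:
  dada2: 1.30.0
  tidyverse: 2.0.0
  optparse: 1.7.4
  yaml: 2.3.8
  tibble: 3.2.1
  R: 4.3.2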
4 changes: 2 additions & 2 deletions modules/local/plotqualityprofile.nf
@@ -14,7 +14,7 @@ process PLOT_QUALITY_PROFILE {
tuple val(meta), path("*.RDS"), emit: rds

// TODO nf-core: List additional required output channels/values here
// path "versions.yml" , emit: versions
path "versions.yml" , emit: versions

when:
task.ext.when == null || task.ext.when
@@ -23,7 +23,7 @@ process PLOT_QUALITY_PROFILE {
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
"""
plot_quality_profile.R --id ${meta.id}
plot_quality_profile.R --id ${meta.id} --session
"""

stub:
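
Since versions.yml is now a declared output of this process, a -stub run also needs to create it. The stub block is truncated in this view; a minimal sketch of what it could contain, with the output names assumed from the R script above rather than taken from this commit:

stub:
def prefix = task.ext.prefix ?: "${meta.id}"
"""
touch ${meta.id}.qualities.pdf ${meta.id}.qualities.RDS
touch versions.yml
"""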
15 changes: 14 additions & 1 deletion modules/local/vsearch_eestats.nf
@@ -1,6 +1,7 @@
// TODO: at the moment I'm checking on the feasibility of using
// these for additional QC plots, esp on cum expected errors;
// at the moment they are a bit of a data dump
// TODO: not sure if this is a module or not already in place, need to check

process VSEARCH_EESTATS {
tag "$meta.id"
@@ -15,6 +16,7 @@ process VSEARCH_EESTATS {
path("${meta.id}.{R1,R2}.stats"), emit: stats
path("${meta.id}.{R1,R2}.eestats"), emit: eestats
path("${meta.id}.{R1,R2}.eestats2"), emit: eestats2
path("versions.yml") , emit: versions

when:
task.ext.when == null || task.ext.when
@@ -33,6 +35,12 @@
vsearch --fastq_eestats2 ${reads[0]} \
--ee_cutoffs "0.5,1.0,2.0,4.0,8.0" \
--output ${meta.id}.R1.eestats2
cat <<-END_VERSIONS > versions.yml
"${task.process}":
vsearch: \$(vsearch --version 2>&1 | head -n 1 | sed 's/vsearch //g' | sed 's/,.*//g' | sed 's/^v//' | sed 's/_.*//')
END_VERSIONS
"""
} else {
"""
@@ -55,13 +63,18 @@ process VSEARCH_EESTATS {
vsearch --fastq_eestats2 ${reads[1]} \
--ee_cutoffs "0.5,1.0,2.0,4.0,8.0" \
--output ${meta.id}.R2.eestats2
cat <<-END_VERSIONS > versions.yml
"${task.process}":
vsearch: \$(vsearch --version 2>&1 | head -n 1 | sed 's/vsearch //g' | sed 's/,.*//g' | sed 's/^v//' | sed 's/_.*//')
END_VERSIONS
"""
}

stub:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
"""
touch ${prefix}.PDF
touch ${meta.id}.R1.stats ${meta.id}.R1.eestats ${meta.id}.R1.eestats2
"""
}
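
The heredoc added in both branches records the vsearch version using the standard nf-core pattern. vsearch prints its banner as something like "vsearch v2.21.1_linux_x86_64, 15.8GB RAM, 4 cores" (the exact string varies by build and platform); run against a line of that shape, the sed chain reduces it to the bare version:

echo "vsearch v2.21.1_linux_x86_64, 15.8GB RAM, 4 cores" \
    | sed 's/vsearch //g' | sed 's/,.*//g' | sed 's/^v//' | sed 's/_.*//'
# prints: 2.21.1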
9 changes: 5 additions & 4 deletions nextflow.config
@@ -27,8 +27,10 @@ params {
// precheck = false

// TODO: this needs to be removed or made more specific
// amplicon = '16S'
// platform = 'illumina'
amplicon = "16S"
quality_binning = false // set to true if using binned qualities (NovaSeq, PacBio Revio)
amplicon_type = "overlapping"
platform = "illumina"

// QC
// skip_FASTQC = false // set to run this step by default, this can fail with large sample #'s
@@ -63,8 +65,7 @@ params {
rmPhiX = false

// Error model
quality_binning = false // false, set to true if using binned qualities (NovaSeq)
error_model = 'illumina' // NYI, thinking about best way to implement this
// custom_error_model = 'illumina' // NYI, thinking about best way to implement this

// Merging
// paired_type = "overlapping" // allowed: 'overlapping' (paired reads overlap), 'separate' (paired reads are non-overlapping), or 'mix' (variable length)
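
The newly enabled params (amplicon, amplicon_type, platform, quality_binning) are ordinary pipeline parameters, so they can be overridden at launch rather than by editing this file. A hypothetical invocation, assuming the usual main.nf entry point and a docker profile (both placeholders, not taken from this commit):

nextflow run main.nf -profile docker \
    --platform pacbio \
    --amplicon 16S \
    --amplicon_type overlapping \
    --quality_binning true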
21 changes: 16 additions & 5 deletions nextflow_schema.json
@@ -429,17 +429,28 @@
"default": "md5"
},
"quality_binning": {
"type": "boolean"
},
"error_model": {
"type": "string",
"default": "illumina"
"type": "boolean",
"description": "Are the quality scores binned? This can be used to adjust the error model for DADA2."
},
"rescue_unmerged": {
"type": "boolean"
},
"removeBimeraDenovo_options": {
"type": "string"
},
"amplicon": {
"type": "string",
"description": "Amplicon name",
"default": "16S"
},
"platform": {
"type": "string",
"default": "illumina",
"description": "Sequencing platform"
},
"amplicon_type": {
"type": "string",
"default": "overlapping"
}
}
}
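
The workflow (see workflows/tada.nf below) only accepts 'illumina' or 'pacbio' for --platform. One way to expose that same constraint to schema validation would be an enum on this entry; a sketch, not part of this commit:

"platform": {
    "type": "string",
    "default": "illumina",
    "enum": ["illumina", "pacbio"],
    "description": "Sequencing platform"
}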
19 changes: 11 additions & 8 deletions workflows/tada.nf
@@ -61,12 +61,11 @@ workflow TADA {
ch_versions = Channel.empty()
ch_multiqc_files = Channel.empty()

// def platform = ''
// def platform = params.platform.toLowerCase()
def platform = params.platform.toLowerCase()

// if (!(["illumina","pacbio","pacbio-kinnex"].contains(platform))) {
// exit 1, "Only supported platforms (--platform argument) are currently 'pacbio', 'pacbio-kinnex', or 'illumina'"
// }
if (!(["illumina","pacbio"].contains(platform))) {
exit 1, "Only supported platforms (--platform argument) are currently 'pacbio' or 'illumina'"
}

// TODO: implement seqtable input
// // ${deity} there has to be a better way to check this!
@@ -88,9 +87,9 @@
// }
// }

if (params.aligner == 'infernal' && params.infernalCM == false){
exit 1, "Must set covariance model using --infernalCM when using Infernal"
}
// if (params.aligner == 'infernal' && params.infernalCM == false){
// exit 1, "Must set covariance model using --infernalCM when using Infernal"
// }

if (!(['simple','md5'].contains(params.id_type))) {
exit 1, "--id_type can currently only be set to 'simple' or 'md5', got ${params.id_type}"
@@ -112,10 +111,14 @@
ch_samplesheet
)

ch_versions = ch_versions.mix(PLOT_QUALITY_PROFILE.out.versions.first())

VSEARCH_EESTATS (
ch_samplesheet
)

ch_versions = ch_versions.mix(VSEARCH_EESTATS.out.versions.first())

// TODO: notice aggregation of data for multiqc and for version tracking,
// needs to be added throughout the workflow
// ch_multiqc_files = ch_multiqc_files.mix(PLOTQUALITYPROFILE.out.zip.collect{it[1]})
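
Mixing each module's versions output into ch_versions only gathers the per-process YAML fragments; they still have to be collated into a single file for reporting. In recent nf-core templates this is typically done with the softwareVersionsToYAML helper from the utils_nfcore_pipeline subworkflow, roughly as follows (assuming this pipeline follows that template; the snippet is a sketch, not part of this commit):

softwareVersionsToYAML(ch_versions)
    .collectFile(
        storeDir: "${params.outdir}/pipeline_info",
        name: 'software_versions.yml',
        sort: true,
        newLine: true
    )
    .set { ch_collated_versions }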
