diff --git a/assets/samplesheet.csv b/assets/samplesheet.csv
index 7d41d51..d53ed0b 100644
--- a/assets/samplesheet.csv
+++ b/assets/samplesheet.csv
@@ -1,3 +1,4 @@
-sample,fastq_1,fastq_2,read_structure
+sample,fastq_1,fastq_2,read_structure,fastq_3
 SAMPLE_DUPLEX_SEQ,/path/to/fastq/files/AEG588A1_S1_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A1_S1_L002_R2_001.fastq.gz,10M1S+T 10M1S+T
 SAMPLE_SINGLE_UMI,/path/to/fastq/files/AEG588A1_S2_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A1_S2_L002_R2_001.fastq.gz,12M+T +T
+SAMPLE_UMI_FASTQ,/path/to/fastq/files/AEG588A1_S2_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A1_S2_L002_R3_001.fastq.gz,+T +T +M,/path/to/fastq/files/AEG588A1_S2_L002_R2_001.fastq.gz
diff --git a/assets/schema_input.json b/assets/schema_input.json
index 2697ff5..bcd607e 100644
--- a/assets/schema_input.json
+++ b/assets/schema_input.json
@@ -27,6 +27,13 @@
                 "pattern": "^\\S+\\.f(ast)?q\\.gz$",
                 "errorMessage": "FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'"
             },
+            "fastq_3": {
+                "type": "string",
+                "format": "file-path",
+                "exists": true,
+                "pattern": "^\\S+\\.f(ast)?q\\.gz$",
+                "errorMessage": "FastQ file for reads 3 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'"
+            },
             "read_structure": {
                 "type": "string",
                 "pattern": "^.*$",
@@ -34,6 +41,6 @@
                 "meta": ["read_structure"]
             }
         },
-        "required": ["sample", "fastq_1", "fastq_2", "read_structure"]
+        "required": ["sample", "fastq_1", "read_structure"]
     }
 }
diff --git a/subworkflows/local/utils_nfcore_fastquorum_pipeline/main.nf b/subworkflows/local/utils_nfcore_fastquorum_pipeline/main.nf
index 4d8c056..ede0bd0 100644
--- a/subworkflows/local/utils_nfcore_fastquorum_pipeline/main.nf
+++ b/subworkflows/local/utils_nfcore_fastquorum_pipeline/main.nf
@@ -92,13 +92,18 @@ workflow PIPELINE_INITIALISATION {
     Channel
         .fromSamplesheet("input")
         .map {
-            meta, fastq_1, fastq_2 ->
-                if (!fastq_2) {
-                    return [ meta.id, meta + [ single_end:true ], [ fastq_1 ] ]
-                } else {
+            meta, fastq_1, fastq_2, fastq_3 ->
+                if (fastq_3) {
+                    return [ meta.id, meta + [ single_end:false ], [ fastq_1, fastq_2, fastq_3 ] ]
+                } else if (fastq_2) {
                     return [ meta.id, meta + [ single_end:false ], [ fastq_1, fastq_2 ] ]
+                } else {
+                    return [ meta.id, meta + [ single_end:true ], [ fastq_1 ] ]
                 }
         }
+        .map {
+            validateReadStructure(it)
+        }
         .groupTuple()
         .map {
             validateInputSamplesheet(it)
@@ -163,6 +168,24 @@ def validateInputParameters() {
     genomeExistsError()
 }
 
+//
+// Validate that the number of FASTQ files matches the number of space-separated read structures
+//
+def validateReadStructure(input) {
+    def id = input[0]
+    def meta = input[1]
+    def fastqs = input[2]
+
+    def num_fastqs = fastqs.size()
+    def num_structures = meta.read_structure.tokenize(" ").size()
+
+    if (num_fastqs != num_structures) {
+        error("Please check input samplesheet -> Number of fastq files (${num_fastqs}) does not match the number of read structures (${num_structures}): ${id}, '${meta.read_structure}'")
+    }
+    return [ id, meta, fastqs ]
+}
+
+
 //
 // Validate channels from input samplesheet
 //
@@ -177,6 +200,7 @@ def validateInputSamplesheet(input) {
     return [ metas[0], fastqs ]
 }
 
+
 //
 // Get attribute from genome config file e.g. fasta
 //