
Merge pull request #9 from BDI-pathogens/hexamer_latest
Hexamer latest
lithomson committed Oct 19, 2021
2 parents f3440e5 + 9dc63ca commit 481c1cd
Showing 11 changed files with 343 additions and 352 deletions.
6 changes: 3 additions & 3 deletions README.md
@@ -59,11 +59,11 @@ module load python/2.7.11
 
 ## Manual setup
 
-1. Create a `samples.txt` file in the raw data directory containing a list of samples for processing, with one sample per row.
+1. Create a directory for data processing of the form `YYYY-MM-DD_<Batch Name(s)>` and navigate to that directory.
 
-1. Create a directory for data processing, e.g. `YYYY-MM-DD_<Batch Name(s)>`.
+1. Create a `samples.txt` file containing a list of samples for processing, with one sample per row.
 
-1. Navigate to that directory and clone this repo:
+1. Clone this repo:
    ```
    git clone [email protected]:BDI-pathogens/ShiverCovid.git
    ```
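In the new ordering the steps read in execution order. As a rough illustration only (the batch name and sample IDs below are placeholders, not from this repository), the manual setup amounts to:

```
mkdir 2021-10-19_ExampleBatch && cd 2021-10-19_ExampleBatch   # YYYY-MM-DD_<Batch Name(s)>
printf 'sample01\nsample02\n' > samples.txt                   # one sample per row
git clone [email protected]:BDI-pathogens/ShiverCovid.git
```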
62 changes: 30 additions & 32 deletions snakemake/Snakefile
@@ -170,67 +170,65 @@ rule pipeline_kraken_gzip:
     shell:
         "gzip {input.kraken}"
 
-rule pipeline_trimmomatic:
+rule pipeline_preprocess_readnames_fwd:
     input:
         filt_fwd=expand(OUTPUT_DIR_PIPELINE + "/{{samples}}_1_filt.fastq"),
-        filt_bwd=expand(OUTPUT_DIR_PIPELINE + "/{{samples}}_2_filt.fastq"),
-        adapters=ADAPTERS_FILE,
-        script=join(REPO_BASE_DIR,"snakemake","scripts","pipeline","trimmomatic.sh")
+        script=join(REPO_BASE_DIR,"snakemake","scripts","pipeline","preprocess_readnames_fwd.sh"),
+        py_script=join(REPO_BASE_DIR,"snakemake","scripts","pipeline","preprocess_readnames.py")
     output:
-        clean_fwd=expand(OUTPUT_DIR_PIPELINE + "/{{samples}}_1_clean.fastq"),
-        clean_bwd=expand(OUTPUT_DIR_PIPELINE + "/{{samples}}_2_clean.fastq"),
-        tmp_fq_fwd=temp(expand(OUTPUT_DIR_PIPELINE + "/temp_{{samples}}_1_trimmings.fq")),
-        tmp_fq_bwd=temp(expand(OUTPUT_DIR_PIPELINE + "/temp_{{samples}}_2_trimmings.fq"))
-    log: expand(LOG_DIR_PIPELINE + "/{{samples}}_trimmomatic.log")
+        tmp_reads_fwd=temp(expand(OUTPUT_DIR_PIPELINE + "/temp_{{samples}}_reads1.fastq"))
+    log: expand(LOG_DIR_PIPELINE + "/{{samples}}_preprocess_readnames_fwd.log")
     params:
-        trimmomatic_minlen=80,
         conda_bin=CONDA_BIN,
         queues=CLUSTER_CONFIG_SHORT_QUEUES,
-        cores=4,
+        cores=1,
         project=CLUSTER_PROJECT_NAME
     shell:
-        "{input.script} {params.conda_bin} {input.filt_fwd} {input.filt_bwd} \
-        {output.clean_fwd} {output.clean_bwd} {output.tmp_fq_fwd} {output.tmp_fq_bwd} {input.adapters} \
-        {params.trimmomatic_minlen} {log} {params.cores} >{log} 2>&1"
+        "{input.script} {input.py_script} {params.conda_bin} {input.filt_fwd} {output.tmp_reads_fwd} {log} >{log} 2>&1"
 
-rule shiver_preprocess_readnames_fwd:
+rule pipeline_preprocess_readnames_bwd:
     input:
-        filt_fwd=expand(OUTPUT_DIR_PIPELINE + "/{{samples}}_1_filt.fastq"),
-        script=join(REPO_BASE_DIR,"snakemake","scripts","shiver","preprocess_readnames_fwd.sh"),
-        py_script=join(REPO_BASE_DIR,"snakemake","scripts","shiver","preprocess_readnames.py")
+        filt_bwd=expand(OUTPUT_DIR_PIPELINE + "/{{samples}}_2_filt.fastq"),
+        script=join(REPO_BASE_DIR,"snakemake","scripts","pipeline","preprocess_readnames_bwd.sh"),
+        py_script=join(REPO_BASE_DIR,"snakemake","scripts","pipeline","preprocess_readnames.py")
     output:
-        tmp_reads_fwd=temp(expand(OUTPUT_DIR_SHIVER_RAW + "/temp_{{samples}}_reads1.fastq"))
-    log: expand(LOG_DIR_SHIVER_RAW + "/{{samples}}_preprocess_readnames_fwd.log")
+        tmp_reads_bwd=temp(expand(OUTPUT_DIR_PIPELINE + "/temp_{{samples}}_reads2.fastq"))
+    log: expand(LOG_DIR_PIPELINE + "/{{samples}}_preprocess_readnames_bwd.log")
     params:
         conda_bin=CONDA_BIN,
         queues=CLUSTER_CONFIG_SHORT_QUEUES,
         cores=1,
         project=CLUSTER_PROJECT_NAME
     shell:
-        "{input.script} {input.py_script} {params.conda_bin} {input.filt_fwd} {output.tmp_reads_fwd} {log} >{log} 2>&1"
+        "{input.script} {input.py_script} {params.conda_bin} {input.filt_bwd} {output.tmp_reads_bwd} {log} >{log} 2>&1"
 
-rule shiver_preprocess_readnames_bwd:
+rule pipeline_trimmomatic:
     input:
-        filt_bwd=expand(OUTPUT_DIR_PIPELINE + "/{{samples}}_2_filt.fastq"),
-        script=join(REPO_BASE_DIR,"snakemake","scripts","shiver","preprocess_readnames_bwd.sh"),
-        py_script=join(REPO_BASE_DIR,"snakemake","scripts","shiver","preprocess_readnames.py")
+        filt_fwd=expand(OUTPUT_DIR_PIPELINE + "/temp_{{samples}}_reads1.fastq"),
+        filt_bwd=expand(OUTPUT_DIR_PIPELINE + "/temp_{{samples}}_reads2.fastq"),
+        adapters=ADAPTERS_FILE,
+        script=join(REPO_BASE_DIR,"snakemake","scripts","pipeline","trimmomatic.sh")
     output:
-        tmp_reads_bwd=temp(expand(OUTPUT_DIR_SHIVER_RAW + "/temp_{{samples}}_reads2.fastq"))
-    log: expand(LOG_DIR_SHIVER_RAW + "/{{samples}}_preprocess_readnames_bwd.log")
+        clean_fwd=expand(OUTPUT_DIR_PIPELINE + "/{{samples}}_1_clean.fastq"),
+        clean_bwd=expand(OUTPUT_DIR_PIPELINE + "/{{samples}}_2_clean.fastq"),
+        tmp_fq_fwd=temp(expand(OUTPUT_DIR_PIPELINE + "/temp_{{samples}}_1_trimmings.fq")),
+        tmp_fq_bwd=temp(expand(OUTPUT_DIR_PIPELINE + "/temp_{{samples}}_2_trimmings.fq"))
+    log: expand(LOG_DIR_PIPELINE + "/{{samples}}_trimmomatic.log")
     params:
+        trimmomatic_minlen=50,
         conda_bin=CONDA_BIN,
         queues=CLUSTER_CONFIG_SHORT_QUEUES,
-        cores=1,
+        cores=4,
         project=CLUSTER_PROJECT_NAME
     shell:
-        "{input.script} {input.py_script} {params.conda_bin} {input.filt_bwd} {output.tmp_reads_bwd} {log} >{log} 2>&1"
+        "{input.script} {params.conda_bin} {input.filt_fwd} {input.filt_bwd} \
+        {output.clean_fwd} {output.clean_bwd} {output.tmp_fq_fwd} {output.tmp_fq_bwd} {input.adapters} \
+        {params.trimmomatic_minlen} {log} {params.cores} >{log} 2>&1"
 
 rule shiver_map_reads:
     input:
         clean_fwd=expand(OUTPUT_DIR_PIPELINE + "/{{samples}}_1_clean.fastq"),
         clean_bwd=expand(OUTPUT_DIR_PIPELINE + "/{{samples}}_2_clean.fastq"),
-        tmp_reads_fwd=expand(OUTPUT_DIR_SHIVER_RAW + "/temp_{{samples}}_reads1.fastq"),
-        tmp_reads_bwd=expand(OUTPUT_DIR_SHIVER_RAW + "/temp_{{samples}}_reads2.fastq"),
         shiver_config=RAW_SHIVER_CONFIG,
         ref_stem=REF_STEM_FILE,
         script=join(REPO_BASE_DIR,"snakemake","scripts","shiver","run_shiver_map_reads.sh"),
@@ -265,7 +263,7 @@ rule shiver_map_reads:
     shell:
         "{input.script} {params.sequence} {params.shiver_initdir} {input.shiver_config} \
         {input.shiver_script} {params.tmp_dir_sequence} {input.clean_fwd} {input.clean_bwd} \
-        {input.tmp_reads_fwd} {input.tmp_reads_bwd} {output.tmp_bam} {output.tmp_base_freqs} {output.tmp_base_freqs_glob} \
+        {output.tmp_bam} {output.tmp_base_freqs} {output.tmp_base_freqs_glob} \
         {output.tmp_dedup_stats} {output.tmp_insert_size} {output.tmp_prededup} {output.tmp_consensus} \
         {output.tmp_consensus_glob} {output.tmp_coords} {output.tmp_ref_fasta} {output.tmp_ref_fasta_fai} {output.tmp_blast} \
         {output.tmp_shiver_contigs} {input.ref_stem} {log} >{log} 2>&1"
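Read-name preprocessing now happens in the pipeline stage, on the filtered reads, so Trimmomatic consumes the renamed files. The preprocess_readnames.py script itself is not part of this diff; as an illustration only, read-name preprocessing of paired FASTQ files is commonly a header normalisation along these lines:

```
# Illustrative sketch -- the real preprocess_readnames.py is not shown here.
# Keep only the first whitespace-delimited token of each FASTQ header line so
# that forward and backward mates carry identical read names.
awk 'NR % 4 == 1 { print $1; next } { print }' \
    sample_1_filt.fastq > temp_sample_reads1.fastq
```

The trimmomatic.sh wrapper is likewise unchanged by this commit and not shown; a paired-end Trimmomatic call consistent with the rule's arguments (clean and trimmings outputs, an adapters file, MINLEN 50, 4 threads) would look roughly like the following, where the ILLUMINACLIP thresholds are assumptions:

```
# Sketch of a paired-end Trimmomatic invocation matching the rule above; the
# ILLUMINACLIP seed/palindrome/simple-clip thresholds (2:30:10) are assumed,
# not taken from this repository.
trimmomatic PE -threads 4 \
    temp_sample_reads1.fastq temp_sample_reads2.fastq \
    sample_1_clean.fastq temp_sample_1_trimmings.fq \
    sample_2_clean.fastq temp_sample_2_trimmings.fq \
    ILLUMINACLIP:adapters.fa:2:30:10 MINLEN:50
```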
6 changes: 3 additions & 3 deletions snakemake/auxfiles/shiver_config_raw.sh
@@ -110,7 +110,7 @@ ContigMinBlastOverlapToMerge='2'
 MafftTestingStrategy="MinAlnLength"
 
 # Shall we trim adapaters and low quality bases from reads, using trimmomatic?
-TrimReadsForAdaptersAndQual=true
+TrimReadsForAdaptersAndQual=false
 # The trimmomatic manual explains at length the parameters controlling read
 # trimming; the reader is referred to it for explanations of the following
 # variables and other options not used here:
@@ -121,7 +121,7 @@ BaseQualityParams='MINLEN:50 LEADING:20 TRAILING:20 SLIDINGWINDOW:4:20'
 NumThreadsTrimmomatic=1
 
 # Shall we trim exact matches to PCR primers from the end of reads using fastaq?
-TrimReadsForPrimers=true
+TrimReadsForPrimers=false
 # Shall we also trim matches to the PCR primers that differ by a single base
 # change? (This slows down the trimming step a lot.)
 TrimPrimerWithOneSNP=false
@@ -131,7 +131,7 @@ CleanReads=false
 
 # Which mapper to use? "smalt", "bwa" or "bowtie"? You can ignore the options
 # for a mapper you're not using, and it doesn't need to be installed.
-mapper='smalt'
+mapper='bowtie'
 
 # Check the smalt documentation for a full explanation of options,
 # including those not used by default here.
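Because adapter, quality, and primer trimming now happen in the pipeline's own Trimmomatic rule, this config turns off shiver's internal trimming and switches the mapper to bowtie. A quick sanity check of the deployed config (illustrative; run from the repository root):

```
grep -E "^(TrimReadsForAdaptersAndQual|TrimReadsForPrimers|mapper)=" \
    snakemake/auxfiles/shiver_config_raw.sh
# Expected output:
# TrimReadsForAdaptersAndQual=false
# TrimReadsForPrimers=false
# mapper='bowtie'
```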
(Diffs for the remaining eight changed files did not load and are not shown.)
