Skip to content

Commit

Permalink
Add multiqc (#14)
Browse files Browse the repository at this point in the history
* add ngs_qa_qc dockerfile and ep

* begin restructuring of workflows for extension of demux workflow

* start ngs qc/qa pipeline, fastqc - trimmed/untrimmed + fastp trimming

* ngsqc fastqc, trimming, fastqc again after trim

* fix line endings on ep.sh

* add in fastq screen config to docker

* final ngsqc dockerfile

* feat: expand ngs qc workflow

* feat: python module support for ngs qc-qa

* chore: ignore jsons, lower latency wait for biowulf

* feat: finalize first half of ngsqc pipeline

* fix: snakemake pathing correction

* fix: dry run action needs -s kwarg

* chore: fix args in github action

* chore: refactor setuptools package, force include workflow and profiles, fix dry run command

* chore: dry run command not being executed

* feat: kaiju and kraken annotation rules, beginning

* chore: fix more merge conflicts

* feat: working kraken & kaiju

* chore: remap outputs to discussed structure

* fix: align io paths for ngsqc workflow

* chore: relocate slurm logs directory

* fix: correct fastqc_trimmed output paths

* fix: dry run action, add path to env

* fix: broken path in dry run action

* fix: cat from /Users/routsongrm/git/NGS/Dmux

* fix: cat from \$PWD/NGS/Dmux

* feat: add multiqc report

* fix: new flag for dry run in CI
  • Loading branch information
rroutsong authored Oct 31, 2023
1 parent f4c33d3 commit 2ff4353
Show file tree
Hide file tree
Showing 5 changed files with 57 additions and 13 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/dryrun.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ jobs:
- name: Dry Run with test data
run: |
docker run -h cn0000 -v $PWD:/opt2 snakemake/snakemake:stable /bin/bash -c \
"pip install /opt2; dmux run -s /opt2/.tests/illumnia_demux -o /opt2/.tests/illumnia_demux/dry_run_out --local --pretend /opt2/.tests/illumnia_demux"
"pip install /opt2; dmux run -s /opt2/.tests/illumnia_demux -o /opt2/.tests/illumnia_demux/dry_run_out --local --dry-run /opt2/.tests/illumnia_demux"
- name: View the pipeline config file
run: |
echo "Generated config file for pipeline...." && cat $PWD/.tests/illumnia_demux/dry_run_out/EXP_PROJ_demux/.config/config_job_0.json
Expand Down
10 changes: 5 additions & 5 deletions bin/dmux.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,10 @@ def run(args):
config['bcl_files'].append(list(Path(rundir).rglob('*.bcl.*')))
out_to = Path(args.output, f"{sample_sheet.Header['Experiment Name']}_demux") if args.output \
else Path(rundir, f"{sample_sheet.Header['Experiment Name']}_demux")
utils.valid_run_output(out_to, dry_run=args.pretend)
utils.valid_run_output(out_to, dry_run=args.dry_run)
config['out_to'].append(out_to)

utils.exec_demux_pipeline(config, dry_run=args.pretend, local=args.local)
utils.exec_demux_pipeline(config, dry_run=args.dry_run, local=args.local)

# if qc not disabled:
# - mutate config into structs/data appropriate for `args`
Expand Down Expand Up @@ -80,7 +80,7 @@ def ngsqc(args):

configs['out_to'].append(out_base)

utils.exec_ngsqc_pipeline(configs, dry_run=args.pretend, local=args.local)
utils.exec_ngsqc_pipeline(configs, dry_run=args.dry_run, local=args.local)


def logs(args):
Expand All @@ -103,7 +103,7 @@ def logs(args):
'matching run ids, if not using full paths.')
parser_run.add_argument('-o', '--output', metavar='<output directory>', default=None, type=str,
help='Top-level output directory for demultiplexing data (defaults to input directory + runid + "_demux")')
parser_run.add_argument('-p', '--pretend', action='store_true',
parser_run.add_argument('-d', '--dry-run', action='store_true',
help='Dry run the demultiplexing workflow')
parser_run.add_argument('-l', '--local', action='store_true',
help='Execute pipeline locally without a dispatching executor')
Expand All @@ -119,7 +119,7 @@ def logs(args):
parser_ngs_qc.add_argument('-s', '--seq_dir', metavar='<sequencing directory>', default=None, type=str,
help='Root directory for sequencing data (defaults for biowulf/bigsky/locus), must contain directories ' + \
'matching run ids, if not using full paths.')
parser_ngs_qc.add_argument('-p', '--pretend', action='store_true',
parser_ngs_qc.add_argument('-d', '--dry-run', action='store_true',
help='Dry run the demultiplexing workflow')
parser_ngs_qc.add_argument('-l', '--local', action='store_true',
help='Execute pipeline locally without a dispatching executor')
Expand Down
2 changes: 2 additions & 0 deletions src/Dmux/workflow/ngs_qaqc/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ rule all:
# kaiju
expand("{out_dir}/{project}/{rid}/{sids}/kaiju/{sids}.tsv", out_dir=config['out_to'], sids=config['sids'],
project=config['projects'], rid=config['run_ids']),
f"{config['out_to']}/{config['projects']}/{config['run_ids']}/multiqc/multiqc_report.html",


include: "fastq.smk"
include: "qc.smk"
11 changes: 6 additions & 5 deletions src/Dmux/workflow/ngs_qaqc/fastq.smk
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,11 @@ rule trim_w_fastp:
in_read2 = config['demux_dir'] + "/{project}/{sid}_R2_001.fastq.gz",
output:
html = config['out_to'] + "/{project}/" + config['run_ids'] + "/{sid}/fastp/{sid}.html",
json = config['out_to'] + "/{project}/" + config['run_ids'] + "/{sid}/fastp/{sid}.json",
json = config['out_to'] + "/{project}/" + config['run_ids'] + "/{sid}/fastp/{sid}_fastp.json",
out_read1 = config['out_to'] + "/{project}/" + config['run_ids'] + "/{sid}/fastp/{sid}_trimmed_R1.fastq.gz",
out_read2 = config['out_to'] + "/{project}/" + config['run_ids'] + "/{sid}/fastp/{sid}_trimmed_R2.fastq.gz",
params:
adapters = get_adapter_opts,
# container: "docker://rroutsong/dmux_ngsqc:0.0.1",
containerized: "/data/OpenOmics/SIFs/dmux_ngsqc_0.0.1.sif"
threads: 4,
resources: mem_mb = 8192,
Expand Down Expand Up @@ -42,7 +41,6 @@ rule fastq_screen:
subset = 1000000,
aligner = "bowtie2",
output_dir = lambda w: config['out_to'] + "/" + w.project + "/" + config['run_ids'] + "/" + w.sid + "/fastq_screen/",
# container: "docker://rroutsong/dmux_ngsqc:0.0.1",
containerized: "/data/OpenOmics/SIFs/dmux_ngsqc_0.0.1.sif"
threads: 4,
resources: mem_mb = 8192,
Expand All @@ -65,11 +63,13 @@ rule kaiju_annotation:
read2 = config['out_to'] + "/{project}/" + config['run_ids'] + "/{sid}/fastp/{sid}_trimmed_R2.fastq.gz",
output:
kaiju_report = config['out_to'] + "/{project}/" + config['run_ids'] + "/{sid}/kaiju/{sid}.tsv",
kaiju_species = config['out_to'] + "/{project}/" + config['run_ids'] + "/{sid}/kaiju/{sid}_species.tsv",
kaiju_phylum = config['out_to'] + "/{project}/" + config['run_ids'] + "/{sid}/kaiju/{sid}_phylum.tsv",
params:
# TODO: soft code these paths
nodes = "/data/OpenOmics/references/Dmux/kaiju/kaiju_db_nr_euk_2023-05-10/nodes.dmp",
names = "/data/OpenOmics/references/Dmux/kaiju/kaiju_db_nr_euk_2023-05-10/names.dmp",
database = "/data/OpenOmics/references/Dmux/kaiju/kaiju_db_nr_euk_2023-05-10/kaiju_db_nr_euk.fmi",
# container: "docker://rroutsong/dmux_ngsqc:0.0.1",
containerized: "/data/OpenOmics/SIFs/dmux_ngsqc_0.0.1.sif"
log: config['out_to'] + "/.logs/{project}/" + config['run_ids'] + "/kaiju/{sid}.log",
threads: 24
Expand All @@ -86,6 +86,8 @@ rule kaiju_annotation:
-j {input.read1} \
-z {threads} \
-o {output.kaiju_report}
kaiju2table -t {params.nodes} -n {params.names} -r species -o {output.kaiju_species} {output.kaiju_report}
kaiju2table -t {params.nodes} -n {params.names} -r phylum -o {output.kaiju_phylum} {output.kaiju_report}
"""


Expand All @@ -98,7 +100,6 @@ rule kraken_annotation:
kraken_log = config['out_to'] + "/{project}/" + config['run_ids'] + "/{sid}/kraken/{sid}.log",
params:
kraken_db = "/data/OpenOmics/references/Dmux/kraken2/k2_pluspfp_20230605"
# container: "docker://rroutsong/dmux_ngsqc:0.0.1",
containerized: "/data/OpenOmics/SIFs/dmux_ngsqc_0.0.1.sif"
log: config['out_to'] + "/.logs/{project}/" + config['run_ids'] + "/kraken/{sid}.log",
threads: 24
Expand Down
45 changes: 43 additions & 2 deletions src/Dmux/workflow/ngs_qaqc/qc.smk
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ rule fastqc_untrimmed:
output_dir = lambda w: config['out_to'] + "/" + w.project + "/" + config['run_ids'] + "/" + w.sid + "/fastqc_untrimmed/"
log: config['out_to'] + "/.logs/{project}/" + config['run_ids'] + "/fastqc_untrimmed/{sid}_R{rnum}.log"
threads: 4
# container: "docker://rroutsong/dmux_ngsqc:0.0.1",
containerized: "/data/OpenOmics/SIFs/dmux_ngsqc_0.0.1.sif"
resources: mem_mb = 8096
shell:
Expand All @@ -47,7 +46,6 @@ rule fastqc_trimmed:
fqreport = config['out_to'] + "/{project}/" + config['run_ids'] + "/{sid}/fastqc_trimmed/{sid}_trimmed_R{rnum}_fastqc.zip",
params:
output_dir = lambda w: config['out_to'] + "/" + w.project + "/" + config['run_ids'] + "/" + w.sid + "/fastqc_trimmed/"
# container: "docker://rroutsong/dmux_ngsqc:0.0.1",
containerized: "/data/OpenOmics/SIFs/dmux_ngsqc_0.0.1.sif"
threads: 4
resources: mem_mb = 8096
Expand All @@ -57,3 +55,46 @@ rule fastqc_trimmed:
mkdir -p {params.output_dir}
fastqc -o {params.output_dir} -t {threads} {input.in_read}
"""


# Aggregate the per-sample QC outputs of this workflow into a single MultiQC
# HTML report. The rule waits on the files listed under `input`, then runs
# `multiqc` over the whole output tree (`params.input_dir`) plus the demux
# directory (`params.demux_dir`), writing into `params.output_dir`.
rule multiqc_report:
input:
# The inputs below are not passed to the shell command by name; they exist so
# that the upstream QC/annotation rules finish before MultiQC scans the
# directories.
# fastqc on untrimmed reads
expand("{out_dir}/{project}/{rid}/{sids}/fastqc_untrimmed/{sids}_R{rnum}_001_fastqc.zip", out_dir=config['out_to'],
project=config['projects'], rid=config['run_ids'], sids=config['sids'], rnum=config['rnums']),
# fastqc on trimmed reads
expand("{out_dir}/{project}/{rid}/{sids}/fastqc_trimmed/{sids}_trimmed_R{rnum}_fastqc.zip", out_dir=config['out_to'],
sids=config['sids'], project=config['projects'], rid=config['run_ids'], rnum=config['rnums']),
# fastp trimming metrics
# NOTE(review): this lists the trimmed FASTQ files rather than a fastp
# report file; confirm the fastp report is always produced alongside them.
expand("{out_dir}/{project}/{rid}/{sids}/fastp/{sids}_trimmed_R{rnum}.fastq.gz", out_dir=config['out_to'],
sids=config['sids'], project=config['projects'], rid=config['run_ids'], rnum=config['rnums']),
# fastq screen
expand("{out_dir}/{project}/{rid}/{sids}/fastq_screen/{sids}_trimmed_R{rnum}_screen.html", out_dir=config['out_to'],
sids=config['sids'], rnum=config['rnums'], rid=config['run_ids'], project=config['projects']),
# kraken2
expand("{out_dir}/{project}/{rid}/{sids}/kraken/{sids}.tsv", out_dir=config['out_to'], sids=config['sids'],
project=config['projects'], rid=config['run_ids']),
# kaiju
expand("{out_dir}/{project}/{rid}/{sids}/kaiju/{sids}.tsv", out_dir=config['out_to'], sids=config['sids'],
project=config['projects'], rid=config['run_ids']),
output:
# Report path: <out_to>/<projects>/<run_ids>/multiqc/Run-<run_ids>-Project-<projects>_multiqc_report.html
# The `+` concatenation assumes config['run_ids'] and config['projects'] are
# single strings -- TODO confirm against the config producer.
# NOTE(review): confirm the Snakefile's `rule all` target uses this same
# filename and that MultiQC derives this name from the --title given below.
mqc_report = f"{config['out_to']}/{config['projects']}/{config['run_ids']}" + \
"/multiqc/Run-" + config['run_ids'] + \
"-Project-" + config['projects'] + "_multiqc_report.html"
params:
# Directories handed to `multiqc` as search paths, the directory the report
# is written to, and the title string passed via --title.
input_dir = config['out_to'],
demux_dir = config['demux_dir'],
output_dir = config['out_to'] + "/" + config['projects'] + "/" + config['run_ids'] + "/multiqc/",
report_title = f"Run: {config['run_ids']}, Project: {config['projects']}",
containerized: "/data/OpenOmics/SIFs/dmux_ngsqc_0.0.1.sif"
# NOTE(review): `threads` is declared but the shell command below does not
# reference {threads}.
threads: 4
resources: mem_mb = 8096
# NOTE(review): a log path is declared, but the shell command below does not
# reference {log}, so nothing in this rule writes to it.
log: config['out_to'] + "/.logs/" + config['projects'] + "/" + config['run_ids'] + "/multiqc/multiqc.log"
# Runs multiqc quietly (-q) with the report title and output directory from
# `params`, scanning both search directories and passing an --ignore pattern
# for each hidden bookkeeping directory listed.
# NOTE(review): `-ip` is a combined short-option cluster and --title is also
# passed explicitly; confirm against the installed MultiQC CLI that `-ip`
# parses as intended.
shell:
"""
multiqc -q -ip \
--title \"{params.report_title}\" \
-o {params.output_dir} \
{params.input_dir} {params.demux_dir} \
--ignore ".cache" --ignore ".config" --ignore ".snakemake" --ignore ".slurm" --ignore ".singularity" --ignore ".logs"
"""

0 comments on commit 2ff4353

Please sign in to comment.