Skip to content

Commit

Permalink
feat: explicitly specify bwa index in bwa wrappers (#232)
Browse files Browse the repository at this point in the history
* [fix] (template): Missing code in wrappers' doc. Error #187

* [dev] (bwa): Automatic prefix detection (#228)

* [dev] (bwa): Automatic prefix detection (#228)

* [doc] (bwa): Snakefiles updated

* [dev] (bwa): Aln was forgotten at first

* [dev] (black): Reformatting

* Update Snakefile_samtools

* minimal formatting commit to trigger tests

test logs are not available any more

* revert test trigger change

* feat!: remove tmp dir parameter in bwa mem wrapper as this is now handled automatically by snakemake

* fix: fixed index addressing

* fmt

* fixes

* fixes

* fixed failing lint, fixed syntax

* add missing logfile

Co-authored-by: tdayris <[email protected]>
Co-authored-by: Johannes Köster <[email protected]>
Co-authored-by: David Laehnemann <[email protected]>
Co-authored-by: Johannes Köster <[email protected]>
  • Loading branch information
5 people authored Jan 26, 2022
1 parent ba8c91c commit 0e323b1
Show file tree
Hide file tree
Showing 31 changed files with 253 additions and 193 deletions.
6 changes: 3 additions & 3 deletions bio/bwa-mem2/index/test/Snakefile
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
rule bwa_mem2_index:
input:
"{genome}"
"{genome}",
output:
"{genome}.0123",
"{genome}.amb",
"{genome}.ann",
"{genome}.bwt.2bit.64",
"{genome}.pac",
log:
"logs/bwa-mem2_index/{genome}.log"
"logs/bwa-mem2_index/{genome}.log",
params:
prefix=lambda w: w.genome
prefix=lambda w: w.genome,
wrapper:
"master/bio/bwa-mem2/index"
6 changes: 5 additions & 1 deletion bio/bwa-mem2/index/wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,11 @@
raise ValueError("Please provide exactly one reference genome as input.")

# Prefix that should be used for the database
prefix = snakemake.params.get("prefix", "")
# Use the explicit `prefix` param when one is given; otherwise fall back to
# the first output file with its extension removed.
prefix = (
    snakemake.params["prefix"]
    if "prefix" in snakemake.params.keys()
    else splitext(snakemake.output[0])[0]
)

if len(prefix) > 0:
prefix = "-p " + prefix
Expand Down
11 changes: 6 additions & 5 deletions bio/bwa-mem2/mem-samblaster/test/Snakefile
Original file line number Diff line number Diff line change
@@ -1,15 +1,16 @@
rule bwa_mem:
input:
reads=["reads/{sample}.1.fastq", "reads/{sample}.2.fastq"]
reads=["reads/{sample}.1.fastq", "reads/{sample}.2.fastq"],
# Index can be a list of (all) files created by bwa, or one of them
idx=multiext("genome.fasta", ".amb", ".ann", ".bwt.2bit.64", ".pac"),
output:
bam="mapped/{sample}.bam",
index="mapped/{sample}.bam.bai"
index="mapped/{sample}.bam.bai",
log:
"logs/bwa_mem2_sambamba/{sample}.log"
"logs/bwa_mem2_sambamba/{sample}.log",
params:
index="genome.fasta",
extra=r"-R '@RG\tID:{sample}\tSM:{sample}'",
sort_extra="-q" # Extra args for sambamba.
sort_extra="-q", # Extra args for sambamba.
threads: 8
wrapper:
"master/bio/bwa-mem2/mem-samblaster"
8 changes: 7 additions & 1 deletion bio/bwa-mem2/mem-samblaster/wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,12 @@
sort_extra = snakemake.params.get("sort_extra", "")
samblaster_extra = snakemake.params.get("samblaster_extra", "")

# The index is supplied through the `idx` input, which may be a single index
# file or a list of them. The aligner is given the shared prefix, so strip
# the extension from the (first) index file.
index = snakemake.input.idx
if isinstance(index, str):
    index = path.splitext(index)[0]
else:
    index = path.splitext(index[0])[0]

log = snakemake.log_fmt_shell(stdout=False, stderr=True)

# Check inputs/arguments.
Expand All @@ -27,7 +33,7 @@
"(bwa-mem2 mem"
" -t {snakemake.threads}"
" {extra}"
" {snakemake.params.index}"
" {index}"
" {snakemake.input.reads}"
" | samblaster"
" {samblaster_extra}"
Expand Down
15 changes: 8 additions & 7 deletions bio/bwa-mem2/mem/test/Snakefile
Original file line number Diff line number Diff line change
@@ -1,16 +1,17 @@
rule bwa_mem2_mem:
input:
reads=["reads/{sample}.1.fastq", "reads/{sample}.2.fastq"]
reads=["reads/{sample}.1.fastq", "reads/{sample}.2.fastq"],
# Index can be a list of (all) files created by bwa, or one of them
idx=multiext("genome.fasta", ".amb", ".ann", ".bwt.2bit.64", ".pac"),
output:
"mapped/{sample}.bam"
"mapped/{sample}.bam",
log:
"logs/bwa_mem2/{sample}.log"
"logs/bwa_mem2/{sample}.log",
params:
index="genome.fasta",
extra=r"-R '@RG\tID:{sample}\tSM:{sample}'",
sort="none", # Can be 'none', 'samtools' or 'picard'.
sort_order="coordinate", # Can be 'coordinate' (default) or 'queryname'.
sort_extra="" # Extra args for samtools/picard.
sort="none", # Can be 'none', 'samtools' or 'picard'.
sort_order="coordinate", # Can be 'coordinate' (default) or 'queryname'.
sort_extra="", # Extra args for samtools/picard.
threads: 8
wrapper:
"master/bio/bwa-mem2/mem"
15 changes: 8 additions & 7 deletions bio/bwa-mem2/mem/test/Snakefile_picard
Original file line number Diff line number Diff line change
@@ -1,16 +1,17 @@
rule bwa_mem2_mem:
input:
reads=["reads/{sample}.1.fastq", "reads/{sample}.2.fastq"]
reads=["reads/{sample}.1.fastq", "reads/{sample}.2.fastq"],
# Index can be a list of (all) files created by bwa, or one of them
idx=multiext("genome.fasta", ".amb", ".ann", ".bwt.2bit.64", ".pac"),
output:
"mapped/{sample}.bam"
"mapped/{sample}.bam",
log:
"logs/bwa_mem/{sample}.log"
"logs/bwa_mem/{sample}.log",
params:
index="genome.fasta",
extra=r"-R '@RG\tID:{sample}\tSM:{sample}'",
sort="picard", # Can be 'none', 'samtools' or 'picard'.
sort_order="coordinate", # Can be 'coordinate' (default) or 'queryname'.
sort_extra="" # Extra args for samtools/picard.
sort="picard", # Can be 'none', 'samtools' or 'picard'.
sort_order="coordinate", # Can be 'coordinate' (default) or 'queryname'.
sort_extra="", # Extra args for samtools/picard.
threads: 8
wrapper:
"master/bio/bwa-mem2/mem"
15 changes: 8 additions & 7 deletions bio/bwa-mem2/mem/test/Snakefile_samtools
Original file line number Diff line number Diff line change
@@ -1,16 +1,17 @@
rule bwa_mem2_mem:
input:
reads=["reads/{sample}.1.fastq", "reads/{sample}.2.fastq"]
reads=["reads/{sample}.1.fastq", "reads/{sample}.2.fastq"],
# Index can be a list of (all) files created by bwa, or one of them
idx=multiext("genome.fasta", ".amb", ".ann", ".bwt.2bit.64", ".pac"),
output:
"mapped/{sample}.bam"
"mapped/{sample}.bam",
log:
"logs/bwa_mem/{sample}.log"
"logs/bwa_mem/{sample}.log",
params:
index="genome.fasta",
extra=r"-R '@RG\tID:{sample}\tSM:{sample}'",
sort="samtools", # Can be 'none', 'samtools' or 'picard'.
sort_order="coordinate", # Can be 'coordinate' (default) or 'queryname'.
sort_extra="" # Extra args for samtools/picard.
sort="samtools", # Can be 'none', 'samtools' or 'picard'.
sort_order="coordinate", # Can be 'coordinate' (default) or 'queryname'.
sort_extra="", # Extra args for samtools/picard.
threads: 8
wrapper:
"master/bio/bwa-mem2/mem"
8 changes: 7 additions & 1 deletion bio/bwa-mem2/mem/wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,12 @@
sort_order = snakemake.params.get("sort_order", "coordinate")
sort_extra = snakemake.params.get("sort_extra", "")

# The index is supplied through the `idx` input, which may be a single index
# file or a list of them. The aligner is given the shared prefix, so strip
# the extension from the (first) index file.
index = snakemake.input.idx
if isinstance(index, str):
    index = path.splitext(index)[0]
else:
    index = path.splitext(index[0])[0]

log = snakemake.log_fmt_shell(stdout=False, stderr=True)

# Check inputs/arguments.
Expand Down Expand Up @@ -63,7 +69,7 @@
"(bwa-mem2 mem"
" -t {snakemake.threads}"
" {extra}"
" {snakemake.params.index}"
" {index}"
" {snakemake.input.reads}"
" | " + pipe_cmd + ") {log}"
)
11 changes: 6 additions & 5 deletions bio/bwa/aln/test/Snakefile
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
rule bwa_aln:
input:
"reads/{sample}.{pair}.fastq"
fastq="reads/{sample}.{pair}.fastq",
# Index can be a list of (all) files created by bwa, or one of them
idx=multiext("genome", ".amb", ".ann", ".bwt", ".pac", ".sa"),
output:
"sai/{sample}.{pair}.sai"
"sai/{sample}.{pair}.sai",
params:
index="genome",
extra=""
extra="",
log:
"logs/bwa_aln/{sample}.{pair}.log"
"logs/bwa_aln/{sample}.{pair}.log",
threads: 8
wrapper:
"master/bio/bwa/aln"
12 changes: 9 additions & 3 deletions bio/bwa/aln/wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,18 +5,24 @@
__email__ = "[email protected]"
__license__ = "MIT"


from os import path
from snakemake.shell import shell


extra = snakemake.params.get("extra", "")
log = snakemake.log_fmt_shell(stdout=False, stderr=True)

# Derive the index prefix from the `idx` input: use the path itself when a
# single file is given, otherwise the first entry, then drop its extension.
idx_input = snakemake.input.idx
first_idx_file = idx_input if isinstance(idx_input, str) else idx_input[0]
index = path.splitext(first_idx_file)[0]

shell(
"bwa aln"
" {extra}"
" -t {snakemake.threads}"
" {snakemake.params.index}"
" {snakemake.input[0]}"
" {index}"
" {snakemake.input.fastq}"
" > {snakemake.output[0]} {log}"
)
15 changes: 5 additions & 10 deletions bio/bwa/index/test/Snakefile
Original file line number Diff line number Diff line change
@@ -1,16 +1,11 @@
rule bwa_index:
input:
"{genome}.fasta"
"{genome}.fasta",
output:
"{genome}.amb",
"{genome}.ann",
"{genome}.bwt",
"{genome}.pac",
"{genome}.sa"
idx=multiext("{genome}", ".amb", ".ann", ".bwt", ".pac", ".sa"),
log:
"logs/bwa_index/{genome}.log"
"logs/bwa_index/{genome}.log",
params:
prefix="{genome}",
algorithm="bwtsw"
algorithm="bwtsw",
wrapper:
"master/bio/bwa/index"
"master/bio/bwa/index"
4 changes: 2 additions & 2 deletions bio/bwa/index/wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
__email__ = "[email protected]"
__license__ = "MIT"

from os import path
from os.path import splitext

from snakemake.shell import shell

Expand All @@ -16,7 +16,7 @@
raise ValueError("Only one reference genome can be inputed!")

# Prefix that should be used for the database
prefix = snakemake.params.get("prefix", "")
prefix = snakemake.params.get("prefix", splitext(snakemake.output.idx[0])[0])

if len(prefix) > 0:
prefix = "-p " + prefix
Expand Down
11 changes: 6 additions & 5 deletions bio/bwa/mem-samblaster/test/Snakefile
Original file line number Diff line number Diff line change
@@ -1,15 +1,16 @@
rule bwa_mem:
input:
reads=["reads/{sample}.1.fastq", "reads/{sample}.2.fastq"]
reads=["reads/{sample}.1.fastq", "reads/{sample}.2.fastq"],
# Index can be a list of (all) files created by bwa, or one of them
idx=multiext("genome", ".amb", ".ann", ".bwt", ".pac", ".sa"),
output:
bam="mapped/{sample}.bam",
index="mapped/{sample}.bam.bai"
index="mapped/{sample}.bam.bai",
log:
"logs/bwa_mem_sambamba/{sample}.log"
"logs/bwa_mem_sambamba/{sample}.log",
params:
index="genome",
extra=r"-R '@RG\tID:{sample}\tSM:{sample}'",
sort_extra="" # Extra args for sambamba.
sort_extra="", # Extra args for sambamba.
threads: 8
wrapper:
"master/bio/bwa/mem-samblaster"
8 changes: 7 additions & 1 deletion bio/bwa/mem-samblaster/wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,12 @@
sort_extra = snakemake.params.get("sort_extra", "")
samblaster_extra = snakemake.params.get("samblaster_extra", "")

# The index is supplied through the `idx` input, which may be a single index
# file or a list of them. The aligner is given the shared prefix, so strip
# the extension from the (first) index file.
index = snakemake.input.idx
if isinstance(index, str):
    index = path.splitext(index)[0]
else:
    index = path.splitext(index[0])[0]

log = snakemake.log_fmt_shell(stdout=False, stderr=True)

# Check inputs/arguments.
Expand All @@ -27,7 +33,7 @@
"(bwa mem"
" -t {snakemake.threads}"
" {extra}"
" {snakemake.params.index}"
" {index}"
" {snakemake.input.reads}"
" | samblaster"
" {samblaster_extra}"
Expand Down
4 changes: 2 additions & 2 deletions bio/bwa/mem/test/Snakefile
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
rule bwa_mem:
input:
reads=["reads/{sample}.1.fastq", "reads/{sample}.2.fastq"],
# Index can be a list of (all) files created by bwa, or one of them
idx=multiext("genome", ".amb", ".ann", ".bwt", ".pac", ".sa"),
output:
"mapped/{sample}.bam",
log:
"logs/bwa_mem/{sample}.log",
params:
index="genome",
extra=r"-R '@RG\tID:{sample}\tSM:{sample}'",
sorting="none", # Can be 'none', 'samtools' or 'picard'.
sort_order="queryname", # Can be 'queryname' or 'coordinate'.
sort_extra="", # Extra args for samtools/picard.
tmp_dir="/tmp/", # Path to temp dir. (optional)
threads: 8
wrapper:
"master/bio/bwa/mem"
3 changes: 2 additions & 1 deletion bio/bwa/mem/test/Snakefile_picard
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
rule bwa_mem:
input:
reads=["reads/{sample}.1.fastq", "reads/{sample}.2.fastq"],
# Index can be a list of (all) files created by bwa, or one of them
idx=multiext("genome", ".amb", ".ann", ".bwt", ".pac", ".sa"),
output:
"mapped/{sample}.bam",
log:
"logs/bwa_mem/{sample}.log",
params:
index="genome",
extra=r"-R '@RG\tID:{sample}\tSM:{sample}'",
sort="picard", # Can be 'none', 'samtools' or 'picard'.
sort_order="queryname", # Can be 'queryname' or 'coordinate'.
Expand Down
5 changes: 3 additions & 2 deletions bio/bwa/mem/test/Snakefile_samtools
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
rule bwa_mem:
input:
reads=["reads/{sample}.1.fastq", "reads/{sample}.2.fastq"],
# Index can be a list of (all) files created by bwa, or one of them
idx=multiext("genome", ".amb", ".ann", ".bwt", ".pac", ".sa"),
output:
"mapped/{sample}.bam",
log:
"logs/bwa_mem/{sample}.log",
params:
index="genome",
extra=r"-R '@RG\tID:{sample}\tSM:{sample}'",
sorting="samtools", # Can be 'none', 'samtools' or 'picard'.
sort_order="queryname", # Can be 'queryname' or 'coordinate'.
Expand All @@ -20,13 +21,13 @@ rule bwa_mem:
rule bwa_mem_write_index:
input:
reads=["reads/{sample}.1.fastq", "reads/{sample}.2.fastq"],
idx=multiext("genome", ".amb", ".ann", ".bwt", ".pac", ".sa"),
output:
"mapped_with_index/{sample}.bam",
"mapped_with_index/{sample}.bam.csi",
log:
"logs/bwa_mem/{sample}.log",
params:
index="genome",
extra=r"-R '@RG\tID:{sample}\tSM:{sample}'",
sorting="samtools", # Can be 'none', 'samtools' or 'picard'.
sort_order="coordinate", # Can be 'queryname' or 'coordinate'.
Expand Down
Loading

0 comments on commit 0e323b1

Please sign in to comment.