Skip to content

Commit

Permalink
Fastp: Added support for unpaired and merged reads output (#356)
Browse files Browse the repository at this point in the history
* Added support for unpaired and merged reads output

* Clean up

* Added option for output failed reads

* Code tweak

Co-authored-by: Jan Forster <[email protected]>

* Added some examples

* Added missing merge option

* Fixed bug when checking merge option

Co-authored-by: Jan Forster <[email protected]>
  • Loading branch information
fgvieira and jafors authored May 28, 2021
1 parent e53c4d5 commit 1a4ea4d
Show file tree
Hide file tree
Showing 3 changed files with 61 additions and 2 deletions.
14 changes: 14 additions & 0 deletions bio/fastp/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,17 @@ name: "fastp"
description: trim and QC fastq reads with fastp
authors:
- Sebastian Kurscheid ([email protected])
- Filipe G. Vieira
input:
- fastq file(s)
output:
- trimmed fastq file(s)
- unpaired reads (optional; either in a single file or separate)
- merged reads (optional)
- failed reads (optional)
- json file containing trimming statistics
- html file containing trimming statistics
notes: |
* The `adapters` param allows specifying adapter sequences.
* The `extra` param allows for additional program arguments.
* For more information, see https://github.com/OpenGene/fastp
10 changes: 9 additions & 1 deletion bio/fastp/test/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ rule fastp_se:
sample=["reads/se/{sample}.fastq"]
output:
trimmed="trimmed/se/{sample}.fastq",
failed="trimmed/se/{sample}.failed.fastq",
html="report/se/{sample}.html",
json="report/se/{sample}.json"
log:
Expand All @@ -20,13 +21,20 @@ rule fastp_pe:
sample=["reads/pe/{sample}.1.fastq", "reads/pe/{sample}.2.fastq"]
output:
trimmed=["trimmed/pe/{sample}.1.fastq", "trimmed/pe/{sample}.2.fastq"],
# Unpaired reads separately
unpaired1="trimmed/pe/{sample}.u1.fastq",
unpaired2="trimmed/pe/{sample}.u2.fastq",
# or in a single file
# unpaired="trimmed/pe/{sample}.singletons.fastq",
merged="trimmed/pe/{sample}.merged.fastq",
failed="trimmed/pe/{sample}.failed.fastq",
html="report/pe/{sample}.html",
json="report/pe/{sample}.json"
log:
"logs/fastp/pe/{sample}.log"
params:
adapters="--adapter_sequence ACGGCTAGCTA --adapter_sequence_r2 AGATCGGAAGAGCACACGTCTGAACTCCAGTCAC",
extra=""
extra="--merge"
threads: 2
wrapper:
"master/bio/fastp"
Expand Down
39 changes: 38 additions & 1 deletion bio/fastp/wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,33 +4,70 @@
__license__ = "MIT"

from snakemake.shell import shell
import re

extra = snakemake.params.get("extra", "")
adapters = snakemake.params.get("adapters", "")
log = snakemake.log_fmt_shell(stdout=True, stderr=True)


# Assert input
n = len(snakemake.input.sample)
assert (
n == 1 or n == 2
), "input->sample must have 1 (single-end) or 2 (paired-end) elements."


# Input files
if n == 1:
reads = "--in1 {}".format(snakemake.input.sample)
else:
reads = "--in1 {} --in2 {}".format(*snakemake.input.sample)


# Output files
trimmed_paths = snakemake.output.get("trimmed", None)
if trimmed_paths is not None:
if trimmed_paths:
if n == 1:
trimmed = "--out1 {}".format(snakemake.output.trimmed)
else:
trimmed = "--out1 {} --out2 {}".format(*snakemake.output.trimmed)

# Output unpaired files
unpaired = snakemake.output.get("unpaired", None)
if unpaired:
trimmed += f" --unpaired1 {unpaired} --unpaired2 {unpaired}"
else:
unpaired1 = snakemake.output.get("unpaired1", None)
if unpaired1:
trimmed += f" --unpaired1 {unpaired1}"
unpaired2 = snakemake.output.get("unpaired2", None)
if unpaired2:
trimmed += f" --unpaired2 {unpaired2}"

# Output merged PE reads
merged = snakemake.output.get("merged", None)
if merged:
if not re.search(r"--merge\b", extra):
raise ValueError(
"output.merged specified but '--merge' option missing from params.extra"
)
trimmed += f" --merged_out {merged}"
else:
trimmed = ""


# Output failed reads
failed = snakemake.output.get("failed", None)
if failed:
trimmed += f" --failed_out {failed}"


# Stats
html = "--html {}".format(snakemake.output.html)
json = "--json {}".format(snakemake.output.json)


shell(
"(fastp --thread {snakemake.threads} "
"{extra} "
Expand Down

0 comments on commit 1a4ea4d

Please sign in to comment.