From 0c7ae27622e45167b9cc1aaf1be238b0d25429b9 Mon Sep 17 00:00:00 2001 From: "Filipe G. Vieira" <1151762+fgvieira@users.noreply.github.com> Date: Mon, 20 May 2024 10:08:51 +0200 Subject: [PATCH] feat: add wrapper to samtools collate (#2929) ### QC * [x] I confirm that: For all wrappers added by this PR, * there is a test case which covers any introduced changes, * `input:` and `output:` file paths in the resulting rule can be changed arbitrarily, * either the wrapper can only use a single core, or the example rule contains a `threads: x` statement with `x` being a reasonable default, * rule names in the test case are in [snake_case](https://en.wikipedia.org/wiki/Snake_case) and somehow tell what the rule is about or match the tools purpose or name (e.g., `map_reads` for a step that maps reads), * all `environment.yaml` specifications follow [the respective best practices](https://stackoverflow.com/a/64594513/2352071), * the `environment.yaml` pinning has been updated by running `snakedeploy pin-conda-envs environment.yaml` on a linux machine, * wherever possible, command line arguments are inferred and set automatically (e.g. 
based on file extensions in `input:` or `output:`), * all fields of the example rules in the `Snakefile`s and their entries are explained via comments (`input:`/`output:`/`params:` etc.), * `stderr` and/or `stdout` are logged correctly (`log:`), depending on the wrapped tool, * temporary files are either written to a unique hidden folder in the working directory, or (better) stored where the Python function `tempfile.gettempdir()` points to (see [here](https://docs.python.org/3/library/tempfile.html#tempfile.gettempdir); this also means that using any Python `tempfile` default behavior works), * the `meta.yaml` contains a link to the documentation of the respective tool or command, * `Snakefile`s pass the linting (`snakemake --lint`), * `Snakefile`s are formatted with [snakefmt](https://github.com/snakemake/snakefmt), * Python wrapper scripts are formatted with [black](https://black.readthedocs.io). * Conda environments use a minimal number of channels, in the recommended order. E.g. for bioconda, use conda-forge, bioconda, nodefaults (conda-forge should have the highest priority, and the defaults channel is usually not needed because most packages are in conda-forge nowadays). 
--------- Co-authored-by: Christian Meesters --- .../collate/environment.linux-64.pin.txt | 42 ++++++++++++++++++ bio/samtools/collate/environment.yaml | 7 +++ bio/samtools/collate/meta.yaml | 11 +++++ bio/samtools/collate/test/Snakefile | 12 +++++ bio/samtools/collate/test/mapped/a.bam | Bin 0 -> 184 bytes bio/samtools/collate/wrapper.py | 21 +++++++++ bio/samtools/sort/test/Snakefile | 2 +- test.py | 8 ++++ 8 files changed, 102 insertions(+), 1 deletion(-) create mode 100644 bio/samtools/collate/environment.linux-64.pin.txt create mode 100644 bio/samtools/collate/environment.yaml create mode 100644 bio/samtools/collate/meta.yaml create mode 100644 bio/samtools/collate/test/Snakefile create mode 100644 bio/samtools/collate/test/mapped/a.bam create mode 100644 bio/samtools/collate/wrapper.py diff --git a/bio/samtools/collate/environment.linux-64.pin.txt b/bio/samtools/collate/environment.linux-64.pin.txt new file mode 100644 index 0000000000..3dde3547a5 --- /dev/null +++ b/bio/samtools/collate/environment.linux-64.pin.txt @@ -0,0 +1,42 @@ +# This file may be used to create an environment using: +# $ conda create --name --file +# platform: linux-64 +@EXPLICIT +https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2#d7c89558ba9fa0495403155b64376d81 +https://conda.anaconda.org/conda-forge/linux-64/ca-certificates-2024.2.2-hbcca054_0.conda#2f4327a1cbe7f022401b236e915a5fef +https://conda.anaconda.org/conda-forge/linux-64/ld_impl_linux-64-2.40-h41732ed_0.conda#7aca3059a1729aa76c597603f10b0dd3 +https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-ng-13.2.0-h7e041cc_5.conda#f6f6600d18a4047b54f803cf708b868a +https://conda.anaconda.org/conda-forge/noarch/tzdata-2024a-h0c530f3_0.conda#161081fc7cec0bfda0d86d7cb595f8d8 +https://conda.anaconda.org/conda-forge/linux-64/libgomp-13.2.0-h807b86a_5.conda#d211c42b9ce49aee3734fdc828731689 +https://conda.anaconda.org/conda-forge/linux-64/_openmp_mutex-4.5-2_gnu.tar.bz2#73aaf86a425cc6e73fcf236a5a46396d 
+https://conda.anaconda.org/conda-forge/linux-64/libgcc-ng-13.2.0-h807b86a_5.conda#d4ff227c46917d3b4565302a2bbb276b +https://conda.anaconda.org/conda-forge/linux-64/bzip2-1.0.8-hd590300_5.conda#69b8b6202a07720f448be700e300ccf4 +https://conda.anaconda.org/conda-forge/linux-64/c-ares-1.28.1-hd590300_0.conda#dcde58ff9a1f30b0037a2315d1846d1f +https://conda.anaconda.org/conda-forge/linux-64/keyutils-1.6.1-h166bdaf_0.tar.bz2#30186d27e2c9fa62b45fb1476b7200e3 +https://conda.anaconda.org/conda-forge/linux-64/libdeflate-1.20-hd590300_0.conda#8e88f9389f1165d7c0936fe40d9a9a79 +https://conda.anaconda.org/conda-forge/linux-64/libev-4.33-hd590300_2.conda#172bf1cd1ff8629f2b1179945ed45055 +https://conda.anaconda.org/conda-forge/linux-64/libexpat-2.6.2-h59595ed_0.conda#e7ba12deb7020dd080c6c70e7b6f6a3d +https://conda.anaconda.org/conda-forge/linux-64/libffi-3.4.2-h7f98852_5.tar.bz2#d645c6d2ac96843a2bfaccd2d62b3ac3 +https://conda.anaconda.org/conda-forge/linux-64/libnsl-2.0.1-hd590300_0.conda#30fd6e37fe21f86f4bd26d6ee73eeec7 +https://conda.anaconda.org/conda-forge/linux-64/libuuid-2.38.1-h0b41bf4_0.conda#40b61aab5c7ba9ff276c41cfffe6b80b +https://conda.anaconda.org/conda-forge/linux-64/libxcrypt-4.4.36-hd590300_1.conda#5aa797f8787fe7a17d1b0821485b5adc +https://conda.anaconda.org/conda-forge/linux-64/libzlib-1.2.13-hd590300_5.conda#f36c115f1ee199da648e0597ec2047ad +https://conda.anaconda.org/conda-forge/linux-64/ncurses-6.4.20240210-h59595ed_0.conda#97da8860a0da5413c7c98a3b3838a645 +https://conda.anaconda.org/conda-forge/linux-64/openssl-3.2.1-hd590300_1.conda#9d731343cff6ee2e5a25c4a091bf8e2a +https://conda.anaconda.org/conda-forge/linux-64/xz-5.2.6-h166bdaf_0.tar.bz2#2161070d867d1b1204ea749c8eec4ef0 +https://conda.anaconda.org/conda-forge/linux-64/libedit-3.1.20191231-he28a2e2_2.tar.bz2#4d331e44109e3f0e19b4cb8f9b82f3e1 +https://conda.anaconda.org/conda-forge/linux-64/libnghttp2-1.58.0-h47da74e_1.conda#700ac6ea6d53d5510591c4344d5c989a 
+https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.45.3-h2797004_0.conda#b3316cbe90249da4f8e84cd66e1cc55b +https://conda.anaconda.org/conda-forge/linux-64/libssh2-1.11.0-h0841786_0.conda#1f5a58e686b13bcfde88b93f547d23fe +https://conda.anaconda.org/conda-forge/linux-64/readline-8.2-h8228510_1.conda#47d31b792659ce70f470b5c82fdfb7a4 +https://conda.anaconda.org/conda-forge/linux-64/tk-8.6.13-noxft_h4845f30_101.conda#d453b98d9c83e71da0741bb0ff4d76bc +https://conda.anaconda.org/conda-forge/linux-64/zstd-1.5.5-hfc55251_0.conda#04b88013080254850d6c01ed54810589 +https://conda.anaconda.org/conda-forge/linux-64/krb5-1.21.2-h659d440_0.conda#cd95826dbd331ed1be26bdf401432844 +https://conda.anaconda.org/conda-forge/linux-64/python-3.12.3-hab00c5b_0_cpython.conda#2540b74d304f71d3e89c81209db4db84 +https://conda.anaconda.org/conda-forge/linux-64/libcurl-8.7.1-hca28451_0.conda#755c7f876815003337d2c61ff5d047e5 +https://conda.anaconda.org/conda-forge/noarch/setuptools-69.5.1-pyhd8ed1ab_0.conda#7462280d81f639363e6e63c81276bd9e +https://conda.anaconda.org/bioconda/noarch/snakemake-wrapper-utils-0.6.2-pyhdfd78af_0.tar.bz2#fd8759bbd04116eace828c4fab906096 +https://conda.anaconda.org/conda-forge/noarch/wheel-0.43.0-pyhd8ed1ab_1.conda#0b5293a157c2b5cd513dd1b03d8d3aae +https://conda.anaconda.org/bioconda/linux-64/htslib-1.20-h81da01d_0.tar.bz2#1084947eefd2bbe9c1f84ca24061a9d5 +https://conda.anaconda.org/conda-forge/noarch/pip-24.0-pyhd8ed1ab_0.conda#f586ac1e56c8638b64f9c8122a7b8a67 +https://conda.anaconda.org/bioconda/linux-64/samtools-1.20-h50ea8bc_0.tar.bz2#7b3b1d0feea64e7e211ae24e7cd126d8 diff --git a/bio/samtools/collate/environment.yaml b/bio/samtools/collate/environment.yaml new file mode 100644 index 0000000000..3defe4e72c --- /dev/null +++ b/bio/samtools/collate/environment.yaml @@ -0,0 +1,7 @@ +channels: + - conda-forge + - bioconda + - nodefaults +dependencies: + - samtools =1.20 + - snakemake-wrapper-utils =0.6.2 diff --git a/bio/samtools/collate/meta.yaml 
b/bio/samtools/collate/meta.yaml new file mode 100644 index 0000000000..6ce423746a --- /dev/null +++ b/bio/samtools/collate/meta.yaml @@ -0,0 +1,11 @@ +name: samtools collate +description: Shuffles and groups reads together by their names. +url: http://www.htslib.org/doc/samtools-collate.html +authors: + - Filipe G. Vieira +input: + - SAM/BAM/CRAM file +output: + - SAM/BAM/CRAM file +params: + - extra: additional program arguments (not `-@/--threads`, `--reference`, `-o` or `-O/--output-fmt`). diff --git a/bio/samtools/collate/test/Snakefile b/bio/samtools/collate/test/Snakefile new file mode 100644 index 0000000000..c2934e3f3a --- /dev/null +++ b/bio/samtools/collate/test/Snakefile @@ -0,0 +1,12 @@ +rule samtools_collate: + input: + "mapped/{sample}.bam", + output: + "{sample}.collated.bam", + log: + "logs/{sample}.log", + params: + extra="-f", + threads: 2 + wrapper: + "master/bio/samtools/collate" diff --git a/bio/samtools/collate/test/mapped/a.bam b/bio/samtools/collate/test/mapped/a.bam new file mode 100644 index 0000000000000000000000000000000000000000..dba1268acbd8446e4fde54d7da33434597fbe635 GIT binary patch literal 184 zcmb2|=3rp}f&Xj_PR>jWb_~TuUs6R95)ukH_@3~5+q`PUgD)R98yP)FV(BtuE_7vW z=9s|5aI{h|P#vgC9!+};gK@G0Lz-KrbIh-tVq12fpEAOZjf CvNY8I literal 0 HcmV?d00001 diff --git a/bio/samtools/collate/wrapper.py b/bio/samtools/collate/wrapper.py new file mode 100644 index 0000000000..2e583e69db --- /dev/null +++ b/bio/samtools/collate/wrapper.py @@ -0,0 +1,21 @@ +__author__ = "Filipe G. Vieira" +__copyright__ = "Copyright 2024, Filipe G. 
Vieira" +__license__ = "MIT" + + +import tempfile +from pathlib import Path +from snakemake.shell import shell +from snakemake_wrapper_utils.samtools import get_samtools_opts + + +samtools_opts = get_samtools_opts(snakemake, parse_write_index=False) +extra = snakemake.params.get("extra", "") +log = snakemake.log_fmt_shell(stdout=True, stderr=True) + +with tempfile.TemporaryDirectory() as tmpdir: + tmp_prefix = Path(tmpdir) / "samtools_collate" + + shell( + "samtools collate {samtools_opts} {extra} -T {tmp_prefix} {snakemake.input[0]} {log}" + ) diff --git a/bio/samtools/sort/test/Snakefile b/bio/samtools/sort/test/Snakefile index b8e8f16a96..4504014a05 100644 --- a/bio/samtools/sort/test/Snakefile +++ b/bio/samtools/sort/test/Snakefile @@ -4,7 +4,7 @@ rule samtools_sort: output: "mapped/{sample}.sorted.bam", log: - "{sample}.log", + "logs/{sample}.log", params: extra="-m 4G", threads: 8 diff --git a/test.py b/test.py index c53f2774d9..54d41431cf 100644 --- a/test.py +++ b/test.py @@ -4041,6 +4041,14 @@ def test_samtools_calmd(): ) +@skip_if_not_modified +def test_samtools_collate(): + run( + "bio/samtools/collate", + ["snakemake", "--cores", "1", "a.collated.bam", "--use-conda", "-F"], + ) + + @skip_if_not_modified def test_samtools_fixmate(): run(