From f1b80a92e556675f23d041e70eeb53c13085b041 Mon Sep 17 00:00:00 2001 From: betteridiot Date: Fri, 12 Oct 2018 20:42:43 -0400 Subject: [PATCH] versioning master --- README.rst | 194 ++++++++++++++++++--------------------- codemeta.json | 4 +- docs/paper/codemeta.json | 4 +- setup.py | 2 +- 4 files changed, 94 insertions(+), 110 deletions(-) diff --git a/README.rst b/README.rst index de0fb19..2d0e40d 100644 --- a/README.rst +++ b/README.rst @@ -1,15 +1,22 @@ -|Build Status| |noarch| |Documentation Status| |Conda Version| |PyPI -version| |Maintainability| - -|status| |DOI| |License| - -+---------+---------------------+ -| Host | Downloads | -+=========+=====================+ -| PyPI | |Downloads| | -+---------+---------------------+ -| conda | |Conda Downloads| | -+---------+---------------------+ +`Build Status `__ +`noarch `__ +`Documentation +Status `__ +`Conda Version `__ `PyPI +version `__ +`Maintainability `__ + +`status `__ +`DOI `__ +`License `__ + ++-------+------------------------------------------------------------------+ +| Host | Downloads | ++=======+==================================================================+ +| PyPI | `Downloads `__ | ++-------+------------------------------------------------------------------+ +| conda | `Conda Downloads `__ | ++-------+------------------------------------------------------------------+ BAMnostic ========= @@ -37,44 +44,44 @@ Through the ``conda`` package manager (`Anaconda Cloud `__) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: bash - pip install bamnostic + pip install bamnostic - # or, if you don't have superuser access - pip install --user bamnostic + # or, if you don't have superuser access + pip install --user bamnostic Through pip+Github ~~~~~~~~~~~~~~~~~~ .. 
code:: bash - # again, use --user if you don't have superuser access - pip install -e git+https://github.com/betteridiot/bamnostic.git + # again, use --user if you don't have superuser access + pip install -e git+https://github.com/betteridiot/bamnostic.git#egg=bamnostic - # or, if you don't have superuser access - pip install --user -e git+https://github.com/betteridiot/bamnostic.git + # or, if you don't have superuser access + pip install --user -e git+https://github.com/betteridiot/bamnostic.git#egg=bamnostic Traditional GitHub clone ~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: bash - git clone https://github.com/betteridiot/bamnostic.git - cd bamnostic - pip install -e . + git clone https://github.com/betteridiot/bamnostic.git + cd bamnostic + pip install -e . - # or, if you don't have superuser access - pip install --user -e . + # or, if you don't have superuser access + pip install --user -e . -------------- @@ -89,7 +96,7 @@ Importing .. code:: python - >>> import bamnostic as bs + >>> import bamnostic as bs Loading your BAM file (Note: CRAM and CSI formats are not supported at this time) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -103,7 +110,7 @@ within the package. .. code:: python - >>> bam = bs.AlignmentFile(bs.example_bam, 'rb') + >>> bam = bs.AlignmentFile(bs.example_bam, 'rb') Get the header ~~~~~~~~~~~~~~ @@ -115,65 +122,65 @@ length tuple values. .. code:: python - >>> bam.header - {0: ('chr1', 1575), 1: ('chr2', 1584)} + >>> bam.header + {0: ('chr1', 1575), 1: ('chr2', 1584)} Data validation through ``head()`` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. 
code:: python - >>>bam.head(n=2) - [EAS56_57:6:190:289:82 69 chr1 100 0 * = 100 0 CTCAAGGTTGTTGCAAGGGGGTCTATGTGAACAAA <<<7<<<;<<<<<<<<8;;<7;4<;<;;;;;94<; MF:C:192, - EAS56_57:6:190:289:82 137 chr1 100 73 35M = 100 0 AGGGGTGCAGAGCCGAGTCACGGGGTTGCCAGCAC <<<<<<;<<<<<<<<<<;<<;<<<<;8<6;9;;2; MF:C:64 Aq:C:0 NM:C:0 UQ:C:0 H0:C:1 H1:C:0] + >>>bam.head(n=2) + [EAS56_57:6:190:289:82 69 chr1 100 0 * = 100 0 CTCAAGGTTGTTGCAAGGGGGTCTATGTGAACAAA <<<7<<<;<<<<<<<<8;;<7;4<;<;;;;;94<; MF:C:192, + EAS56_57:6:190:289:82 137 chr1 100 73 35M = 100 0 AGGGGTGCAGAGCCGAGTCACGGGGTTGCCAGCAC <<<<<<;<<<<<<<<<<;<<;<<<<;8<6;9;;2; MF:C:64 Aq:C:0 NM:C:0 UQ:C:0 H0:C:1 H1:C:0] Getting the first read ~~~~~~~~~~~~~~~~~~~~~~ .. code:: python - >>> first_read = next(bam) - >>> print(first_read) - EAS56_57:6:190:289:82 69 chr1 100 0 * = 100 0 CTCAAGGTTGTTGCAAGGGGGTCTATGTGAACAAA <<<7<<<;<<<<<<<<8;;<7;4<;<;;;;;94<; MF:C:192 + >>> first_read = next(bam) + >>> print(first_read) + EAS56_57:6:190:289:82 69 chr1 100 0 * = 100 0 CTCAAGGTTGTTGCAAGGGGGTCTATGTGAACAAA <<<7<<<;<<<<<<<<8;;<7;4<;<;;;;;94<; MF:C:192 Exploring the read ~~~~~~~~~~~~~~~~~~ .. code:: python - # read name - >>> print(first_read.read_name) - EAS56_57:6:190:289:82 + # read name + >>> print(first_read.read_name) + EAS56_57:6:190:289:82 - # 0-based position - >>> print(first_read.pos) - 99 + # 0-based position + >>> print(first_read.pos) + 99 - # nucleotide sequence - >>> print(first_read.seq) - CTCAAGGTTGTTGCAAGGGGGTCTATGTGAACAAA + # nucleotide sequence + >>> print(first_read.seq) + CTCAAGGTTGTTGCAAGGGGGTCTATGTGAACAAA - # Read FLAG - >>> print(first_read.flag) - 69 + # Read FLAG + >>> print(first_read.flag) + 69 - # decoded FLAG - >>> bs.utils.flag_decode(first_read.flag) - [(1, 'read paired'), (4, 'read unmapped'), (64, 'first in pair')] + # decoded FLAG + >>> bs.utils.flag_decode(first_read.flag) + [(1, 'read paired'), (4, 'read unmapped'), (64, 'first in pair')] Random Access ~~~~~~~~~~~~~ .. 
code:: python - >>> for i, read in enumerate(bam.fetch('chr2', 1, 100)): - ... if i >= 3: - ... break - ... print(read) + >>> for i, read in enumerate(bam.fetch('chr2', 1, 100)): + ... if i >= 3: + ... break + ... print(read) - B7_591:8:4:841:340 73 chr2 1 99 36M * 0 0 TTCAAATGAACTTCTGTAATTGAAAAATTCATTTAA <<<<<<<<;<<<<<<<<;<<<<<;<;:<<<<<<<;; MF:C:18 Aq:C:77 NM:C:0 UQ:C:0 H0:C:1 H1:C:0 - EAS54_67:4:142:943:582 73 chr2 1 99 35M * 0 0 TTCAAATGAACTTCTGTAATTGAAAAATTCATTTA <<<<<<;<<<<<<:<<;<<<<;<<<;<<<:;<<<5 MF:C:18 Aq:C:41 NM:C:0 UQ:C:0 H0:C:1 H1:C:0 - EAS54_67:6:43:859:229 153 chr2 1 66 35M * 0 0 TTCAAATGAACTTCTGTAATTGAAAAATTCATTTA +37<=<.;<<7.;77<5<<0<<<;<<<27<<<<<< MF:C:32 Aq:C:0 NM:C:0 UQ:C:0 H0:C:1 H1:C:0 + B7_591:8:4:841:340 73 chr2 1 99 36M * 0 0 TTCAAATGAACTTCTGTAATTGAAAAATTCATTTAA <<<<<<<<;<<<<<<<<;<<<<<;<;:<<<<<<<;; MF:C:18 Aq:C:77 NM:C:0 UQ:C:0 H0:C:1 H1:C:0 + EAS54_67:4:142:943:582 73 chr2 1 99 35M * 0 0 TTCAAATGAACTTCTGTAATTGAAAAATTCATTTA <<<<<<;<<<<<<:<<;<<<<;<<<;<<<:;<<<5 MF:C:18 Aq:C:41 NM:C:0 UQ:C:0 H0:C:1 H1:C:0 + EAS54_67:6:43:859:229 153 chr2 1 66 35M * 0 0 TTCAAATGAACTTCTGTAATTGAAAAATTCATTTA +37<=<.;<<7.;77<5<<0<<<;<<<27<<<<<< MF:C:32 Aq:C:0 NM:C:0 UQ:C:0 H0:C:1 H1:C:0 -------------- @@ -195,10 +202,10 @@ Format `__): .. code:: bash - @SRR001666.1 071112_SLXA-EAS1_s_7:5:1:817:345 length=36 - GGGTGATGGCCGCTGCCGATGGCGTCAAATCCCACC - +SRR001666.1 071112_SLXA-EAS1_s_7:5:1:817:345 length=36 - IIIIIIIIIIIIIIIIIIIIIIIIIIIIII9IG9IC + @SRR001666.1 071112_SLXA-EAS1_s_7:5:1:817:345 length=36 + GGGTGATGGCCGCTGCCGATGGCGTCAAATCCCACC + +SRR001666.1 071112_SLXA-EAS1_s_7:5:1:817:345 length=36 + IIIIIIIIIIIIIIIIIIIIIIIIIIIIII9IG9IC Each entry details the read name, lenght, string representation, and quality of each aligned base along the read. ### SAM/BAM Format The data @@ -214,18 +221,18 @@ format `__): .. 
code:: bash - @HD VN:1.5 SO:coordinate - @SQ SN:ref LN:45 - r001 99 ref 7 30 8M2I4M1D3M = 37 39 TTAGATAAAGGATACTG * - r002 0 ref 9 30 3S6M1P1I4M * 0 0 AAAAGATAAGGATA * - r003 0 ref 9 30 5S6M * 0 0 GCCTAAGCTAA * SA:Z:ref,29,-,6H5M,17,0; - r004 0 ref 16 30 6M14N5M * 0 0 ATAGCTTCAGC * - r003 2064 ref 29 17 6H5M * 0 0 TAGGC * SA:Z:ref,9,+,5S6M,30,1; - r001 147 ref 37 30 9M = 7 -39 CAGCGGCAT * NM:i:1 + @HD VN:1.5 SO:coordinate + @SQ SN:ref LN:45 + r001 99 ref 7 30 8M2I4M1D3M = 37 39 TTAGATAAAGGATACTG * + r002 0 ref 9 30 3S6M1P1I4M * 0 0 AAAAGATAAGGATA * + r003 0 ref 9 30 5S6M * 0 0 GCCTAAGCTAA * SA:Z:ref,29,-,6H5M,17,0; + r004 0 ref 16 30 6M14N5M * 0 0 ATAGCTTCAGC * + r003 2064 ref 29 17 6H5M * 0 0 TAGGC * SA:Z:ref,9,+,5S6M,30,1; + r001 147 ref 37 30 9M = 7 -39 CAGCGGCAT * NM:i:1 There are many benefits to the SAM format: human-readable, each entry is contained to a single line (supporting simple stream analysis), concise -description of the read's quality and position, and a file header +description of the read’s quality and position, and a file header metadata that supports integrity and reproducibility. Additionally, a compressed form of the SAM format was designed in parallel. It is called the Binary Alignment Map @@ -248,7 +255,7 @@ The BAI file, often produced via to be sorted prior to indexing. Using a modified R-tree binning strategy, each reference contig is divided into sequential, non-overlapping bins. That is a parent bin may contain numerous -children, but none of the children bins overlap another's assigned +children, but none of the children bins overlap another’s assigned interval. Each BAM entry is then assigned to the bin that fully contains it. A visual description of the binning strategy can be found `here `__. Each bin is @@ -256,7 +263,7 @@ comprised of chunks, and each chunk contains its respective start and stop byte positions within the BAM file. In addition to the bin index, a linear index is produced as well. 
Again, the reference contig is divided into equally sized windows (covering ≈16Kbp/each). Along those windows, -the start offset of the first read that ***overlaps*** that window is +the start offset of the first read that **overlaps** that window is stored. Now, given a region of interest, the first bin that overlaps the region is looked up. The chunks in the bin are stored as *virtual offsets*. A virtual offset is a 64-bit unsigned integer that is @@ -268,14 +275,14 @@ calculated by: .. code:: python - virtual_offset = coffset << 16 | uoffset + virtual_offset = coffset << 16 | uoffset Similarly, the complement of the above is as follows: .. code:: python - coffset = virtual_offset >> 16 - uoffset = virtual_offset ^ (coffset << 16) + coffset = virtual_offset >> 16 + uoffset = virtual_offset ^ (coffset << 16) A simple seek call against the BAM file will put the head at the start of your region of interest. @@ -290,7 +297,7 @@ BAM files is to use the program known as `samtools `__. The maintainers of samtools have done a tremendous job of providing distributions that work on a multitude of operating systems. While samtools is powerful, as a -command line interface, it is also limited in that it doesn't really +command line interface, it is also limited in that it doesn’t really afford the ability to perform real-time dynamic processing of reads (without requiring many system calls to samtools). Due to its general nature and inherent readability, a package was written in Python called @@ -301,10 +308,10 @@ However, the foundation of these tools is built on a C-API called compiled in a Windows environment. By extension, neither can pysam. In building a tool for genomic visualization, I wanted it to be platform agnostic. This is precisely when I found out that the tools I had -planned to use as a backend did not work on Windows...the most prevalent +planned to use as a backend did not work on Windows…the most prevalent operation system in the end-user world. 
So, I wrote **bamnostic**. As of this writing, bamnostic is OS-agnostic and written completely in Pure -Python--requiring only the standard library (and ``pytest`` for the test +Python–requiring only the standard library (and ``pytest`` for the test suite). Special care was taken to ensure that it would run on all versions of CPython 2.7 or greater. Additionally, it runs in both stable versions of PyPy. While it may perform slower than its C counterparts, @@ -332,31 +339,8 @@ Community Guidelines: --------------------- Eagerly accepting PRs for improvements, optimizations, or features. For -any questions or issues, please feel free to make a post to bamnostic's +any questions or issues, please feel free to make a post to bamnostic’s `Issue tracker `__ on github or read over our `CONTRIBUTING `__ documentation. - -.. |Build Status| image:: https://travis-ci.org/betteridiot/bamnostic.svg?branch=master - :target: https://travis-ci.org/betteridiot/bamnostic -.. |noarch| image:: https://img.shields.io/circleci/project/github/conda-forge/bamnostic-feedstock/master.svg?label=noarch - :target: https://circleci.com/gh/conda-forge/bamnostic-feedstock -.. |Documentation Status| image:: https://readthedocs.org/projects/bamnostic/badge/?version=latest - :target: https://bamnostic.readthedocs.io/en/latest/?badge=latest -.. |Conda Version| image:: https://img.shields.io/conda/vn/conda-forge/bamnostic.svg - :target: https://anaconda.org/conda-forge/bamnostic -.. |PyPI version| image:: https://badge.fury.io/py/bamnostic.svg - :target: https://badge.fury.io/py/bamnostic -.. |Maintainability| image:: https://api.codeclimate.com/v1/badges/d7e36e72f109c598c86d/maintainability - :target: https://codeclimate.com/github/betteridiot/bamnostic/maintainability -.. |status| image:: http://joss.theoj.org/papers/9952b35bbb30ca6c01e6a27b80006bd8/status.svg - :target: http://joss.theoj.org/papers/9952b35bbb30ca6c01e6a27b80006bd8 -.. 
|DOI| image:: https://zenodo.org/badge/DOI/10.5281/zenodo.1341959.svg - :target: https://doi.org/10.5281/zenodo.1341959 -.. |License| image:: https://img.shields.io/badge/License-BSD%203--Clause-blue.svg - :target: https://github.com/betteridiot/bamnostic/blob/master/LICENSE -.. |Downloads| image:: http://pepy.tech/badge/bamnostic - :target: http://pepy.tech/project/bamnostic -.. |Conda Downloads| image:: https://img.shields.io/conda/dn/conda-forge/bamnostic.svg - :target: https://anaconda.org/conda-forge/bamnostic diff --git a/codemeta.json b/codemeta.json index 417d23c..8d41642 100644 --- a/codemeta.json +++ b/codemeta.json @@ -37,8 +37,8 @@ "description": "BAMnostic is a Pure Python OS, version, and runtime agnostic BAM file parser", "keywords": "BAM, pysam, genomics, genetics, htslib, samtools", "license": "https://github.com/betteridiot/bamnostic/blob/master/LICENSE", - "softwareVersion": "v0.9.5", - "version": "v0.9.5", + "softwareVersion": "v1.0.0", + "version": "v1.0.0", "readme": "https://github.com/betteridiot/bamnostic/blob/master/README.md", "buildInstructions": "https://github.com/betteridiot/bamnostic/blob/master/README.md", "issueTracker": "https://github.com/betteridiot/bamnostic/issues", diff --git a/docs/paper/codemeta.json b/docs/paper/codemeta.json index c27382e..b0a1f13 100644 --- a/docs/paper/codemeta.json +++ b/docs/paper/codemeta.json @@ -37,8 +37,8 @@ "description": "BAMnostic: an OS-agnostic toolkit for genomic sequence analysis", "keywords": "BAM, pysam, genomics, genetics, htslib, samtools", "license": "https://github.com/betteridiot/bamnostic/blob/master/LICENSE", - "softwareVersion": "v0.9.5", - "version": "v0.9.5", + "softwareVersion": "v1.0.0", + "version": "v1.0.0", "readme": "https://github.com/betteridiot/bamnostic/blob/master/README.md", "buildInstructions": "https://github.com/betteridiot/bamnostic/blob/master/README.md", "issueTracker": "https://github.com/betteridiot/bamnostic/issues", diff --git a/setup.py b/setup.py index 
5ff996e..38813b3 100644 --- a/setup.py +++ b/setup.py @@ -9,7 +9,7 @@ def readme(): setup( name='bamnostic', - version='0.9.5', + version='1.0.0', description='Pure Python, OS-agnostic Binary Alignment Map (BAM) random access and parsing tool', long_description=readme(), url='https://github.com/betteridiot/bamnostic/',