diff --git a/MANIFEST.in b/MANIFEST.in
index 555426d..7baa7c2 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -5,3 +5,4 @@ include ./docs/build/*
include ./CONTRIBUTING*
include ./CODE_OF_CONDUCT*
include ./requirements.txt
+include ./version
diff --git a/codemeta.json b/codemeta.json
index ef80a6d..f693555 100644
--- a/codemeta.json
+++ b/codemeta.json
@@ -37,8 +37,8 @@
"description": "BAMnostic is a Pure Python OS, version, and runtime agnostic BAM file parser",
"keywords": "BAM, pysam, genomics, genetics, htslib, samtools",
"license": "https://github.com/betteridiot/bamnostic/blob/master/LICENSE",
- "softwareVersion": "v1.0.8,
- "version": "v1.0.8",
+ "softwareVersion": "v1.0.10",
+ "version": "v1.0.10",
"readme": "https://github.com/betteridiot/bamnostic/blob/master/README.md",
"buildInstructions": "https://github.com/betteridiot/bamnostic/blob/master/README.md",
"issueTracker": "https://github.com/betteridiot/bamnostic/issues",
diff --git a/docs/build/doctrees/bamnostic.doctree b/docs/build/doctrees/bamnostic.doctree
index a562bb0..d51a43e 100644
Binary files a/docs/build/doctrees/bamnostic.doctree and b/docs/build/doctrees/bamnostic.doctree differ
diff --git a/docs/build/doctrees/environment.pickle b/docs/build/doctrees/environment.pickle
index 6b95f05..b8d6e95 100644
Binary files a/docs/build/doctrees/environment.pickle and b/docs/build/doctrees/environment.pickle differ
diff --git a/docs/build/doctrees/index.doctree b/docs/build/doctrees/index.doctree
index 742c143..1415b58 100644
Binary files a/docs/build/doctrees/index.doctree and b/docs/build/doctrees/index.doctree differ
diff --git a/docs/build/doctrees/install.doctree b/docs/build/doctrees/install.doctree
index 967310e..33793d3 100644
Binary files a/docs/build/doctrees/install.doctree and b/docs/build/doctrees/install.doctree differ
diff --git a/docs/build/doctrees/quickstart.doctree b/docs/build/doctrees/quickstart.doctree
index 3346402..a1a74da 100644
Binary files a/docs/build/doctrees/quickstart.doctree and b/docs/build/doctrees/quickstart.doctree differ
diff --git a/docs/build/html/.buildinfo b/docs/build/html/.buildinfo
index a51dd23..659a44d 100644
--- a/docs/build/html/.buildinfo
+++ b/docs/build/html/.buildinfo
@@ -1,4 +1,4 @@
# Sphinx build info version 1
# This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
-config: da346d61a57fbe238b77db2d99796729
+config: 7fa24b6203ce799d5a84455c9dbe3c8e
tags: 645f666f9bcd5a90fca523b33c5a78b7
diff --git a/docs/build/html/_modules/bamnostic/bai.html b/docs/build/html/_modules/bamnostic/bai.html
index 145cea6..7cf8bcd 100644
--- a/docs/build/html/_modules/bamnostic/bai.html
+++ b/docs/build/html/_modules/bamnostic/bai.html
@@ -8,7 +8,7 @@
-
bamnostic.bai — bamnostic 1.0.9 documentation
+ bamnostic.bai — bamnostic 1.0.10 documentation
@@ -59,7 +59,7 @@
- 1.0.9
+ 1.0.10
diff --git a/docs/build/html/_modules/bamnostic/bgzf.html b/docs/build/html/_modules/bamnostic/bgzf.html
index c6eeba7..05e7177 100644
--- a/docs/build/html/_modules/bamnostic/bgzf.html
+++ b/docs/build/html/_modules/bamnostic/bgzf.html
@@ -8,7 +8,7 @@
- bamnostic.bgzf — bamnostic 1.0.9 documentation
+ bamnostic.bgzf — bamnostic 1.0.10 documentation
@@ -59,7 +59,7 @@
- 1.0.9
+ 1.0.10
diff --git a/docs/build/html/_modules/bamnostic/core.html b/docs/build/html/_modules/bamnostic/core.html
index 7bec949..2685ea1 100644
--- a/docs/build/html/_modules/bamnostic/core.html
+++ b/docs/build/html/_modules/bamnostic/core.html
@@ -8,7 +8,7 @@
- bamnostic.core — bamnostic 1.0.9 documentation
+ bamnostic.core — bamnostic 1.0.10 documentation
@@ -59,7 +59,7 @@
- 1.0.9
+ 1.0.10
diff --git a/docs/build/html/_modules/bamnostic/utils.html b/docs/build/html/_modules/bamnostic/utils.html
index 4cfcbfc..6891686 100644
--- a/docs/build/html/_modules/bamnostic/utils.html
+++ b/docs/build/html/_modules/bamnostic/utils.html
@@ -8,7 +8,7 @@
- bamnostic.utils — bamnostic 1.0.9 documentation
+ bamnostic.utils — bamnostic 1.0.10 documentation
@@ -59,7 +59,7 @@
- 1.0.9
+ 1.0.10
diff --git a/docs/build/html/_modules/index.html b/docs/build/html/_modules/index.html
index 57be0c7..c8d7fb8 100644
--- a/docs/build/html/_modules/index.html
+++ b/docs/build/html/_modules/index.html
@@ -8,7 +8,7 @@
- Overview: module code — bamnostic 1.0.9 documentation
+ Overview: module code — bamnostic 1.0.10 documentation
@@ -59,7 +59,7 @@
- 1.0.9
+ 1.0.10
diff --git a/docs/build/html/_static/basic.css b/docs/build/html/_static/basic.css
index 0807176..53acd09 100644
--- a/docs/build/html/_static/basic.css
+++ b/docs/build/html/_static/basic.css
@@ -231,6 +231,16 @@ a.headerlink {
visibility: hidden;
}
+a.brackets:before,
+span.brackets > a:before{
+ content: "[";
+}
+
+a.brackets:after,
+span.brackets > a:after {
+ content: "]";
+}
+
h1:hover > a.headerlink,
h2:hover > a.headerlink,
h3:hover > a.headerlink,
@@ -391,6 +401,16 @@ table.citation td {
border-bottom: none;
}
+th > p:first-child,
+td > p:first-child {
+ margin-top: 0px;
+}
+
+th > p:last-child,
+td > p:last-child {
+ margin-bottom: 0px;
+}
+
/* -- figures --------------------------------------------------------------- */
div.figure {
@@ -460,11 +480,57 @@ ol.upperroman {
list-style: upper-roman;
}
+li > p:first-child {
+ margin-top: 0px;
+}
+
+li > p:last-child {
+ margin-bottom: 0px;
+}
+
+dl.footnote > dt,
+dl.citation > dt {
+ float: left;
+}
+
+dl.footnote > dd,
+dl.citation > dd {
+ margin-bottom: 0em;
+}
+
+dl.footnote > dd:after,
+dl.citation > dd:after {
+ content: "";
+ clear: both;
+}
+
+dl.field-list {
+ display: flex;
+ flex-wrap: wrap;
+}
+
+dl.field-list > dt {
+ flex-basis: 20%;
+ font-weight: bold;
+ word-break: break-word;
+}
+
+dl.field-list > dt:after {
+ content: ":";
+}
+
+dl.field-list > dd {
+ flex-basis: 70%;
+ padding-left: 1em;
+ margin-left: 0em;
+ margin-bottom: 0em;
+}
+
dl {
margin-bottom: 15px;
}
-dd p {
+dd > p:first-child {
margin-top: 0px;
}
@@ -537,6 +603,12 @@ dl.glossary dt {
font-style: oblique;
}
+.classifier:before {
+ font-style: normal;
+ margin: 0.5em;
+ content: ":";
+}
+
abbr, acronym {
border-bottom: dotted 1px;
cursor: help;
diff --git a/docs/build/html/_static/doctools.js b/docs/build/html/_static/doctools.js
index 344db17..b33f87f 100644
--- a/docs/build/html/_static/doctools.js
+++ b/docs/build/html/_static/doctools.js
@@ -87,14 +87,13 @@ jQuery.fn.highlightText = function(text, className) {
node.nextSibling));
node.nodeValue = val.substr(0, pos);
if (isInSVG) {
- var bbox = span.getBBox();
var rect = document.createElementNS("http://www.w3.org/2000/svg", "rect");
- rect.x.baseVal.value = bbox.x;
+ var bbox = node.parentElement.getBBox();
+ rect.x.baseVal.value = bbox.x;
rect.y.baseVal.value = bbox.y;
rect.width.baseVal.value = bbox.width;
rect.height.baseVal.value = bbox.height;
rect.setAttribute('class', className);
- var parentOfText = node.parentNode.parentNode;
addItems.push({
"parent": node.parentNode,
"target": rect});
diff --git a/docs/build/html/_static/documentation_options.js b/docs/build/html/_static/documentation_options.js
index f647e29..a07c9f2 100644
--- a/docs/build/html/_static/documentation_options.js
+++ b/docs/build/html/_static/documentation_options.js
@@ -1,10 +1,10 @@
var DOCUMENTATION_OPTIONS = {
URL_ROOT: document.getElementById("documentation_options").getAttribute('data-url_root'),
- VERSION: '1.0.9',
+ VERSION: '1.0.10',
LANGUAGE: 'en',
COLLAPSE_INDEX: false,
FILE_SUFFIX: '.html',
HAS_SOURCE: true,
SOURCELINK_SUFFIX: '.txt',
- NAVIGATION_WITH_KEYS: false,
+ NAVIGATION_WITH_KEYS: false
};
\ No newline at end of file
diff --git a/docs/build/html/_static/searchtools.js b/docs/build/html/_static/searchtools.js
index 5ff3180..4c58264 100644
--- a/docs/build/html/_static/searchtools.js
+++ b/docs/build/html/_static/searchtools.js
@@ -36,8 +36,10 @@ if (!Scorer) {
// query found in title
title: 15,
+ partialTitle: 7,
// query found in terms
- term: 5
+ term: 5,
+ partialTerm: 2
};
}
@@ -56,6 +58,14 @@ var Search = {
_queued_query : null,
_pulse_status : -1,
+ htmlToText : function(htmlString) {
+ var htmlElement = document.createElement('span');
+ htmlElement.innerHTML = htmlString;
+ $(htmlElement).find('.headerlink').remove();
+ docContent = $(htmlElement).find('[role=main]')[0];
+ return docContent.textContent || docContent.innerText;
+ },
+
init : function() {
var params = $.getQueryParameters();
if (params.q) {
@@ -65,16 +75,6 @@ var Search = {
}
},
- loadIndex : function(url) {
- $.ajax({type: "GET", url: url, data: null,
- dataType: "script", cache: true,
- complete: function(jqxhr, textstatus) {
- if (textstatus != "success") {
- document.getElementById("searchindexloader").src = url;
- }
- }});
- },
-
setIndex : function(index) {
var q;
this._index = index;
@@ -120,7 +120,7 @@ var Search = {
this.out = $('#search-results');
this.title = $('' + _('Searching') + '
').appendTo(this.out);
this.dots = $('').appendTo(this.title);
- this.status = $('').appendTo(this.out);
+ this.status = $('
').appendTo(this.out);
this.output = $('').appendTo(this.out);
$('#search-progress').text(_('Preparing search...'));
@@ -259,11 +259,7 @@ var Search = {
displayNextItem();
});
} else if (DOCUMENTATION_OPTIONS.HAS_SOURCE) {
- var suffix = DOCUMENTATION_OPTIONS.SOURCELINK_SUFFIX;
- if (suffix === undefined) {
- suffix = '.txt';
- }
- $.ajax({url: DOCUMENTATION_OPTIONS.URL_ROOT + '_sources/' + item[5] + (item[5].slice(-suffix.length) === suffix ? '' : suffix),
+ $.ajax({url: DOCUMENTATION_OPTIONS.URL_ROOT + item[0] + DOCUMENTATION_OPTIONS.FILE_SUFFIX,
dataType: "text",
complete: function(jqxhr, textstatus) {
var data = jqxhr.responseText;
@@ -385,6 +381,19 @@ var Search = {
{files: terms[word], score: Scorer.term},
{files: titleterms[word], score: Scorer.title}
];
+ // add support for partial matches
+ if (word.length > 2) {
+ for (var w in terms) {
+ if (w.match(word) && !terms[word]) {
+ _o.push({files: terms[w], score: Scorer.partialTerm})
+ }
+ }
+ for (var w in titleterms) {
+ if (w.match(word) && !titleterms[word]) {
+ _o.push({files: titleterms[w], score: Scorer.partialTitle})
+ }
+ }
+ }
// no match but word was a required one
if ($u.every(_o, function(o){return o.files === undefined;})) {
@@ -424,8 +433,12 @@ var Search = {
var valid = true;
// check if all requirements are matched
- if (fileMap[file].length != searchterms.length)
- continue;
+ var filteredTermCount = // as search terms with length < 3 are discarded: ignore
+ searchterms.filter(function(term){return term.length > 2}).length
+ if (
+ fileMap[file].length != searchterms.length &&
+ fileMap[file].length != filteredTermCount
+ ) continue;
// ensure that none of the excluded terms is in the search result
for (i = 0; i < excluded.length; i++) {
@@ -456,7 +469,8 @@ var Search = {
* words. the first one is used to find the occurrence, the
* latter for highlighting it.
*/
- makeSearchSummary : function(text, keywords, hlwords) {
+ makeSearchSummary : function(htmlText, keywords, hlwords) {
+ var text = Search.htmlToText(htmlText);
var textLower = text.toLowerCase();
var start = 0;
$.each(keywords, function() {
diff --git a/docs/build/html/bamnostic.html b/docs/build/html/bamnostic.html
index 8a888ed..48871ea 100644
--- a/docs/build/html/bamnostic.html
+++ b/docs/build/html/bamnostic.html
@@ -8,7 +8,7 @@
- bamnostic package — bamnostic 1.0.9 documentation
+ bamnostic package — bamnostic 1.0.10 documentation
@@ -61,7 +61,7 @@
- 1.0.9
+ 1.0.10
@@ -171,18 +171,18 @@
outside of the standard library. Therefore, bamnostic will run on all stable
versions of PyPy.
-
Note
-
SAM and CRAM support is not yet implemented
+
Note
+
SAM and CRAM support is not yet implemented
The three main classes of bamnostic are:
-bamnostic.AlignmentFile
: the BAM file handler
-bamnostic.AlignedSegment
: an aligned read object interface
-bamnostic.bai.Bai
: if the BAM file has an associated index file (preferred), this is the file handler for it.
+bamnostic.AlignmentFile
: the BAM file handler
+bamnostic.AlignedSegment
: an aligned read object interface
+bamnostic.bai.Bai
: if the BAM file has an associated index file (preferred), this is the file handler for it.
-
Note
-
Within the scope of personal research, reading BAM files is the only fully
+
Note
+
Within the scope of personal research, reading BAM files is the only fully
supported IO. The skeleton for writing BAM files is present, just not connected.
This code is part of the bamnostic distribution and governed by its
@@ -195,20 +195,16 @@
class bamnostic.core.
Cigar
(op_code, n_op, op_id, op_name)
Bases: tuple
namedtuple
for handling CIGAR data
-
-
-
-
-Parameters: |
-- op_code (int) – CIGAR operation index
-- n_op (int) – number of operations for a given op_code
-- op_id (str) – the string representation of the CIGAR representation (‘MIDNSHP=XB’)
-- op_name (str) – Longer string name for operation
+
+- Parameters
+
+op_code (int) – CIGAR operation index
+n_op (int) – number of operations for a given op_code
+op_id (str) – the string representation of the CIGAR representation (‘MIDNSHP=XB’)
+op_name (str) – Longer string name for operation
- |
-
-
-
+
+
-
n_op
@@ -244,18 +240,17 @@
is the base-calling error probability. Phred quality scores tend range
from 10 to 60. These qualities are then offset by 33 and ASCII-encoded
for readability and storage.
-
-
-
-
-Parameters: | qual_string (str or bytes ) – Phred quality scores without offset |
-
-Returns: | ASCII-encoded Phred scores offest by adding 33 to base score. |
-
-Return type: | (str) |
-
-
-
+
+- Parameters
+qual_string (str
or bytes
) – Phred quality scores without offset
+
+- Returns
+ASCII-encoded Phred scores offset by adding 33 to base score.
+
+- Return type
+(str)
+
+
Examples
>>> qual_score = ''
>>> ''.join(offset_qual(qual_score))
@@ -269,28 +264,24 @@
class bamnostic.core.
AlignmentFile
(filepath_or_object, mode='rb', max_cache=128, index_filename=None, filename=None, check_header=False, check_sq=True, reference_filename=None, filepath_index=None, require_index=False, duplicate_filehandle=None, ignore_truncation=False, compresslevel=6, ignore_overwrite=False, copy_header=None, header=b'', reference_names=None, reference_lengths=None)[source]
Bases: bamnostic.bam.BamReader
, bamnostic.bam.BamWriter
Wrapper to allow drop in replacement for BAM functionality in a pysam
-like API.
-
-
-
-
-Parameters: |
-- filepath_or_object (str |
file ) – the path or file object of the BAM file
-- mode (str) – Mode for reading. BAM files are binary by nature (default: ‘rb’).
-- max_cache (int) – number of desired LRU cache size, preferably a multiple of 2 (default: 128).
-- index_filename (str) – path to index file (BAI) if it is named differently than the BAM file (default: None).
-- filename (str |
file ) – synonym for filepath_or_object
-- check_header (bool) – Obsolete method maintained for backwards compatibility (default: False)
-- check_sq (bool) – Inspect BAM file for @SQ entries within the header
-- reference_filename (str) – Not implemented. Maintained for backwards compatibility
-- filepath_index (str) – synonym for index_filename
-- require_index (bool) – require the presence of an index file or raise (default: False)
-- duplicate_filehandle (bool) – Not implemented. Raises warning if True.
-- ignore_truncation (bool) – Whether or not to allow trucated file processing (default: False).
+
+- Parameters
+
+filepath_or_object (str | file ) – the path or file object of the BAM file
+mode (str) – Mode for reading. BAM files are binary by nature (default: ‘rb’).
+max_cache (int) – number of desired LRU cache size, preferably a multiple of 2 (default: 128).
+index_filename (str) – path to index file (BAI) if it is named differently than the BAM file (default: None).
+filename (str | file ) – synonym for filepath_or_object
+check_header (bool) – Obsolete method maintained for backwards compatibility (default: False)
+check_sq (bool) – Inspect BAM file for @SQ entries within the header
+reference_filename (str) – Not implemented. Maintained for backwards compatibility
+filepath_index (str) – synonym for index_filename
+require_index (bool) – require the presence of an index file or raise (default: False)
+duplicate_filehandle (bool) – Not implemented. Raises warning if True.
+ignore_truncation (bool) – Whether or not to allow truncated file processing (default: False).
- |
-
-
-
+
+
@@ -479,40 +470,35 @@
-
get_tag
(tag, with_value_type=False)[source]
Gets the value associated with a given tag key.
-
-
-
-
-Parameters: |
-- tag (str) – the tag of interest
-- with_value_type (bool) – return what kind of value the tag
+
+- Parameters
+-
-
|
-
-Returns: | the value associated with a given tag or the value and type
+
+ - Returns
+the value associated with a given tag or the value and type
of value (as seen in BAM format)
- |
-
-
-
+
+
-
get_tags
(with_value_type=False)[source]
Returns all the tags for a given read
-
-
-
-
-Parameters: | with_value_type (bool) – return the tag value type (as defined by BAM format) |
-
-Returns: | list of tag tuples (with or without tag value type) |
-
-Return type: | f_tags(list ) |
-
-
-
+
+- Parameters
+with_value_type (bool) – return the tag value type (as defined by BAM format)
+
+- Returns
+list of tag tuples (with or without tag value type)
+
+- Return type
+f_tags(list
)
+
+
@@ -520,17 +506,15 @@
get_cigar_stats
()[source]
Gets the counts of each CIGAR operation in the read and number of
nucleotides related to those given operations.
-
-
-
-
-Returns: | list of CIGAR operation counts
-nt_counts (list ): list of nucleotide counts for each operation |
-
-Return type: | op_blocks (list ) |
-
-
-
+
+- Returns
+list of CIGAR operation counts
+nt_counts (list
): list of nucleotide counts for each operation
+
+- Return type
+op_blocks (list
)
+
+
@@ -540,32 +524,28 @@
Uses the CIGAR string and MD tag to recreate the reference sequence associated
with the aligned segment. This is done without the need for looking up
the reference genome.
-
-
-
-
-Returns: | generated reference sequence |
-
-Return type: | (str) |
-
-Raises: | KeyError – if read does not contain MD tag |
-
-
-
+
+- Returns
+generated reference sequence
+
+- Return type
+(str)
+
+- Raises
+KeyError – if read does not contain MD tag
+
+
-
to_bam
()[source]
Writes the alignment record to a BAM file
-
-
-
-
-Parameters: | bam_file (string or bamnostic.bam.BamWriter ) – BAM file path or open bam file in a write mode |
-
-
-
+
+- Parameters
+bam_file (string or bamnostic.bam.BamWriter
) – BAM file path or open bam file in a write mode
+
+
@@ -635,27 +615,23 @@
it can be used to copy any BGZF block within a BAM file that starts at
the given offset.
-
Note
-
Does not progress file cursor position.
+
Note
+
Does not progress file cursor position.
-
-
-
-
-Parameters: |
-- handle (
file ) – open BAM file
-- offset (int) – offset of BGZF block (default: 0)
+
+- Parameters
+-
-
|
-
-Returns: | Complete BGZF block
- |
-
-Raises: | ValueError – if the BGZF block header is malformed
- |
-
-
-
+
+
- Returns
+
Complete BGZF block
+
+
- Raises
+
ValueError – if the BGZF block header is malformed
+
+
Example
>>> bam_header = get_block(bamnostic.example_bam)
>>> try:
@@ -688,27 +664,23 @@
within the deflated BGZF block where the position starts. The virtual offset
is defined as
virtual_offset = coffset << 16 | uoffset
-
-
-
-
-Parameters: | virtual_offset (int) – 64-bit unsigned composite byte offset
- |
-
-Returns: | an echo of the new position
- |
-
-Return type: | virtual_offset (int)
- |
-
-Raises: |
-ValueError – if within block offset is more than block size
-AssertionError – if the start position is not the block start position
+
+- Parameters
+virtual_offset (int) – 64-bit unsigned composite byte offset
+
+- Returns
+an echo of the new position
+
+- Return type
+virtual_offset (int)
+
+- Raises
+-
-
|
-
-
-
+
+
Example
>>> bam = bamnostic.AlignmentFile(bamnostic.example_bam, 'rb')
>>> bam.seek(10)
@@ -727,27 +699,23 @@
-
read
(size=-1)[source]
Read method for the BGZF module.
-
-
-
-
-Parameters: | size (int) – the number of bytes to read from file. Advances the cursor.
- |
-
-Returns: | byte string of length size
- |
-
-Return type: | data (bytes )
- |
-
-Raises: |
-NotImplementedError – if the user tries to read the whole file
-AssertionError – if read does not return any data
+
+- Parameters
+size (int) – the number of bytes to read from file. Advances the cursor.
+
+- Returns
+byte string of length size
+
+- Return type
+data (bytes )
+
+- Raises
+-
-
|
-
-
-
+
+
@@ -769,8 +737,8 @@
seekable
()[source]
Return True indicating the BGZF supports random access.
-
Note
-
Modified from original Bio.BgzfReader: checks to see if BAM
+
Note
+
Modified from original Bio.BgzfReader: checks to see if BAM
file has associated index file (BAI)
@@ -942,48 +910,40 @@
-
bamnostic.bai.
reg2bin
(rbeg, rend)[source]
Finds the largest superset bin of region. Numeric values taken from hts-specs
-
-
-
-
-Parameters: |
-- rbeg (int) – inclusive beginning position of region
-- rend (int) – exclusive end position of region
+
+- Parameters
+-
-
|
-
-Returns: | distinct bin ID for largest superset bin of region
- |
-
-Return type: | (int)
- |
-
-
-
+
+
- Returns
+
distinct bin ID for largest superset bin of region
+
+
- Return type
+
(int)
+
+
-
bamnostic.bai.
reg2bins
(rbeg, rend)[source]
Generates bin ids which overlap the specified region.
-
-
-
-
-Parameters: |
-- rbeg (int) – inclusive beginning position of region
-- rend (int) – exclusive end position of region
+
+- Parameters
+-
-
|
-
-Yields: | (int) – bin IDs for overlapping bins of region
- |
-
-Raises: | AssertionError (Exception) – if the range is malformed or invalid
- |
-
-
-
+
+- Yields
+(int) – bin IDs for overlapping bins of region
+
+- Raises
+AssertionError (Exception) – if the range is malformed or invalid
+
+
@@ -1001,84 +961,66 @@
-
_io
opened BAI file object
-
-
-
-
-Type: | fileObject |
-
-
-
+
+- Type
+fileObject
+
+
-
_LINEAR_INDEX_WINDOW
constant of the linear interval window size
-
+
+- Type
+int
+
+
-
_UNMAP_BIN
constant for bin ID of unmapped read stats
-
+
+- Type
+int
+
+
-
magic
first 4 bytes of file. Must be equal to b’BAI’
-
+
+- Type
+bytes
+
+
-
n_refs
number of references in BAI
-
+
+- Type
+int
+
+
-
unmapped
dictionary of the unmapped read stats by each reference
-
+
+- Type
+dict
+
+
@@ -1087,56 +1029,44 @@
dictionary of the current reference loaded into memory.
It contains the a dictionary of bin IDs and their respective
chunks, and a list of linear intervals.
-
-
-
-
-Type: | None|dict |
-
-
-
+
+- Type
+None|dict
+
+
-
ref_indices
dictionary of reference ids and their start/stop offsets within the BAI file
-
+
+- Type
+dict
+
+
-
n_no_coord
if present in BAI, is the number of reads that have no coordinates
-
-
-
-
-Type: | None|int |
-
-
-
+
+- Type
+None|int
+
+
-
_last_pos
used for indexing, the byte position of the file head.
-
+
+- Type
+int
+
+
@@ -1150,22 +1080,21 @@
Note: a special case of a chunk is in any Bin labeled as 37450.
These bins always contain 2 chunks that provide the statistics
of the number of reads that are unmapped to that reference.
-
-
-
-
-Parameters: | n_chunks (int) – number of chunks to be unpacked from stream |
-
-Returns: |
-- a list of Chunk objects with the attributes of
-- chunks[i] are .voffset_beg and voffset_end
+
+- Parameters
+n_chunks (int) – number of chunks to be unpacked from stream
+
+- Returns
+
+- a list of Chunk objects with the attributes of
chunks[i] are .voffset_beg and voffset_end
+
+
+
+
+- Return type
+chunks (list)
+
- |
-
-Return type: | chunks (list) |
-
-
-
@@ -1180,18 +1109,17 @@
Note: a caveat to using linear interval with long reads: A long read can
span multiple linear intervals. As such, the current encoding could potentially shift
the expected region of interest to the left more than expected.
-
-
-
-
-Parameters: | n_int (int) – number of intervals to unpack |
-
-Returns: | list of virtual offsets for n_int number of linear intervals |
-
-Return type: | intervals (list) |
-
-
-
+
+- Parameters
+n_int (int) – number of intervals to unpack
+
+- Returns
+list of virtual offsets for n_int number of linear intervals
+
+- Return type
+intervals (list)
+
+
@@ -1213,24 +1141,24 @@
As a secondary feature, this function will also quickly seek over regions for
the purposes of documenting the start and stop byte offsets of a given reference
block within the file. This is invoked by setting idx=True
-
-
-
-
-Parameters: | n_int (int) – number of bins to be unpacked from stream |
-
-Returns: |
-- None if just indexing the index file or a dictionary
-- of bin_id: chunks pairs
+
+- Parameters
+n_int (int) – number of bins to be unpacked from stream
+
+- Returns
+
+- None if just indexing the index file or a dictionary
of bin_id: chunks pairs
+
+
+
+
+- Return type
+bins (None | dict)
+
+- Raises
+AssertionError (Exception) – if bin 37450 does not contain 2 chunks exactly
+
- |
-
-Return type: | bins (None | dict) |
-
-Raises: | AssertionError (Exception) – if bin 37450 does not contain 2 chunks exactly |
-
-
-
@@ -1247,30 +1175,26 @@
loaded. Because of this constant loading, functools.lru_cache was applied to cache recently
used reference blocks to speed up computation. It is assumed that when querying is done, most
users are looking and just a few references at a time.
-
-
-
-
-Parameters: |
-- ref_id (None|int) – used for random access or indexing the BAI
-- idx (bool) – Flag for setting whether or not to run an index of the BAI
+
+- Parameters
+-
-
|
-
-Returns: | namedtuple containing the byte offsets of the reference start, stop, and number of bins
+
+ - Returns
+namedtuple containing the byte offsets of the reference start, stop, and number of bins
or
Ref: namedtuple containing a dictionary of bins and list of linear intervals
- |
-
-Return type: | RefIdx
- |
-
-Raises: | AssertionError (Exception) – if, when random access is used, the current reference offset
-does not match indexed reference offset.
- |
-
-
-
+
+- Return type
+RefIdx
+
+- Raises
+AssertionError (Exception) – if, when random access is used, the current reference offset
+ does not match indexed reference offset.
+
+
@@ -1278,58 +1202,49 @@
query
(ref_id, start, stop=-1)[source]
Main query function for determining seek offset to BAM section that
AlignedRead objects from specified region start
-
-
-
-
-Parameters: |
-- ref (int) – which reference/chromosome TID
-- start (int) – left most bp position of region (zero-based)
-- stop (int) – right most bp position of region (zero-based)
+
+- Parameters
+
+ref (int) – which reference/chromosome TID
+start (int) – left most bp position of region (zero-based)
+stop (int) – right most bp position of region (zero-based)
- |
-
-Returns: |
-- the voffset_beg of the first chunk given the chunk’s voffset_end
-is greater than the voffset of the linear index that overlaps
+
+- Returns
+
+- the voffset_beg of the first chunk given the chunk’s voffset_end
is greater than the voffset of the linear index that overlaps
the region of interest’s start offset
- |
-
-Return type: | (int)
- |
-
-
-
+
+- Return type
+(int)
+
+
-
seek
(offset=None, whence=0)[source]
Simple seek function for binary files
-
-
-
-
-Parameters: |
-- offset (None|int) – byte offset from whence to move the file head to.
-- whence (int) – 0 := from start of file, 1:= from current position, 2:= from end of file
+
+- Parameters
+
+offset (None|int) – byte offset from whence to move the file head to.
+whence (int) – 0 := from start of file, 1:= from current position, 2:= from end of file
- |
-
-Returns: | new byte position of file head
- |
-
-Return type: | (int)
- |
-
-
-
-
-- Raise:
-- ValueError (Exception): if the offset is not an integer or is not provided
+
+- Returns
+new byte position of file head
+
+- Return type
+(int)
+
+
+
+- Raise:
ValueError (Exception): if the offset is not an integer or is not provided
+
@@ -1337,34 +1252,31 @@
-
read
(size=-1)[source]
Simple read function for binary files
-
-
-
-
-Parameters: | size (int) – number of bytes to read in (default: -1 –whole file) |
-
-Returns: | the number of bytes read from file |
-
-Return type: | (bytes) |
-
-
-
+
+- Parameters
+size (int) – number of bytes to read in (default: -1 –whole file)
+
+- Returns
+the number of bytes read from file
+
+- Return type
+(bytes)
+
+
-
tell
()[source]
Simple tell function for reporting byte position of file head
-
-
-
-
-Returns: | byte position of file head |
-
-Return type: | (int) |
-
-
-
+
+- Returns
+byte position of file head
+
+- Return type
+(int)
+
+
@@ -1394,23 +1306,19 @@
-
bamnostic.utils.
format_warnings
(message, category, filename, lineno, file=None, line=None)[source]
Sets STDOUT warnings
-
-
-
-
-Parameters: |
-- message – the unformatted warning message being reported
-- category (str) – the level of warning (handled by warnings module)
-- filename (str) – filename for logging purposes (defaults to STDOUT)
-- lineno (int) – where the error occurred.
+
+- Parameters
+
+message – the unformatted warning message being reported
+category (str) – the level of warning (handled by warnings module)
+filename (str) – filename for logging purposes (defaults to STDOUT)
+lineno (int) – where the error occurred.
- |
-
-Returns: | formatted warning string
- |
-
-
-
+
+
- Returns
+
formatted warning string
+
+
@@ -1446,36 +1354,36 @@
flags are used for read filtering and QC. The flags are described below.
Additionally, they can be found here
Any given read’s flag is determined by the or (|) operand of all appropriate bit flags.
-
-
-
-
-Parameters: | flag_code (int) – either a standalone integer/bit flag or the read object itself |
-
-Returns: | list of flag and flag description tuples. |
-
-Return type: | (list of tuple ) |
-
-Raises: | ValueError – if provided flag is not a valid entry |
-
-
-
+
+- Parameters
+flag_code (int) – either a standalone integer/bit flag or the read object itself
+
+- Returns
+list of flag and flag description tuples.
+
+- Return type
+(list
of tuple
)
+
+- Raises
+ValueError – if provided flag is not a valid entry
+
+
Example
If a flag is 516 it is comprised of flag 4 and flag 512
>>> flag_decode(516)
[(4, 'read unmapped'), (512, 'QC fail')]
-
+
-
-
-
+
+
+
-
-Int |
-Bit |
-Description |
+
+Int |
+Bit |
+Description |
@@ -1513,8 +1421,8 @@
The second form a submission can take is through positional arguments. Just like
keyword, but ordered such that it makes up a genomic region of interest.
-
Note
-
Positional arguments make utilizing the tid parameter difficult since it is
+
Note
+
Positional arguments make utilizing the tid parameter difficult since it is
the 5th argument of the function signature.
The third form a submission can take is through using a SAM-formatted string. An
@@ -1525,31 +1433,27 @@
start position (be it the whole reference or a specific spot on the reference). Setting
this to True (default: False) will pull all reads to the end of the reference or file,
whichever is first.
-
-
-
-
-Parameters: |
-- contig (str) – name of reference/contig
-- start (int) – start position of region of interest (0-based)
-- stop (int) – stop position of region of interest (0-based)
-- region (str) – SAM region formatted string. Accepts tab-delimited values as well
-- tid (int) – the refID or target id of a reference/contig
-- until_eof (bool) – iterate until end of file (default: False)
+
+- Parameters
+
+contig (str) – name of reference/contig
+start (int) – start position of region of interest (0-based)
+stop (int) – stop position of region of interest (0-based)
+region (str) – SAM region formatted string. Accepts tab-delimited values as well
+tid (int) – the refID or target id of a reference/contig
+until_eof (bool) – iterate until end of file (default: False)
- |
-
-Returns: | region of interest formatted as an object with named attributes.
- |
-
-Return type: | query (bamnostic.utils.Roi )
- |
-
-Raises: | ValueError – if two synonym keywords are set, but contradict each other
- |
-
-
-
+
+- Returns
+region of interest formatted as an object with named attributes.
+
+- Return type
+query (bamnostic.utils.Roi
)
+
+- Raises
+ValueError – if two synonym keywords are set, but contradict each other
+
+
Examples
# Keyword-based
>>> parse_region(contig = ‘chr1’, start = 10, stop = 100)
@@ -1586,21 +1490,17 @@
the format string. Additionally, it can process a file object or byte
stream and implement a read or slice (respectively). Mainly, this is a
quality of life function.
-
-
-
-
-Parameters: |
-- fmt (str) – the string format of the binary data to be unpacked
-- _io – built-in binary format reader (default: io.BufferedRandom)
+
+- Parameters
+-
-
|
-
-Returns: | unpacked contents from _io based on fmt string
- |
-
-
-
+
+- Returns
+unpacked contents from _io based on fmt string
+
+
@@ -1658,14 +1558,11 @@
-
get
(key)[source]
Basic getter that renews LRU status upon inspection
-
-
-
-
-Parameters: | key (str) – immutable dictionary key |
-
-
-
+
+- Parameters
+key (str) – immutable dictionary key
+
+
@@ -1673,14 +1570,11 @@
update
(others)[source]
Same as a regular dict.update; however, since pypy’s dict.update
doesn’t go through dict.__setitem__, this is used to ensure it does.
-
-
-
-
-Parameters: | others (iterable) – a dictionary or iterable containing key/value pairs |
-
-
-
+
+- Parameters
+others (iterable) – a dictionary or iterable containing key/value pairs
+
+
@@ -1696,20 +1590,20 @@
-
bamnostic.utils.
parse_cigar
(cigar_str)[source]
Parses a CIGAR string and turns it into a list of tuples
-
-
-
-
-Parameters: | cigar_str (str) – the CIGAR string as shown in SAM entry |
-
-Returns: | list of tuples of CIGAR operations (by id) and number of operations |
-
-Return type: | cigar_array (list) |
-
-Raises: | ValueError – if CIGAR operation is invalid |
-
-
-
+
+- Parameters
+cigar_str (str) – the CIGAR string as shown in SAM entry
+
+- Returns
+list of tuples of CIGAR operations (by id) and number of operations
+
+- Return type
+cigar_array (list)
+
+- Raises
+ValueError – if CIGAR operation is invalid
+
+
Examples
>>> parse_cigar('3M1I3M1D5M') # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
[(('BAM_CMATCH', 0), 3), ..., (('BAM_CMATCH', 0), 5)]
@@ -1721,50 +1615,46 @@
-
bamnostic.utils.
check_cigar_arg
(cigar)[source]
Checks to make sure CIGAR argument is valid.
-
-
-
-
-Parameters: | argument (str or list ) – CIGAR string (pre-formatted or raw) |
-
-Returns: | CIGAR re-formatted as a list |
-
-Return type: | (list ) |
-
-Raises: | ValueError – if CIGAR is not a string or pre-formatted list |
-
-
-
+
+- Parameters
+argument (str or list
) – CIGAR string (pre-formatted or raw)
+
+- Returns
+CIGAR re-formatted as a list
+
+- Return type
+(list
)
+
+- Raises
+ValueError – if CIGAR is not a string or pre-formatted list
+
+
-
bamnostic.utils.
cigar_changes
(seq, cigar)[source]
-
-- Recreates the reference sequence to the extent that the CIGAR string can
-- represent.
+
+- Recreates the reference sequence to the extent that the CIGAR string can
represent.
+
-
-
-
-
-Parameters: |
-- seq (str) – aligned segment sequence
-- cigar (list) – list of tuples of cigar operations (by id) and number of operations
+
+- Parameters
+-
-
|
-
-Returns: | a version of the aligned segment’s reference sequence given the changes reflected in the cigar string
- |
-
-Return type: | cigar_formatted_ref (str)
- |
-
-Raises: | ValueError – if CIGAR operation is invalid
- |
-
-
-
+
+- Returns
+a version of the aligned segment’s reference sequence given the changes reflected in the cigar string
+
+- Return type
+cigar_formatted_ref (str)
+
+- Raises
+ValueError – if CIGAR operation is invalid
+
+
Examples
>>> cigar_changes('ACTAGAATGGCT', '3M1I3M1D5M')
'ACTGAATGGCT'
@@ -1784,31 +1674,27 @@
Recreates the reference sequence of a given alignment to the extent that the
MD tag can represent.
-
Note
-
Used in conjunction with cigar_changes to recreate the
+
Note
+
Used in conjunction with cigar_changes to recreate the
complete reference sequence
-
-
-
-
-Parameters: |
-- seq (str) – aligned segment sequence
-- md_tag (str) – MD tag for associated sequence
+
+- Parameters
+-
-
|
-
-Returns: | a version of the aligned segment’s reference sequence given the changes reflected in the MD tag
- |
-
-Return type: | ref_seq (str)
- |
-
-Raises: | ValueError – if MD tag is None
- |
-
-
-
+
+
- Returns
+
a version of the aligned segment’s reference sequence given the changes reflected in the MD tag
+
+
- Return type
+
ref_seq (str)
+
+
- Raises
+
ValueError – if MD tag is None
+
+
Example
>>> md_changes('CTTATATTGGCCTT', '3C4AT4')
'CTTCTATTATCCTT'
@@ -1824,18 +1710,17 @@
with the aligned segment. This is done without the need for looking up
the reference genome. Example reads, MD tags, and CIGAR strings taken from
David Tang’s Blog.
-
-
-
-
-Returns: | generated reference sequence |
-
-Return type: | (str) |
-
-Raises: | KeyError – if read does not contain MD tag |
-
-
-
+
+- Returns
+generated reference sequence
+
+- Return type
+(str)
+
+- Raises
+KeyError – if read does not contain MD tag
+
+
Examples
# Only mismatches
>>> seq = 'CGATACGGGGACATCCGGCCTGCTCCTTCTCACATG'
@@ -1870,23 +1755,19 @@
Any clipping results in the removal of those bases. If an insertion is seen in
the CIGAR, those bases are removed from the sequence. If a deletion is seen in
the CIGAR, those bases are padded with a period (‘.’) symbol.
-
-
-
-
-Parameters: |
-- seq (str) – string sequence of the aligned segment.
-- cigar (str) – the cigar string or cigartuple of the aligned segment.
-- start_pos (int) – the first aligned position of the read
-- qualities (
array.array ) – base quality array from read
+
+- Parameters
+
+seq (str) – string sequence of the aligned segment.
+cigar (str) – the cigar string or cigartuple of the aligned segment.
+start_pos (int) – the first aligned position of the read
+qualities (array.array ) – base quality array from read
- |
-
-Yields: | (tuple of str and int ) – nucleotide base and index position of that base relative to reference
- |
-
-
-
+
+
- Yields
+
(tuple
of str
and int
) – nucleotide base and index position of that base relative to reference
+
+
Example
>>> seq = 'AGTGATGGGAGGATGTCTCGTCTGTGAGTTACAGCA'
>>> cigar = '2M1I7M6D26M'
diff --git a/docs/build/html/genindex.html b/docs/build/html/genindex.html
index af4bc3f..e2614ea 100644
--- a/docs/build/html/genindex.html
+++ b/docs/build/html/genindex.html
@@ -9,7 +9,7 @@
- Index — bamnostic 1.0.9 documentation
+ Index — bamnostic 1.0.10 documentation
@@ -60,7 +60,7 @@
- 1.0.9
+ 1.0.10
diff --git a/docs/build/html/index.html b/docs/build/html/index.html
index c177f86..62ca27c 100644
--- a/docs/build/html/index.html
+++ b/docs/build/html/index.html
@@ -8,7 +8,7 @@
- Welcome to bamnostic’s documentation! — bamnostic 1.0.9 documentation
+ Welcome to bamnostic’s documentation! — bamnostic 1.0.10 documentation
@@ -60,7 +60,7 @@
- 1.0.9
+ 1.0.10
@@ -160,8 +160,8 @@ Welcome to bamnostic’s documentation!
Contents:
@@ -210,9 +210,9 @@
diff --git a/docs/build/html/install.html b/docs/build/html/install.html
index 1ae0708..7437134 100644
--- a/docs/build/html/install.html
+++ b/docs/build/html/install.html
@@ -8,7 +8,7 @@
-
Installation — bamnostic 1.0.9 documentation
+
Installation — bamnostic 1.0.10 documentation
@@ -61,7 +61,7 @@
- 1.0.9
+ 1.0.10
@@ -166,41 +166,41 @@
Installation