Skip to content

Commit

Permalink
Fixes #26: Ranges are [inclusive, exclusive)
Browse files Browse the repository at this point in the history
Because of how the interval checks are defined, a genomic range cannot start and end at the same position: ranges are half-open, [start, stop), so a range with start == stop selects nothing. This keeps the API consistent with slicing in the Python language.

For example:

```python
>>> a = 'abc'
>>> a[1:1]
''
```
  • Loading branch information
betteridiot committed Nov 5, 2018
1 parent 20375b8 commit 930f972
Show file tree
Hide file tree
Showing 25 changed files with 85 additions and 73 deletions.
6 changes: 1 addition & 5 deletions bamnostic/bai.py
Original file line number Diff line number Diff line change
Expand Up @@ -397,8 +397,6 @@ def query(self, ref_id, start, stop=-1):
self.current_ref = self.get_ref(ref_id)

# get linear index first
# how many windows do we need to go over
# because of floor div, we need to make it 0-based

reg_lin_idx = start >> self.BAM_LIDX_SHIFT

Expand All @@ -412,9 +410,7 @@ def query(self, ref_id, start, stop=-1):
continue

for chunk in bin_chunks:
if not chunk.voffset_beg <= linear_offset <= chunk.voffset_end:
continue
else:
if chunk.voffset_beg <= linear_offset <= chunk.voffset_end:
return chunk.voffset_beg

def seek(self, offset=None, whence=0):
Expand Down
54 changes: 36 additions & 18 deletions bamnostic/bam.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,11 +238,11 @@ def __init__(self, filepath_or_object, mode="rb", max_cache=128, index_filename=
'duplicate_filehandle': locals()['duplicate_filehandle']}
super(BamReader, self).__init__(**super_args)

self._igore_truncation = ignore_truncation
self._ignore_truncation = ignore_truncation
self._truncated = self._check_truncation()

# Check BAM file integrity
if not self._igore_truncation:
if not self._ignore_truncation:
if self._truncated:
raise Exception('BAM file may be truncated. Turn off ignore_truncation if you wish to continue')

Expand Down Expand Up @@ -486,7 +486,7 @@ def has_index(self):
def fetch(self, contig=None, start=None, stop=None, region=None,
tid=None, until_eof=False, multiple_iterators=False,
reference=None, end=None):
r"""Creates a generator that returns all reads within the given region
r"""Creates a generator that returns all reads within the given region. (inclusive, exclusive)
Args:
contig (str): name of reference/contig
Expand Down Expand Up @@ -539,6 +539,11 @@ def fetch(self, contig=None, start=None, stop=None, region=None,
AssertionError: Malformed region: start should be <= stop, you entered 100, 10
"""
# Inclusive, exclusive. This means if start and stop are
# the same, then the user is *essentially* looking at nothing
# e.g. a = "abc"; print(a[1:1]) -> ''
if start == stop:
return

if not self._random_access:
raise ValueError('Random access not available due to lack of index file')
Expand Down Expand Up @@ -580,27 +585,40 @@ def fetch(self, contig=None, start=None, stop=None, region=None,
# move to that virtual offset...should load the block into the cache
# if it hasn't been visited before
self.seek(first_read_block)
boundary_check = True
while boundary_check:
next_read = next(self)

for next_read in self:
if not until_eof:
# check to see if the read is out of bounds of the region
if next_read.reference_name != query.contig:
boundary_check = False
elif query.start < query.stop <= next_read.pos:
boundary_check = False
elif next_read.pos <= query.start <= next_read.pos + len(next_read.seq):
yield next_read
elif next_read.pos < query.start:
continue
elif not query.start <= next_read.pos < query.stop:
# On the wrong contig -> not the right place
if next_read.reference_name != query.contig:
return None

# Read is too far left -> keep going
elif (next_read.pos + len(next_read.seq)) < query.start:
continue

# Originates outside, but overlaps
elif next_read.pos < query.start <= (next_read.pos + len(next_read)):
yield next_read

# Read wholly inside region
elif query.start <= next_read.pos < query.stop:
yield next_read

# Read too far right -> gotta stop
elif query.stop <= next_read.pos:
return None

# check for stop iteration
elif next_read:
yield next_read

# Empty read
else:
return
else:
return None

# Read until the end of file
else:
try:
yield next_read
except:
Expand Down Expand Up @@ -1030,7 +1048,7 @@ def next(self):

read = bamnostic.AlignedSegment(self)
if not read:
raise StopIteration
return
return read

def seekable(self):
Expand Down
7 changes: 4 additions & 3 deletions bamnostic/bgzf.py
Original file line number Diff line number Diff line change
Expand Up @@ -381,8 +381,7 @@ def seek(self, virtual_offset):
assert start_offset == self._block_start_offset
if within_block > len(self._buffer):
if not (within_block == 0 and len(self._buffer) == 0):
raise ValueError("Within offset %i but block size only %i"
% (within_block, len(self._buffer)))
raise ValueError("Within offset {} but block size only {}".format(within_block, len(self._buffer)))
self._within_block_offset = within_block
return virtual_offset

Expand Down Expand Up @@ -431,7 +430,9 @@ def read(self, size=-1):
# if there is still more to read
elif size:
# pull rest of data from next block
return data + self.read(size)
data += self._buffer[self._within_block_offset: self._within_block_offset + size]
self._within_block_offset += size
return data

else:
# Only needed the end of the last block
Expand Down
4 changes: 2 additions & 2 deletions bamnostic/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,10 +191,10 @@ def __init__(self, _io):
block_size = unpack_int32(bsize_buffer)[0]

# Check for EOF: If the cursor is at the end of file, read() will return
# an empty byte string. If
# an empty byte string.
except struct.error:
if all([not bsize_buffer, not self._io._handle.read()]):
if not self._io._igore_truncation and not self._io._truncated:
if not self._io._ignore_truncation and not self._io._truncated:
raise StopIteration('End of file reached')
else:
raise StopIteration('Potential end of file reached')
Expand Down
4 changes: 2 additions & 2 deletions codemeta.json
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,8 @@
"description": "BAMnostic is a Pure Python OS, version, and runtime agnostic BAM file parser",
"keywords": "BAM, pysam, genomics, genetics, htslib, samtools",
"license": "https://github.com/betteridiot/bamnostic/blob/master/LICENSE",
"softwareVersion": "v1.0.1,
"version": "v1.0.1",
"softwareVersion": "v1.0.2",
"version": "v1.0.2",
"readme": "https://github.com/betteridiot/bamnostic/blob/master/README.md",
"buildInstructions": "https://github.com/betteridiot/bamnostic/blob/master/README.md",
"issueTracker": "https://github.com/betteridiot/bamnostic/issues",
Expand Down
Binary file modified docs/build/doctrees/environment.pickle
Binary file not shown.
2 changes: 1 addition & 1 deletion docs/build/html/.buildinfo
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Sphinx build info version 1
# This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
config: 7fe887b2cbc5b91bf05ea54ef36c1a08
config: 3ccb95d73a1a33548bd2a0dda7fa8824
tags: 645f666f9bcd5a90fca523b33c5a78b7
10 changes: 3 additions & 7 deletions docs/build/html/_modules/bamnostic/bai.html
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

<meta name="viewport" content="width=device-width, initial-scale=1.0">

<title>bamnostic.bai &mdash; bamnostic 1.0.1 documentation</title>
<title>bamnostic.bai &mdash; bamnostic 1.0.2 documentation</title>



Expand Down Expand Up @@ -56,7 +56,7 @@


<div class="version">
1.0.1
1.0.2
</div>


Expand Down Expand Up @@ -548,8 +548,6 @@ <h1>Source code for bamnostic.bai</h1><div class="highlight"><pre>
<span class="bp">self</span><span class="o">.</span><span class="n">current_ref</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_ref</span><span class="p">(</span><span class="n">ref_id</span><span class="p">)</span>

<span class="c1"># get linear index first</span>
<span class="c1"># how many windows do we need to go over</span>
<span class="c1"># because of floor div, we need to make it 0-based</span>

<span class="n">reg_lin_idx</span> <span class="o">=</span> <span class="n">start</span> <span class="o">&gt;&gt;</span> <span class="bp">self</span><span class="o">.</span><span class="n">BAM_LIDX_SHIFT</span>

Expand All @@ -563,9 +561,7 @@ <h1>Source code for bamnostic.bai</h1><div class="highlight"><pre>
<span class="k">continue</span>

<span class="k">for</span> <span class="n">chunk</span> <span class="ow">in</span> <span class="n">bin_chunks</span><span class="p">:</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">chunk</span><span class="o">.</span><span class="n">voffset_beg</span> <span class="o">&lt;=</span> <span class="n">linear_offset</span> <span class="o">&lt;=</span> <span class="n">chunk</span><span class="o">.</span><span class="n">voffset_end</span><span class="p">:</span>
<span class="k">continue</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">if</span> <span class="n">chunk</span><span class="o">.</span><span class="n">voffset_beg</span> <span class="o">&lt;=</span> <span class="n">linear_offset</span> <span class="o">&lt;=</span> <span class="n">chunk</span><span class="o">.</span><span class="n">voffset_end</span><span class="p">:</span>
<span class="k">return</span> <span class="n">chunk</span><span class="o">.</span><span class="n">voffset_beg</span></div>

<div class="viewcode-block" id="Bai.seek"><a class="viewcode-back" href="../../bamnostic.html#bamnostic.bai.Bai.seek">[docs]</a> <span class="k">def</span> <span class="nf">seek</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">offset</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">whence</span><span class="o">=</span><span class="mi">0</span><span class="p">):</span>
Expand Down
11 changes: 6 additions & 5 deletions docs/build/html/_modules/bamnostic/bgzf.html
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

<meta name="viewport" content="width=device-width, initial-scale=1.0">

<title>bamnostic.bgzf &mdash; bamnostic 1.0.1 documentation</title>
<title>bamnostic.bgzf &mdash; bamnostic 1.0.2 documentation</title>



Expand Down Expand Up @@ -56,7 +56,7 @@


<div class="version">
1.0.1
1.0.2
</div>


Expand Down Expand Up @@ -532,8 +532,7 @@ <h1>Source code for bamnostic.bgzf</h1><div class="highlight"><pre>
<span class="k">assert</span> <span class="n">start_offset</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">_block_start_offset</span>
<span class="k">if</span> <span class="n">within_block</span> <span class="o">&gt;</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_buffer</span><span class="p">):</span>
<span class="k">if</span> <span class="ow">not</span> <span class="p">(</span><span class="n">within_block</span> <span class="o">==</span> <span class="mi">0</span> <span class="ow">and</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_buffer</span><span class="p">)</span> <span class="o">==</span> <span class="mi">0</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;Within offset </span><span class="si">%i</span><span class="s2"> but block size only </span><span class="si">%i</span><span class="s2">&quot;</span>
<span class="o">%</span> <span class="p">(</span><span class="n">within_block</span><span class="p">,</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_buffer</span><span class="p">)))</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;Within offset </span><span class="si">{}</span><span class="s2"> but block size only </span><span class="si">{}</span><span class="s2">&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">within_block</span><span class="p">,</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_buffer</span><span class="p">)))</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_within_block_offset</span> <span class="o">=</span> <span class="n">within_block</span>
<span class="k">return</span> <span class="n">virtual_offset</span></div>

Expand Down Expand Up @@ -582,7 +581,9 @@ <h1>Source code for bamnostic.bgzf</h1><div class="highlight"><pre>
<span class="c1"># if there is still more to read</span>
<span class="k">elif</span> <span class="n">size</span><span class="p">:</span>
<span class="c1"># pull rest of data from next block</span>
<span class="k">return</span> <span class="n">data</span> <span class="o">+</span> <span class="bp">self</span><span class="o">.</span><span class="n">read</span><span class="p">(</span><span class="n">size</span><span class="p">)</span>
<span class="n">data</span> <span class="o">+=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_buffer</span><span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">_within_block_offset</span><span class="p">:</span> <span class="bp">self</span><span class="o">.</span><span class="n">_within_block_offset</span> <span class="o">+</span> <span class="n">size</span><span class="p">]</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_within_block_offset</span> <span class="o">+=</span> <span class="n">size</span>
<span class="k">return</span> <span class="n">data</span>

<span class="k">else</span><span class="p">:</span>
<span class="c1"># Only needed the end of the last block</span>
Expand Down
8 changes: 4 additions & 4 deletions docs/build/html/_modules/bamnostic/core.html
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

<meta name="viewport" content="width=device-width, initial-scale=1.0">

<title>bamnostic.core &mdash; bamnostic 1.0.1 documentation</title>
<title>bamnostic.core &mdash; bamnostic 1.0.2 documentation</title>



Expand Down Expand Up @@ -56,7 +56,7 @@


<div class="version">
1.0.1
1.0.2
</div>


Expand Down Expand Up @@ -342,10 +342,10 @@ <h1>Source code for bamnostic.core</h1><div class="highlight"><pre>
<span class="n">block_size</span> <span class="o">=</span> <span class="n">unpack_int32</span><span class="p">(</span><span class="n">bsize_buffer</span><span class="p">)[</span><span class="mi">0</span><span class="p">]</span>

<span class="c1"># Check for EOF: If the cursor is at the end of file, read() will return </span>
<span class="c1"># an empty byte string. If </span>
<span class="c1"># an empty byte string.</span>
<span class="k">except</span> <span class="n">struct</span><span class="o">.</span><span class="n">error</span><span class="p">:</span>
<span class="k">if</span> <span class="nb">all</span><span class="p">([</span><span class="ow">not</span> <span class="n">bsize_buffer</span><span class="p">,</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">_io</span><span class="o">.</span><span class="n">_handle</span><span class="o">.</span><span class="n">read</span><span class="p">()]):</span>
<span class="k">if</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">_io</span><span class="o">.</span><span class="n">_igore_truncation</span> <span class="ow">and</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">_io</span><span class="o">.</span><span class="n">_truncated</span><span class="p">:</span>
<span class="k">if</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">_io</span><span class="o">.</span><span class="n">_ignore_truncation</span> <span class="ow">and</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">_io</span><span class="o">.</span><span class="n">_truncated</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">StopIteration</span><span class="p">(</span><span class="s1">&#39;End of file reached&#39;</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">StopIteration</span><span class="p">(</span><span class="s1">&#39;Potential end of file reached&#39;</span><span class="p">)</span>
Expand Down
4 changes: 2 additions & 2 deletions docs/build/html/_modules/bamnostic/utils.html
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

<meta name="viewport" content="width=device-width, initial-scale=1.0">

<title>bamnostic.utils &mdash; bamnostic 1.0.1 documentation</title>
<title>bamnostic.utils &mdash; bamnostic 1.0.2 documentation</title>



Expand Down Expand Up @@ -56,7 +56,7 @@


<div class="version">
1.0.1
1.0.2
</div>


Expand Down
4 changes: 2 additions & 2 deletions docs/build/html/_modules/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

<meta name="viewport" content="width=device-width, initial-scale=1.0">

<title>Overview: module code &mdash; bamnostic 1.0.1 documentation</title>
<title>Overview: module code &mdash; bamnostic 1.0.2 documentation</title>



Expand Down Expand Up @@ -56,7 +56,7 @@


<div class="version">
1.0.1
1.0.2
</div>


Expand Down
2 changes: 1 addition & 1 deletion docs/build/html/_static/documentation_options.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
var DOCUMENTATION_OPTIONS = {
URL_ROOT: document.getElementById("documentation_options").getAttribute('data-url_root'),
VERSION: '1.0.1',
VERSION: '1.0.2',
LANGUAGE: 'en',
COLLAPSE_INDEX: false,
FILE_SUFFIX: '.html',
Expand Down
4 changes: 2 additions & 2 deletions docs/build/html/bamnostic.html
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

<meta name="viewport" content="width=device-width, initial-scale=1.0">

<title>bamnostic package &mdash; bamnostic 1.0.1 documentation</title>
<title>bamnostic package &mdash; bamnostic 1.0.2 documentation</title>



Expand Down Expand Up @@ -58,7 +58,7 @@


<div class="version">
1.0.1
1.0.2
</div>


Expand Down
4 changes: 2 additions & 2 deletions docs/build/html/genindex.html
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

<meta name="viewport" content="width=device-width, initial-scale=1.0">

<title>Index &mdash; bamnostic 1.0.1 documentation</title>
<title>Index &mdash; bamnostic 1.0.2 documentation</title>



Expand Down Expand Up @@ -57,7 +57,7 @@


<div class="version">
1.0.1
1.0.2
</div>


Expand Down
4 changes: 2 additions & 2 deletions docs/build/html/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

<meta name="viewport" content="width=device-width, initial-scale=1.0">

<title>Welcome to bamnostic’s documentation! &mdash; bamnostic 1.0.1 documentation</title>
<title>Welcome to bamnostic’s documentation! &mdash; bamnostic 1.0.2 documentation</title>



Expand Down Expand Up @@ -57,7 +57,7 @@


<div class="version">
1.0.1
1.0.2
</div>


Expand Down
4 changes: 2 additions & 2 deletions docs/build/html/install.html
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

<meta name="viewport" content="width=device-width, initial-scale=1.0">

<title>Installation &mdash; bamnostic 1.0.1 documentation</title>
<title>Installation &mdash; bamnostic 1.0.2 documentation</title>



Expand Down Expand Up @@ -58,7 +58,7 @@


<div class="version">
1.0.1
1.0.2
</div>


Expand Down
Binary file modified docs/build/html/objects.inv
Binary file not shown.
Loading

0 comments on commit 930f972

Please sign in to comment.