Skip to content

Commit

Permalink
Fixes #16: Detecting EOF for serial access
Browse files Browse the repository at this point in the history
The program was originally looking for the EOF marker, but I forgot that when the EOF marker BGZF block is decompressed, it evaluates to an empty bytestring `b''`. I put in some extra checks so that if an empty block is found, the program verifies that it is at the end of the file and that this is a legitimate end of file.

If the file is truncated, it will `raise` a slightly different `StopIteration` error stating that the *potential* end of file is reached.

I ran this on the file you mentioned in #15 (btw, awesome Issue suggestion. Thank you for following the template). My debugging is properly running through the file now.
  • Loading branch information
betteridiot committed Sep 14, 2018
1 parent 2f6e387 commit 8ba2388
Show file tree
Hide file tree
Showing 5 changed files with 25 additions and 10 deletions.
8 changes: 5 additions & 3 deletions bamnostic/bgzf.py
Original file line number Diff line number Diff line change
Expand Up @@ -453,9 +453,11 @@ def __init__(self, filepath_or_object, mode="rb", max_cache=128, index_filename=
# Connect to the BAM file
self._handle = handle

self._truncated = self._check_truncation()
self._igore_truncation = ignore_truncation
# Check BAM file integrity
if not ignore_truncation:
if self._check_truncation():
if not self._igore_truncation:
if self._truncated:
raise Exception('BAM file may be truncated. Turn off ignore_truncation if you wish to continue')

# Connect and process the Index file (if present)
Expand Down Expand Up @@ -525,7 +527,7 @@ def _load_block(self, start_offset=None):
if self._text:
self._buffer = ""
else:
self._buffer = b""
self._buffer = b''
self._within_block_offset = 0
self._block_raw_length = block_size

Expand Down
17 changes: 15 additions & 2 deletions bamnostic/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
@email: "mdsherman<at>betteridiot<dot>tech"
"""

import os
import struct
import sys
from array import array
Expand Down Expand Up @@ -167,7 +167,20 @@ def __init__(self, _io):
"""
self._io = _io
block_size = unpack_int32(self._io.read(4))[0]
bsize_buffer = self._io.read(4)
try:
block_size = unpack_int32(bsize_buffer)[0]

# Check for EOF: If the cursor is at the end of file, read() will return
# an empty byte string, so unpacking it raises struct.error (handled below).
except struct.error:
if all([not bsize_buffer, not self._io._handle.read()]):
if not self._io._igore_truncation and not self._io._truncated:
raise StopIteration('End of file reached')
else:
raise StopIteration('Potential end of file reached')
else:
raise IOError('Reached End of file, but marker does not match BAM standard')

# Pull in the whole read
self._byte_stream = bytearray(self._io.read(block_size))
Expand Down
4 changes: 2 additions & 2 deletions codemeta.json
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,8 @@
"description": "BAMnostic is a Pure Python OS, version, and runtime agnostic BAM file parser",
"keywords": "BAM, pysam, genomics, genetics, htslib, samtools",
"license": "https://github.com/betteridiot/bamnostic/blob/master/LICENSE",
"softwareVersion": "v0.8.13",
"version": "v0.8.13",
"softwareVersion": "v0.9.0",
"version": "v0.9.0",
"readme": "https://github.com/betteridiot/bamnostic/blob/master/README.md",
"buildInstructions": "https://github.com/betteridiot/bamnostic/blob/master/README.md",
"issueTracker": "https://github.com/betteridiot/bamnostic/issues",
Expand Down
4 changes: 2 additions & 2 deletions docs/paper/codemeta.json
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,8 @@
"description": "BAMnostic: an OS-agnostic toolkit for genomic sequence analysis",
"keywords": "BAM, pysam, genomics, genetics, htslib, samtools",
"license": "https://github.com/betteridiot/bamnostic/blob/master/LICENSE",
"softwareVersion": "v0.8.13",
"version": "v0.8.13",
"softwareVersion": "v0.9.0",
"version": "v0.9.0",
"readme": "https://github.com/betteridiot/bamnostic/blob/master/README.md",
"buildInstructions": "https://github.com/betteridiot/bamnostic/blob/master/README.md",
"issueTracker": "https://github.com/betteridiot/bamnostic/issues",
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ def readme():

setup(
name='bamnostic',
version='0.8.13',
version='0.9.0',
description='Pure Python, OS-agnostic Binary Alignment Map (BAM) random access and parsing tool',
long_description=readme(),
url='https://github.com/betteridiot/bamnostic/',
Expand Down

0 comments on commit 8ba2388

Please sign in to comment.