Skip to content

Commit

Permalink
Merge pull request #48 from 4dn-dcic/0.3.1
Browse files Browse the repository at this point in the history
0.3.1
  • Loading branch information
Carl Vitzthum authored Oct 24, 2017
2 parents 862d807 + 4b61c3d commit f251e4c
Show file tree
Hide file tree
Showing 13 changed files with 181 additions and 55 deletions.
3 changes: 3 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ compiler: gcc
python:
- '3.6'
- '2.7'
before_install:
- sudo apt-get install -qq valgrind
script:
- make
- |
Expand All @@ -17,3 +19,4 @@ script:
python test/test.py
fi
- source test/test_c.sh
- valgrind --error-exitcode=42 --leak-check=full test/test_c.sh
14 changes: 14 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,12 @@ By default '|' is used to split the two genomic regions, but in some cases, a di
pairix -W textfile.gz
```

#### Print out number of bgzf blocks that span each chromosome pair.
This command prints out the number of bgzf blocks for all chromosome pairs.
```
pairix -B textfile.gz
```


<br>

Expand Down Expand Up @@ -452,6 +458,10 @@ print (tb.get_header())
# get chromsize
tb=pypairix.open("textfile.gz")
print (tb.get_chromsize())
# get the number of bgzf blocks that span a given chromosome pair
tb=pypairix.open("textfile.gz")
print (tb.bgzf_block_count("chr1", "chr2"))
```

<br>
Expand Down Expand Up @@ -619,6 +629,10 @@ ulimit -n 2000

## Version history

### 0.3.1
* `pairix -B` option is now available to print out the number of bgzf blocks for each chromosome (pair).
* The same function is available for pypairix.

### 0.3.0
* The problem with `fragment_4dnpairs.pl` of adding an extra column is now fixed.
* 1D querying on 2D data now works with `pypairix` (function `querys2D`).
Expand Down
2 changes: 1 addition & 1 deletion VERSION.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.3.0
0.3.1
Binary file modified samples/SRR1171591.variants.snp.vqsr.p.vcf.gz.px2
Binary file not shown.
Binary file modified samples/merged_nodup.tab.chrblock_sorted.txt.gz.px2
Binary file not shown.
Binary file not shown.
21 changes: 21 additions & 0 deletions src/bgzf.c
Original file line number Diff line number Diff line change
Expand Up @@ -325,6 +325,27 @@ static int load_block_from_cache(BGZF *fp, int64_t block_address) {return 0;}
/* No-op variant of cache_block(): the body is empty, so nothing is cached. */
static void cache_block(BGZF *fp, int size) {}
#endif

/**
 * Return the length of the BGZF block that starts at the given offset, as
 * recorded in that block's header.
 *
 * The file position of fp is moved by this call (it seeks and then reads the
 * block header).
 *
 * @param fp                  BGZF handle to read from
 * @param block_start_offset  offset handed unchanged to bgzf_seek()
 * @return  the block length on success;
 *          0 if no data could be read (fp->block_length is reset to 0) or if
 *          load_block_from_cache() reports a hit;
 *          -1 if the seek fails, or if the header is truncated or invalid
 *          (BGZF_ERR_HEADER is then set in fp->errcode)
 */
int bgzf_block_length(BGZF *fp, int64_t block_start_offset)
{
    uint8_t header[BLOCK_HEADER_LENGTH];
    int count;
    int64_t block_address;

    /* Never parse a "header" from an unknown position when the seek failed. */
    if (bgzf_seek(fp, block_start_offset, SEEK_SET) < 0) return -1;

    block_address = _bgzf_tell((_bgzf_file_t)fp->fp);
    /* NOTE(review): a cache hit yields 0, which callers cannot tell apart from
     * the "no data read" case below -- confirm this is what callers expect. */
    if (load_block_from_cache(fp, block_address)) return 0;

    count = _bgzf_read(fp->fp, header, sizeof(header));
    if (count == 0) { // no data read
        fp->block_length = 0;
        return 0;
    }
    if (count != (int)sizeof(header) || !check_header(header)) {
        fp->errcode |= BGZF_ERR_HEADER;
        return -1;
    }

    /* +1 because the writer stored this number as (length - 1). */
    return unpackInt16(&header[16]) + 1;
}

int bgzf_read_block(BGZF *fp)
{
uint8_t header[BLOCK_HEADER_LENGTH], *compressed_block;
Expand Down
5 changes: 5 additions & 0 deletions src/bgzf.h
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,11 @@ extern "C" {
*/
int bgzf_read_block(BGZF *fp);

/**
 * Returns the length of the BGZF block that starts at a given offset,
 * as recorded in that block's header.
 *
 * @param fp                  BGZF file handler
 * @param block_start_offset  block start offset; passed as-is to bgzf_seek()
 * @return  block length on success; 0 if no data could be read at that
 *          offset; -1 on error (e.g. an invalid block header)
 */
int bgzf_block_length(BGZF *fp, int64_t block_start_offset);

#ifdef __cplusplus
}
#endif
Expand Down
Loading

0 comments on commit f251e4c

Please sign in to comment.