-
Notifications
You must be signed in to change notification settings - Fork 12
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Support CRAI index and CRAM random access
- Loading branch information
Showing
13 changed files
with
290 additions
and
74 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
(ns cljam.io.crai | ||
(:require [cljam.util :as util] | ||
[cljam.util.intervals :as intervals] | ||
[clojure.java.io :as io] | ||
[clojure.string :as str])) | ||
|
||
(defn read-index
  "Parses the CRAI file `f` and builds an index from its entries.

  Every line is split on tab characters and each field is parsed as a long,
  in the order: sequence id, start, span, container offset, slice offset,
  size. A negative sequence id denotes an unmapped entry, which is recorded
  with `:chr` \"*\" and `:start`/`:end` of 0. Otherwise `:chr` is the `:name`
  of the element of `refs` at position sequence id, `:start` is the start
  field and `:end` is start + span. The entry maps are handed to
  `intervals/index-intervals`, whose result is returned."
  [f refs]
  (let [ref-table (vec refs)
        ;; one CRAI line -> seq of longs
        parse-fields (fn [line]
                       (map #(Long/parseLong %) (str/split line #"\t")))
        ;; one CRAI line -> index entry map
        ->entry (fn [line]
                  (let [[^long ref-idx ^long pos ^long len c-offset s-offset n-bytes]
                        (parse-fields line)
                        [chr-name from to] (if (neg? ref-idx)
                                             ["*" 0 0]
                                             [(:name (nth ref-table ref-idx))
                                              pos
                                              (+ pos len)])]
                    {:chr chr-name
                     :start from
                     :end to
                     :container-offset c-offset
                     :slice-offset s-offset
                     :size n-bytes}))]
    (with-open [rdr (io/reader (util/compressor-input-stream f))]
      (intervals/index-intervals (map ->entry (line-seq rdr))))))
|
||
(defn find-overlapping-entries
  "Returns the entries of the index `idx` that overlap the region given by
  `chr`, `start` and `end`. Delegates to `intervals/find-overlap-intervals`."
  [idx chr start end]
  (-> idx
      (intervals/find-overlap-intervals chr start end)))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,118 @@ | ||
(ns cljam.io.crai-test | ||
(:require [cljam.io.crai :as crai] | ||
[cljam.test-common :as common] | ||
[clojure.test :refer [deftest are]])) | ||
|
||
;; Reference list used by the tests: a vector of {:name "chrN"} maps for
;; N = 1..22 followed by "chrX" and "chrY".
(def ^:private test-refs
  (into []
        (map (fn [suffix] {:name (str "chr" suffix)}))
        (concat (range 1 23) ["X" "Y"])))
|
||
;; Reads the medium CRAI fixture and checks which index entries are returned
;; for several chr1 regions and for the unmapped ("*") region.
(deftest read-index-test
  (let [idx (crai/read-index common/medium-crai-file test-refs)
        ;; the two chr1 entries expected from the fixture
        chr1-first {:chr "chr1"
                    :start 546609
                    :end (+ 546609 205262429)
                    :container-offset 324
                    :slice-offset 563
                    :size 22007}
        chr1-second {:chr "chr1"
                     :start 206547069
                     :end (+ 206547069 42644506)
                     :container-offset 324
                     :slice-offset 22570
                     :size 7349}
        ;; builds an expected unmapped entry; start and end are always 0
        unmapped (fn [container-offset slice-offset size]
                   {:chr "*"
                    :start 0
                    :end 0
                    :container-offset container-offset
                    :slice-offset slice-offset
                    :size size})]
    (are [?chr ?start ?end ?expected]
         (= ?expected (crai/find-overlapping-entries idx ?chr ?start ?end))
      ;; whole chromosome: both entries
      "chr1" 1 Long/MAX_VALUE
      [chr1-first chr1-second]

      ;; region inside the first entry only
      "chr1" 550000 600000
      [chr1-first]

      ;; region inside the second entry only
      "chr1" 210000000 240000000
      [chr1-second]

      ;; region spanning both entries
      "chr1" 200000000 210000000
      [chr1-first chr1-second]

      ;; unmapped entries
      "*" 0 0
      [(unmapped 354657 563 23119)
       (unmapped 378365 171 23494)
       (unmapped 378365 23665 23213)
       (unmapped 378365 46878 23051)
       (unmapped 378365 69929 23563)
       (unmapped 378365 93492 24231)
       (unmapped 378365 117723 24078)
       (unmapped 378365 141801 23871)
       (unmapped 378365 165672 24365)
       (unmapped 378365 190037 12326)])))
Oops, something went wrong.