Skip to content

Commit

Permalink
Support delta-encoded AP in sorted CRAM files
Browse files: browse the repository at this point in the history
  • Loading branch information
athos committed Aug 30, 2024
1 parent 79205f6 commit cec1235
Show file tree
Hide file tree
Showing 5 changed files with 55 additions and 28 deletions.
9 changes: 7 additions & 2 deletions src/cljam/io/cram/encode/context.clj
Original file line number Diff line number Diff line change
@@ -1,13 +1,18 @@
(ns cljam.io.cram.encode.context
(:require [cljam.io.cram.data-series :as ds]
[cljam.io.cram.encode.tag-dict :as tag-dict]))
[cljam.io.cram.encode.tag-dict :as tag-dict]
[cljam.io.sam.util.header :as sam.header]))

(defn make-container-context
"Creates a new container context."
[cram-header preservation-map seq-resolver]
[cram-header seq-resolver]
(let [rname->idx (into {}
(map-indexed (fn [i {:keys [SN]}] [SN i]))
(:SQ cram-header))
preservation-map (cond-> {:RN true, :AP false, :RR true}
(= (sam.header/sort-order cram-header)
sam.header/order-coordinate)
(assoc :AP true))
subst-mat {\A {\T 0, \G 1, \C 2, \N 3}
\T {\A 0, \G 1, \C 2, \N 3}
\G {\A 0, \T 1, \C 2, \N 3}
Expand Down
13 changes: 10 additions & 3 deletions src/cljam/io/cram/encode/record.clj
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,22 @@
-1
(get rname->idx rname)))

(defn- build-positional-data-encoder [{:keys [cram-header]} {:keys [RI RL AP RG]}]
(defn- build-positional-data-encoder
[{:keys [cram-header preservation-map]} {:keys [RI RL AP RG]}]
(let [rg-id->idx (into {}
(map-indexed (fn [i {:keys [ID]}] [ID i]))
(:RG cram-header))]
(:RG cram-header))
AP' (if (:AP preservation-map)
(let [pos (volatile! nil)]
(fn [^long pos']
(AP (- pos' (long (or @pos 0))))
(vreset! pos pos')))
AP)]
(fn [record]
(let [rg (sam.option/value-for-tag :RG record)]
(RI (::ref-index record))
(RL (count (:seq record)))
(AP (:pos record))
(AP' (:pos record))
(RG (if rg (get rg-id->idx rg) -1))))))

(defn- build-read-name-encoder [{:keys [RN]}]
Expand Down
9 changes: 3 additions & 6 deletions src/cljam/io/cram/writer.clj
Original file line number Diff line number Diff line change
Expand Up @@ -49,10 +49,8 @@
(struct/encode-cram-header-container (.-stream wtr) header))

(defn- preprocess-records
[cram-header preservation-map seq-resolver options ^objects container-records]
(let [container-ctx (context/make-container-context cram-header
preservation-map
seq-resolver)
[cram-header seq-resolver options ^objects container-records]
(let [container-ctx (context/make-container-context cram-header seq-resolver)
{:keys [ds-compressor-overrides tag-compressor-overrides]} options]
(dotimes [i (alength container-records)]
(let [slice-records (aget container-records i)]
Expand Down Expand Up @@ -173,8 +171,7 @@
(crai/write-index-entries index-writer entries)))

(defn- write-container [^CRAMWriter wtr cram-header counter container-records]
(let [preservation-map {:RN true, :AP false, :RR true}
container-ctx (preprocess-records cram-header preservation-map (.-seq-resolver wtr)
(let [container-ctx (preprocess-records cram-header (.-seq-resolver wtr)
(.-options wtr) container-records)
slices (generate-slices container-ctx counter container-records)
compression-header-block (generate-compression-header-block container-ctx)
Expand Down
7 changes: 4 additions & 3 deletions test/cljam/io/cram/encode/record_test.clj
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@
4]))

(defn- preprocess-slice-records [cram-header records]
(let [container-ctx (context/make-container-context cram-header {} test-seq-resolver)]
(let [container-ctx (context/make-container-context cram-header test-seq-resolver)]
(record/preprocess-slice-records container-ctx records)
(context/finalize-container-context container-ctx
(constantly :raw)
Expand Down Expand Up @@ -152,7 +152,8 @@

(deftest encode-slice-records-test
(testing "mapped reads"
(let [cram-header {:SQ
(let [cram-header {:HD {:SO "coordinate"}
:SQ
[{:SN "ref"}
{:SN "ref2"}]
:RG
Expand Down Expand Up @@ -208,7 +209,7 @@

(is (= 1 (count (get ds-res :AP))))
(is (= 5 (get-in ds-res [:AP 0 :content-id])))
(is (= [1 5 10 15 20] (seq (get-in ds-res [:AP 0 :data]))))
(is (= [1 4 5 5 5] (seq (get-in ds-res [:AP 0 :data]))))

(is (= 1 (count (get ds-res :RG))))
(is (= 6 (get-in ds-res [:RG 0 :content-id])))
Expand Down
45 changes: 31 additions & 14 deletions test/cljam/io/cram_test.clj
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
[clojure.test :refer [are deftest is testing]]))

(def ^:private temp-cram-file (io/file common/temp-dir "test.cram"))
(def ^:private temp-sorted-cram-file (io/file common/temp-dir "test.sorted.cram"))
(def ^:private temp-cram-file-2 (io/file common/temp-dir "test2.cram"))
(def ^:private temp-cram-file-3 (io/file common/temp-dir "test3.cram"))

Expand Down Expand Up @@ -74,20 +75,36 @@
(deftest writer-test
(with-before-after {:before (prepare-cache!)
:after (clean-cache!)}
(with-open [r (cram/reader common/test-cram-file
{:reference common/test-fa-file})
w (cram/writer temp-cram-file
{:reference common/test-fa-file})]
(cram/write-header w (cram/read-header r))
(cram/write-alignments w (cram/read-alignments r) (cram/read-header r)))
(with-open [r (cram/reader common/test-cram-file
{:reference common/test-fa-file})
r' (cram/reader temp-cram-file
{:reference common/test-fa-file})]
(is (= (cram/read-header r)
(cram/read-header r')))
(is (= (cram/read-alignments r)
(cram/read-alignments r'))))))
(testing "unsorted"
(with-open [r (cram/reader common/test-cram-file
{:reference common/test-fa-file})
w (cram/writer temp-cram-file
{:reference common/test-fa-file})]
(cram/write-header w (cram/read-header r))
(cram/write-alignments w (cram/read-alignments r) (cram/read-header r)))
(with-open [r (cram/reader common/test-cram-file
{:reference common/test-fa-file})
r' (cram/reader temp-cram-file
{:reference common/test-fa-file})]
(is (= (cram/read-header r)
(cram/read-header r')))
(is (= (cram/read-alignments r)
(cram/read-alignments r')))))
(testing "sorted by coordinate"
(with-open [r (cram/reader common/test-sorted-cram-file
{:reference common/test-fa-file})
w (cram/writer temp-sorted-cram-file
{:reference common/test-fa-file})]
(cram/write-header w (cram/read-header r))
(cram/write-alignments w (cram/read-alignments r) (cram/read-header r)))
(with-open [r (cram/reader common/test-sorted-cram-file
{:reference common/test-fa-file})
r' (cram/reader temp-sorted-cram-file
{:reference common/test-fa-file})]
(is (= (cram/read-header r)
(cram/read-header r')))
(is (= (cram/read-alignments r)
(cram/read-alignments r')))))))

(deftest-remote writer-with-multiple-containers-test
(with-before-after {:before (do (prepare-cavia!)
Expand Down

0 comments on commit cec1235

Please sign in to comment.