Skip to content

Commit

Permalink
Add validator for writing vcf.
Browse files Browse the repository at this point in the history
  • Loading branch information
niyarin committed Jun 12, 2023
1 parent edecb9d commit 131e0ad
Show file tree
Hide file tree
Showing 2 changed files with 128 additions and 0 deletions.
62 changes: 62 additions & 0 deletions src/cljam/io/vcf/util/validator.clj
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
(ns cljam.io.vcf.util.validator
(:require [cljam.io.vcf.util :as vcf-util]))

(defn- valid-ref?
  "Returns true when `s` is nil, or when every character of `s` is one of
  A/T/G/C/N in either case."
  [s]
  (let [allowed-bases (set "ATGCNatgcn")]
    (if (nil? s)
      true
      (every? allowed-bases s))))

(defn- valid-qual?
  "Returns true when `v` is nil (quality absent), a floating-point number,
  or an integer; false otherwise."
  [v]
  (cond
    (nil? v)   true
    (float? v) true
    :else      (integer? v)))

(defn- check-base-records
  "Validates the fixed (non-sample) fields of `variant`.

  `valid-contig?` and `valid-format?` are predicates applied to the variant's
  `:chr` and `:format` values respectively.

  Returns nil when nothing is wrong; otherwise a map from each offending
  field key (`:chr`, `:pos`, `:ref`, `:alt`, `:qual`, `:format`) to a
  description of the problem."
  [variant valid-contig? valid-format?]
  (cond-> nil
    (not (valid-contig? (:chr variant))) (assoc :chr "Invalid chr")
    (not (integer? (:pos variant))) (assoc :pos "Invalid pos")
    (not (valid-ref? (:ref variant))) (assoc :ref "Invalid ref")
    ;; Every alt allele must be recognised by `inspect-allele` for this ref.
    (not (every? #(vcf-util/inspect-allele % (:ref variant)) (:alt variant)))
    (assoc :alt "Invalid alt")

    ;; Was reported as "Invalid ref", which pointed at the wrong field.
    (not (valid-qual? (:qual variant))) (assoc :qual "Invalid qual")
    ;; The payload used to read the misspelled key `:foramt` and therefore
    ;; always carried nil instead of the offending value.
    ;; NOTE(review): the predicate in the payload looks like debugging
    ;; residue; it is kept only so the shape of the value is unchanged.
    (not (valid-format? (:format variant)))
    (assoc :format ["Invalid format" (:format variant) valid-format?])))

(defn- check-entry-type
  "Returns whether `entry` satisfies the declared type named by `type-str`
  (\"Integer\", \"Float\", \"Character\", \"String\" or \"Flag\").
  Integers are accepted for \"Float\", anything is accepted for \"Flag\",
  and an unrecognised type name never matches."
  [entry type-str]
  (let [predicates {"Integer"   integer?
                    "Float"     (fn [x] (or (float? x) (integer? x)))
                    "Character" char?
                    "String"    string?
                    "Flag"      (constantly true)}
        matches?   (get predicates type-str (constantly false))]
    (matches? entry)))

(defn- check-each-samples
  "Validates per-sample values of `variant` against their declarations.

  `samples` is a collection of sample keys to look up in `variant`;
  `vformat` is a map from field key to a declaration map with `:type` and
  `:number`. A sample/field pair with no (falsey) value is skipped.

  Returns nil when every present value conforms; otherwise a nested map
  `{sample {field message}}`. When both checks fail for a value, the type
  message is the one reported."
  [variant samples vformat]
  (reduce
   (fn [res [ks v]] (assoc-in res ks v))
   nil
   (for [sample samples
         [fmt {:keys [type number]}] vformat
         :let [entry (get-in variant [sample fmt])
               ;; Normalise to a collection. Number=1 values are scalars and
               ;; are wrapped as before; a stray scalar under any other
               ;; Number is wrapped too, so it is reported rather than
               ;; throwing inside `count`/`every?`.
               entries (cond
                         (= number 1) [entry]
                         (sequential? entry) entry
                         :else [entry])
               ;; The type check must walk the normalised `entries`;
               ;; walking the raw `entry` threw for scalar values such as
               ;; {:DP 100} and iterated the characters of strings.
               type-check (and entry
                               (not (every? #(check-entry-type % type)
                                            entries)))
               ;; Only a fixed integer Number can be compared with a count.
               ;; A non-integer Number (e.g. "A", ".", nil) used to fail
               ;; this check unconditionally and then blow up in `%d`.
               number-check (and entry
                                 (integer? number)
                                 (not= (count entries) number))]
         :when (or type-check number-check)]
     [[sample fmt]
      (cond
        type-check (format
                    "Not match type declaration. Requires %s , but %s."
                    type (str entries))
        number-check (format
                      "Invalid number of elements. Requires %d , but %d."
                      number (count entries)))])))

(defn invalid-variant?
  "Validates `variant` against the header meta-info.

  `samples` is the collection of sample keys to check inside `variant`.
  The third argument is the meta-info map; its `:contig` and `:format`
  entries are read as collections of maps carrying an `:id`.

  Returns nil when the variant is valid; otherwise a map describing each
  problem, keyed by field for the fixed columns and by sample then field
  for per-sample values."
  [variant samples {:keys [contig format]}]
  (let [contig-ids (set (map :id contig))
        ;; `check-each-samples` destructures [field declaration] pairs, so
        ;; the declarations are indexed by keyword id before being handed
        ;; over; passing the raw sequence of maps made the destructuring
        ;; throw as soon as there was a sample to check. An already
        ;; keyed map is passed through as is.
        format-by-id (if (map? format)
                       format
                       (into {}
                             (map (juxt (comp keyword :id) identity) format)))
        ;; Build the id sets once instead of on every predicate call.
        format-ids (set (keys format-by-id))
        valid-contig? #(or (nil? %) (contains? contig-ids %))
        ;; NOTE(review): presumably a variant's :format is a sequence of
        ;; field keys - confirm against the reader. A single key is still
        ;; accepted as before; a sequence is valid when every key is
        ;; declared.
        valid-format? #(cond
                         (nil? %) true
                         (sequential? %) (every? format-ids %)
                         :else (contains? format-ids %))]
    (merge (check-base-records variant valid-contig? valid-format?)
           (check-each-samples variant samples format-by-id))))
66 changes: 66 additions & 0 deletions test/cljam/io/vcf/util/validator_test.clj
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
(ns cljam.io.vcf.util.validator-test
(:require
[clojure.test :refer [deftest is testing]]
[cljam.io.vcf.util.validator :as validator]))

(deftest check-each-samples-test
  ;; Exercises the private per-sample validator through its var.
  (let [check #'validator/check-each-samples
        samples [:normal]]
    (testing "type check"
      ;; A scalar Integer under Number=1 is accepted.
      (is (not (check {:normal {:DP 100}}
                      samples
                      {:DP {:type "Integer" :number 1}})))

      ;; A string where an Integer is declared yields a message.
      (let [result (check {:normal {:DP "100"}}
                          samples
                          {:DP {:type "Integer" :number 1}})]
        (is (string? (get-in result [:normal :DP]))))

      (is (not (check {:normal {:GT "1|0"}}
                      samples
                      {:GT {:type "String" :number 1}})))
      (is (not (check {:normal {:A [1 2]}}
                      samples
                      {:A {:type "Integer" :number 2}}))))

    (testing "number check"
      (is (not (check {:normal {:A [1 2]}}
                      samples
                      {:A {:type "Integer" :number 2}})))

      ;; One element where two are declared yields a message.
      (let [result (check {:normal {:A [1]}}
                          samples
                          {:A {:type "Integer" :number 2}})]
        (is (string? (get-in result [:normal :A])))))))

(deftest invalid-variant?-test
  ;; Every case shares the same header: a single declared contig, no samples.
  (let [meta-info {:contig [{:id "chr10"}]}
        problems (fn [variant]
                   (validator/invalid-variant? variant nil meta-info))]
    (testing "contig check"
      (is (not (problems {:chr "chr10" :pos 10}))))
    (testing "alt check"
      ;; Plain base, symbolic allele and a missing allele.
      (is (not (problems {:chr "chr10" :pos 10 :alt ["A" "<DEL>" nil]})))
      ;; Breakend notation.
      (is (not (problems {:chr "chr10" :pos 10 :alt ["G]17:198982]"
                                                     "]13:123456]T"]}))))
    (testing "qual check"
      (is (not (problems {:chr "chr10" :pos 10
                          :qual 1}))))))

0 comments on commit 131e0ad

Please sign in to comment.