From 131e0ad6e0279238b747dc1b16070da2109d42eb Mon Sep 17 00:00:00 2001
From: Niyarin
Date: Wed, 7 Jun 2023 10:26:35 +0900
Subject: [PATCH] Add validator for writing vcf.

---
 src/cljam/io/vcf/util/validator.clj       | 104 ++++++++++++++++++++++
 test/cljam/io/vcf/util/validator_test.clj |  78 ++++++++++++++++
 2 files changed, 182 insertions(+)
 create mode 100644 src/cljam/io/vcf/util/validator.clj
 create mode 100644 test/cljam/io/vcf/util/validator_test.clj

diff --git a/src/cljam/io/vcf/util/validator.clj b/src/cljam/io/vcf/util/validator.clj
new file mode 100644
index 00000000..8ca33cd5
--- /dev/null
+++ b/src/cljam/io/vcf/util/validator.clj
@@ -0,0 +1,104 @@
+(ns cljam.io.vcf.util.validator
+  "Validation of variant maps before they are written to a VCF file."
+  (:require [cljam.io.vcf.util :as vcf-util]))
+
+(defn- valid-ref?
+  "True when `s` is nil or every element is one of A/T/G/C/N (either case)."
+  [s]
+  (or (nil? s)
+      (every? #{\A \T \G \C \N
+                \a \t \g \c \n} s)))
+
+(defn- valid-qual?
+  "True when `v` is nil, a float or an integer."
+  [v]
+  (or (nil? v)
+      (float? v)
+      (integer? v)))
+
+(defn- check-base-records
+  "Checks the :chr, :pos, :ref, :alt, :qual and :format fields of `variant`.
+  Returns a map from each offending key to a description of the problem,
+  or nil when every check passes."
+  [variant valid-contig? valid-format?]
+  (cond-> nil
+    (not (valid-contig? (:chr variant))) (assoc :chr "Invalid chr")
+    (not (integer? (:pos variant))) (assoc :pos "Invalid pos")
+    (not (valid-ref? (:ref variant))) (assoc :ref "Invalid ref")
+    (not (every? #(vcf-util/inspect-allele % (:ref variant)) (:alt variant)))
+    (assoc :alt "Invalid alt")
+
+    (not (valid-qual? (:qual variant))) (assoc :qual "Invalid qual")
+    (not (valid-format? (:format variant)))
+    (assoc :format ["Invalid format" (:format variant)])))
+
+(defn- check-entry-type
+  "True when `entry` satisfies the declared type `type-str`: \"Integer\",
+  \"Float\" (integers are accepted too), \"Character\", \"String\" or
+  \"Flag\" (any value). An unknown `type-str` never matches."
+  [entry type-str]
+  ((case type-str
+     "Integer" integer?
+     "Float" #(or (float? %) (integer? %))
+     "Character" char?
+     "String" string?
+     "Flag" (constantly true)
+     (constantly false))
+   entry))
+
+(defn- check-each-samples
+  "Checks the fields of each sample in `variant` against `vformat`, a map
+  from format key to a declaration with :type and :number. Returns a
+  nested map {sample {format-key message}} of the problems found, or nil
+  when there are none. Fields with no value in a sample are skipped."
+  [variant samples vformat]
+  (reduce
+   (fn [res [ks v]] (assoc-in res ks v))
+   nil
+   (for [sample samples
+         [fmt {:keys [type number]}] vformat
+         :let [entry (get-in variant [sample fmt])
+               ;; Wrap scalars so that `every?` and `count` below always
+               ;; receive a collection of values.
+               entries (if (or (= number 1) (not (sequential? entry)))
+                         [entry]
+                         entry)
+               type-check (and entry
+                               (not (every? #(check-entry-type % type)
+                                            entries)))
+               ;; Only an integer :number can be compared with a count
+               ;; (and formatted with %d).
+               number-check (and entry
+                                 (integer? number)
+                                 (not= (count entries) number))]
+         :when (or type-check number-check)]
+     [[sample fmt]
+      (cond
+        type-check (format
+                    "Not match type declaration. Requires %s , but %s."
+                    type (str entries))
+        number-check (format
+                      "Invalid number of elements. Requires %d , but %d."
+                      number (count entries)))])))
+
+(defn invalid-variant?
+  "Validates `variant` before writing. `samples` lists the sample keys of
+  `variant`; the third argument supplies the :contig and :format
+  declarations. Returns nil when nothing is wrong, otherwise a map from
+  each invalid field (or sample) to a description of the problem."
+  [variant samples {:keys [contig format]}]
+  (let [contig-ids (set (map :id contig))
+        ;; check-each-samples iterates a map keyed by format keyword, so
+        ;; index a sequence of declarations by :id; a map is used as is.
+        format-by-id (if (map? format)
+                       format
+                       (into {}
+                             (map (juxt (comp keyword :id) identity))
+                             format))
+        valid-contig? #(or (nil? %) (contig-ids %))
+        ;; A variant's :format may be one key or a collection of keys.
+        valid-format? #(or (nil? %)
+                           (every? (partial contains? format-by-id)
+                                   (if (coll? %) % [%])))]
+    (merge (check-base-records variant valid-contig? valid-format?)
+           (check-each-samples variant samples format-by-id))))
diff --git a/test/cljam/io/vcf/util/validator_test.clj b/test/cljam/io/vcf/util/validator_test.clj
new file mode 100644
index 00000000..401f9abd
--- /dev/null
+++ b/test/cljam/io/vcf/util/validator_test.clj
@@ -0,0 +1,78 @@
+(ns cljam.io.vcf.util.validator-test
+  (:require
+   [clojure.test :refer [deftest is testing]]
+   [cljam.io.vcf.util.validator :as validator]))
+
+(deftest check-each-samples-test
+  (testing "type check"
+    (is (not
+         (#'validator/check-each-samples
+          {:normal {:DP 100}} [:normal]
+          {:DP {:type "Integer" :number 1}})))
+
+    (is (string?
+         (get-in (#'validator/check-each-samples
+                  {:normal {:DP "100"}} [:normal]
+                  {:DP {:type "Integer" :number 1}})
+                 [:normal :DP])))
+
+    (is (not
+         (#'validator/check-each-samples
+          {:normal {:GT "1|0"}} [:normal]
+          {:GT {:type "String" :number 1}})))
+    (is (not
+         (#'validator/check-each-samples
+          {:normal {:A [1 2]}} [:normal]
+          {:A {:type "Integer" :number 2}}))))
+
+  (testing "number check"
+    (is (not
+         (#'validator/check-each-samples
+          {:normal {:A [1 2]}} [:normal]
+          {:A {:type "Integer" :number 2}})))
+
+    (is (string?
+         (get-in
+          (#'validator/check-each-samples
+           {:normal {:A [1]}} [:normal]
+           {:A {:type "Integer" :number 2}})
+          [:normal :A])))))
+
+(deftest invalid-variant?-test
+  (testing "contig check"
+    (is (not
+         (validator/invalid-variant?
+          {:chr "chr10" :pos 10}
+          nil
+          {:contig [{:id "chr10"}]}))))
+  (testing "alt check"
+    (is (not
+         (validator/invalid-variant?
+          {:chr "chr10" :pos 10 :alt ["A" "" nil]}
+          nil
+          {:contig [{:id "chr10"}]})))
+    (is (not
+         (validator/invalid-variant?
+          {:chr "chr10" :pos 10 :alt ["G]17:198982]"
+                                      "]13:123456]T"]}
+          nil
+          {:contig [{:id "chr10"}]}))))
+  (testing "qual check"
+    (is (not
+         (validator/invalid-variant?
+          {:chr "chr10" :pos 10
+           :qual 1}
+          nil
+          {:contig [{:id "chr10"}]})))
+    (is (= {:qual "Invalid qual"}
+           (validator/invalid-variant?
+            {:chr "chr10" :pos 10 :qual "1"}
+            nil
+            {:contig [{:id "chr10"}]}))))
+  (testing "format check"
+    (is (not
+         (validator/invalid-variant?
+          {:chr "chr10" :pos 10 :format [:GT] :normal {:GT "1|0"}}
+          [:normal]
+          {:contig [{:id "chr10"}]
+           :format [{:id "GT" :type "String" :number 1}]})))))