First attempt at PP3 and BP4 VCEPs

bihealth · Sep 5, 2024 · 9e235fd · 9e235fd
1 parent 93f6066
commit 9e235fd
Show file tree

Hide file tree

Showing 17 changed files with 576 additions and 175 deletions.
diff --git a/src/auto_acmg.py b/src/auto_acmg.py
@@ -375,6 +375,28 @@ def parse_seqvar_data(self, seqvar: SeqVar) -> AutoACMGSeqVarResult:
             self.seqvar_result.data.scores.dbnsfp.phyloP100 = self._convert_score_val(
                 dbsnfp.phyloP100way_vertebrate
             )
+            self.seqvar_result.data.scores.dbnsfp.sift = self._convert_score_val(dbsnfp.SIFT_score)
+            self.seqvar_result.data.scores.dbnsfp.polyphen2 = self._convert_score_val(
+                dbsnfp.Polyphen2_HVAR_score
+            )
+            self.seqvar_result.data.scores.dbnsfp.mutationTaster = self._convert_score_val(
+                dbsnfp.MutationTaster_score
+            )
+            self.seqvar_result.data.scores.dbnsfp.fathmm = self._convert_score_val(
+                dbsnfp.FATHMM_score
+            )
+            self.seqvar_result.data.scores.dbnsfp.provean = self._convert_score_val(
+                dbsnfp.PROVEAN_score
+            )
+            self.seqvar_result.data.scores.dbnsfp.vest4 = self._convert_score_val(
+                dbsnfp.VEST4_score
+            )
+            self.seqvar_result.data.scores.dbnsfp.mutpred = self._convert_score_val(
+                dbsnfp.MutPred_score
+            )
+            self.seqvar_result.data.scores.dbnsfp.primateAI = self._convert_score_val(
+                dbsnfp.PrimateAI_score
+            )
         if dbscsnv := variant_info.dbscsnv:
             self.seqvar_result.data.scores.dbscsnv.ada = dbscsnv.ada_score
             self.seqvar_result.data.scores.dbscsnv.rf = dbscsnv.rf_score

diff --git a/src/defs/annonars_variant.py b/src/defs/annonars_variant.py
@@ -52,10 +52,15 @@ class Dbnsfp(BaseModel):
     BayesDel_noAF_score: Optional[Union[str, float, int]] = None
     REVEL_score: Optional[Union[str, float, int]] = None
     CADD_raw: Optional[Union[str, float, int]] = None
-    PrimateAI_score: Optional[Union[str, float, int]] = None
     Polyphen2_HVAR_score: Optional[Union[str, float, int]] = None
-    VEST4_score: Optional[Union[str, float, int]] = None
     phyloP100way_vertebrate: Optional[Union[str, float, int]] = None
+    SIFT_score: Optional[Union[str, float, int]] = None
+    MutationTaster_score: Optional[Union[str, float, int]] = None
+    FATHMM_score: Optional[Union[str, float, int]] = None
+    PROVEAN_score: Optional[Union[str, float, int]] = None
+    VEST4_score: Optional[Union[str, float, int]] = None
+    MutPred_score: Optional[Union[str, float, int]] = None
+    PrimateAI_score: Optional[Union[str, float, int]] = None
     HGVSc_ANNOVAR: Optional[str] = None
     HGVSp_ANNOVAR: Optional[str] = None
     HGVSc_snpEff: Optional[str] = None

diff --git a/src/defs/auto_acmg.py b/src/defs/auto_acmg.py
@@ -550,6 +550,14 @@ class AutoACMGDbnsfp(AutoAcmgBaseModel):
     bayesDel_noAF: Optional[float] = None
     revel: Optional[float] = None
     phyloP100: Optional[float] = None
+    sift: Optional[float] = None
+    polyphen2: Optional[float] = None
+    mutationTaster: Optional[float] = None
+    fathmm: Optional[float] = None
+    provean: Optional[float] = None
+    vest4: Optional[float] = None
+    mutpred: Optional[float] = None
+    primateAI: Optional[float] = None
 
 
 class AutoACMGDbscsnv(AutoAcmgBaseModel):
@@ -590,10 +598,14 @@ class AutoACMGSeqVarTresholds(AutoAcmgBaseModel):
     metaRNN_pathogenic: float = 0.841
     #: BayesDel_noAF pathogenic threshold
     bayesDel_noAF_pathogenic: float = 0.521
+    #: Revel pathogenic threshold
+    revel_pathogenic: float = 100.0
     #: MetaRNN benign threshold
     metaRNN_benign: float = 0.267
     #: BayesDel_noAF benign threshold
     bayesDel_noAF_benign: float = -0.476
+    #: Revel benign threshold
+    revel_benign: float = -100.0
     #: PP2 and BP1 pathogenic threshold
     pp2bp1_pathogenic: float = 0.808
     #: PP2 and BP1 benign threshold
@@ -608,6 +620,8 @@ class AutoACMGSeqVarTresholds(AutoAcmgBaseModel):
     pm2_pathogenic: float = 0.0001
     #: Minimum number of alleles
     an_min: int = 2000
+    #: PP3 and BP4 strategy
+    pp3bp4_strategy: str = "default"
     #: BP7 donor position
     bp7_donor: int = 1
     #: BP7 acceptor position

diff --git a/src/seqvar/auto_pp3_bp4.py b/src/seqvar/auto_pp3_bp4.py
@@ -26,13 +26,12 @@ def __init__(self):
         #: Comment to store the prediction explanation.
         self.comment_pp3bp4: str = ""
 
-    @staticmethod
-    def _splice_variant(var_data: AutoACMGSeqVarData) -> bool:
+    def _is_splice_variant(self, var_data: AutoACMGSeqVarData) -> bool:
         """
         Check if the variant's consequence is a splice related.
 
         Args:
-            var_data (AutoACMGData): The variant information.
+            var_data (AutoACMGSeqVarData): The variant information.
 
         Returns:
             bool: True if the variant is a splice variant, False otherwise.
@@ -43,66 +42,134 @@ def _splice_variant(var_data: AutoACMGSeqVarData) -> bool:
             return True
         return False
 
-    @staticmethod
-    def _is_pathogenic_score(var_data: AutoACMGSeqVarData) -> bool:
+    def _is_inframe_indel(self, var_data: AutoACMGSeqVarData) -> bool:
         """
-        Check if any of the pathogenic scores meet the threshold.
-
-        Check if any of the pathogenic scores meet the threshold. If the variant is pathogenic
-        based on the scores, return True.
-
+        Check if "inframe" occurs in the CADD consequence or in any mehari consequence.
         Args:
-            variant_info (VariantResult): Variant information.
-
+            var_data (AutoACMGSeqVarData): Variant data; only its consequence field is read.
         Returns:
-            bool: True if the variant is pathogenic, False otherwise.
-
-        Raises:
-            MissingDataError: If the variant information is missing.
+            bool: True if either source mentions "inframe", False otherwise.
         """
-        if (
-            var_data.scores.dbnsfp.metaRNN
-            and var_data.scores.dbnsfp.metaRNN >= var_data.thresholds.metaRNN_pathogenic
-        ):
+        if "inframe" in var_data.consequence.cadd:
             return True
-        if (
-            var_data.scores.dbnsfp.bayesDel_noAF
-            and var_data.scores.dbnsfp.bayesDel_noAF >= var_data.thresholds.bayesDel_noAF_pathogenic
-        ):
+        if any("inframe" in cons for cons in var_data.consequence.mehari):  # per mehari entry
             return True
         return False
 
-    @staticmethod
-    def _is_benign_score(var_data: AutoACMGSeqVarData) -> bool:
+    def _is_missense_variant(self, var_data: AutoACMGSeqVarData) -> bool:
         """
-        Check if any of the benign scores meet the threshold.
-
-        Check if any of the benign scores meet the threshold. If the variant is benign
-        based on the scores, return True.
+        Check if CADD mentions "missense" or mehari contains "missense_variant".
+        Args:
+            var_data (AutoACMGSeqVarData): Variant data; only its consequence field is read.
+        Returns:
+            bool: True if either check matches, False otherwise.
+        """
+        if "missense" in var_data.consequence.cadd:
+            return True
+        if "missense_variant" in var_data.consequence.mehari:  # tests the container, not each entry
+            return True
+        return False
 
+    def _is_synonymous_variant(self, var_data: AutoACMGSeqVarData) -> bool:
+        """
+        Check if CADD mentions "synonymous" or mehari contains "synonymous_variant".
         Args:
-            variant_info (VariantResult): Variant information.
+            var_data (AutoACMGSeqVarData): Variant data; only its consequence field is read.
+        Returns:
+            bool: True if either check matches, False otherwise.
+        """
+        if "synonymous" in var_data.consequence.cadd:
+            return True
+        if "synonymous_variant" in var_data.consequence.mehari:  # tests the container, not each entry
+            return True
+        return False
 
+    def _is_intron_variant(self, var_data: AutoACMGSeqVarData) -> bool:
+        """
+        Check if "intron" occurs in the CADD consequence or in any mehari consequence.
+        Args:
+            var_data (AutoACMGSeqVarData): Variant data; only its consequence field is read.
         Returns:
-            bool: True if the variant is benign, False otherwise.
+            bool: True if either source mentions "intron", False otherwise.
+        """
+        if "intron" in var_data.consequence.cadd:
+            return True
+        if any("intron" in cons for cons in var_data.consequence.mehari):  # per mehari entry
+            return True
+        return False
 
-        Raises:
-            MissingDataError: If the variant information is missing.
+    def _is_utr_variant(self, var_data: AutoACMGSeqVarData) -> bool:
         """
-        if (
-            var_data.scores.dbnsfp.metaRNN
-            and var_data.scores.dbnsfp.metaRNN <= var_data.thresholds.metaRNN_benign
-        ):
+        Check if "UTR" or "utr" occurs in the CADD or any mehari consequence.
+        Args:
+            var_data (AutoACMGSeqVarData): Variant data; only its consequence field is read.
+        Returns:
+            bool: True if the variant is a UTR variant, False otherwise.
+        """
+        if any(x in var_data.consequence.cadd for x in ["UTR", "utr"]):
             return True
-        if (
-            var_data.scores.dbnsfp.bayesDel_noAF
-            and var_data.scores.dbnsfp.bayesDel_noAF <= var_data.thresholds.bayesDel_noAF_benign
+        if any("utr" in cons for cons in var_data.consequence.mehari) or any(
+            "UTR" in cons for cons in var_data.consequence.mehari
         ):
             return True
         return False
 
-    @staticmethod
-    def _is_pathogenic_splicing(var_data: AutoACMGSeqVarData) -> bool:
+    def _is_pathogenic_score(
+        self, var_data: AutoACMGSeqVarData, *score_threshold_pairs: Tuple[str, float]
+    ) -> bool:
+        """
+        Check if any listed dbnsfp score is at or above its pathogenic threshold.
+        Args:
+            var_data (AutoACMGSeqVarData): Variant data; only var_data.scores.dbnsfp is read.
+            score_threshold_pairs (Tuple[str, float]): (dbnsfp attribute name, threshold) pairs.
+        Returns:
+            bool: True on the first score that is not None and >= its threshold, else False.
+        """
+        for score_attr, threshold in score_threshold_pairs:
+            score_value = getattr(var_data.scores.dbnsfp, score_attr, None)  # unknown name -> None
+            if score_value is not None and score_value >= threshold:
+                return True
+        return False
+
+    def _is_benign_score(
+        self, var_data: AutoACMGSeqVarData, *score_threshold_pairs: Tuple[str, float]
+    ) -> bool:
+        """
+        Check if any listed dbnsfp score is at or below its benign threshold.
+        Args:
+            var_data (AutoACMGSeqVarData): Variant data; only var_data.scores.dbnsfp is read.
+            score_threshold_pairs (Tuple[str, float]): (dbnsfp attribute name, threshold) pairs.
+        Returns:
+            bool: True on the first score that is not None and <= its threshold, else False.
+        """
+        for score_attr, threshold in score_threshold_pairs:
+            score_value = getattr(var_data.scores.dbnsfp, score_attr, None)  # unknown name -> None
+            if score_value is not None and score_value <= threshold:
+                return True
+        return False
+
+    def _affect_spliceAI(self, var_data: AutoACMGSeqVarData) -> bool:
+        """
+        Predict splice site alterations using SpliceAI.
+        Each of the four SpliceAI scores is read from var_data.scores.cadd and compared with the
+        same-named threshold in var_data.thresholds; a score strictly above its threshold matches.
+        Args:
+            var_data (AutoACMGSeqVarData): The data containing variant scores and thresholds.
+        Returns:
+            bool: True if at least one score exceeds its threshold, False otherwise.
+        """
+        score_checks = {
+            "spliceAI_acceptor_gain": var_data.thresholds.spliceAI_acceptor_gain,
+            "spliceAI_acceptor_loss": var_data.thresholds.spliceAI_acceptor_loss,
+            "spliceAI_donor_gain": var_data.thresholds.spliceAI_donor_gain,
+            "spliceAI_donor_loss": var_data.thresholds.spliceAI_donor_loss,
+        }
+        return any(
+            (getattr(var_data.scores.cadd, score_name) or 0) > threshold  # falsy score counts as 0
+            for score_name, threshold in score_checks.items()
+        )
+
+    def _is_pathogenic_splicing(self, var_data: AutoACMGSeqVarData) -> bool:
         """
         Check if the variant is pathogenic based on splicing scores.
 
@@ -127,8 +194,7 @@ def _is_pathogenic_splicing(var_data: AutoACMGSeqVarData) -> bool:
                 return True
         return False
 
-    @staticmethod
-    def _is_benign_splicing(var_data: AutoACMGSeqVarData) -> bool:
+    def _is_benign_splicing(self, var_data: AutoACMGSeqVarData) -> bool:
         """
         Check if the variant is benign based on splicing scores.
 
@@ -166,26 +232,45 @@ def verify_pp3bp4(
             self.prediction_pp3bp4.PP3, self.prediction_pp3bp4.BP4 = False, False
         else:
             try:
-                if self._splice_variant(var_data):
-                    self.comment_pp3bp4 = "Variant is a splice variant."
-                    self.prediction_pp3bp4.PP3 = self._is_pathogenic_splicing(var_data)
-                    self.prediction_pp3bp4.BP4 = self._is_benign_splicing(var_data)
-                    self.comment_pp3bp4 += (
-                        f"Ada score: {var_data.scores.dbscsnv.ada}, "
-                        f"Ada threshold: {var_data.thresholds.ada}. "
-                        f"RF score: {var_data.scores.dbscsnv.rf}, "
-                        f"RF threshold: {var_data.thresholds.rf}. "
+                if (score := var_data.thresholds.pp3bp4_strategy) == "default":
+                    self.prediction_pp3bp4.PP3 = self._is_pathogenic_score(
+                        var_data,
+                        ("metaRNN", var_data.thresholds.metaRNN_pathogenic),
+                        ("bayesDel_noAF", var_data.thresholds.bayesDel_noAF_pathogenic),
+                    )
+                    self.prediction_pp3bp4.BP4 = self._is_benign_score(
+                        var_data,
+                        ("metaRNN", var_data.thresholds.metaRNN_benign),
+                        ("bayesDel_noAF", var_data.thresholds.bayesDel_noAF_benign),
                     )
-                else:
-                    self.comment_pp3bp4 = "Variant is not a splice variant."
-                    self.prediction_pp3bp4.PP3 = self._is_pathogenic_score(var_data)
-                    self.prediction_pp3bp4.BP4 = self._is_benign_score(var_data)
                     self.comment_pp3bp4 += (
                         f"MetaRNN score: {var_data.scores.dbnsfp.metaRNN}, "
                         f"MetaRNN threshold: {var_data.thresholds.metaRNN_pathogenic}. "
                         f"BayesDel_noAF score: {var_data.scores.dbnsfp.bayesDel_noAF}, "
                         f"BayesDel_noAF threshold: {var_data.thresholds.bayesDel_noAF_pathogenic}. "
                     )
+                else:
+                    self.prediction_pp3bp4.PP3 = self._is_pathogenic_score(
+                        var_data,
+                        (score, getattr(var_data.thresholds, f"{score}_pathogenic")),
+                    )
+                    self.prediction_pp3bp4.BP4 = self._is_benign_score(
+                        var_data,
+                        (score, getattr(var_data.thresholds, f"{score}_benign")),
+                    )
+
+                    self.prediction_pp3bp4.PP3 = (
+                        self.prediction_pp3bp4.PP3 or self._is_pathogenic_splicing(var_data)
+                    )
+                    self.prediction_pp3bp4.BP4 = (
+                        self.prediction_pp3bp4.BP4 or self._is_benign_splicing(var_data)
+                    )
+                    self.comment_pp3bp4 += (
+                        f"Ada score: {var_data.scores.dbscsnv.ada}, "
+                        f"Ada threshold: {var_data.thresholds.ada}. "
+                        f"RF score: {var_data.scores.dbscsnv.rf}, "
+                        f"RF threshold: {var_data.thresholds.rf}. "
+                    )
 
             except AutoAcmgBaseException as e:
                 self.comment_pp3bp4 = f"An error occurred during prediction. Error: {e}"