Skip to content

Commit

Permalink
First attempt of PP3 and BP4 VCEPs
Browse files Browse the repository at this point in the history
  • Loading branch information
gromdimon committed Sep 5, 2024
1 parent 93f6066 commit 9e235fd
Show file tree
Hide file tree
Showing 17 changed files with 576 additions and 175 deletions.
22 changes: 22 additions & 0 deletions src/auto_acmg.py
Original file line number Diff line number Diff line change
Expand Up @@ -375,6 +375,28 @@ def parse_seqvar_data(self, seqvar: SeqVar) -> AutoACMGSeqVarResult:
self.seqvar_result.data.scores.dbnsfp.phyloP100 = self._convert_score_val(
dbsnfp.phyloP100way_vertebrate
)
self.seqvar_result.data.scores.dbnsfp.sift = self._convert_score_val(dbsnfp.SIFT_score)
self.seqvar_result.data.scores.dbnsfp.polyphen2 = self._convert_score_val(
dbsnfp.Polyphen2_HVAR_score
)
self.seqvar_result.data.scores.dbnsfp.mutationTaster = self._convert_score_val(
dbsnfp.MutationTaster_score
)
self.seqvar_result.data.scores.dbnsfp.fathmm = self._convert_score_val(
dbsnfp.FATHMM_score
)
self.seqvar_result.data.scores.dbnsfp.provean = self._convert_score_val(
dbsnfp.PROVEAN_score
)
self.seqvar_result.data.scores.dbnsfp.vest4 = self._convert_score_val(
dbsnfp.VEST4_score
)
self.seqvar_result.data.scores.dbnsfp.mutpred = self._convert_score_val(
dbsnfp.MutPred_score
)
self.seqvar_result.data.scores.dbnsfp.primateAI = self._convert_score_val(
dbsnfp.PrimateAI_score
)
if dbscsnv := variant_info.dbscsnv:
self.seqvar_result.data.scores.dbscsnv.ada = dbscsnv.ada_score
self.seqvar_result.data.scores.dbscsnv.rf = dbscsnv.rf_score
Expand Down
9 changes: 7 additions & 2 deletions src/defs/annonars_variant.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,10 +52,15 @@ class Dbnsfp(BaseModel):
BayesDel_noAF_score: Optional[Union[str, float, int]] = None
REVEL_score: Optional[Union[str, float, int]] = None
CADD_raw: Optional[Union[str, float, int]] = None
PrimateAI_score: Optional[Union[str, float, int]] = None
Polyphen2_HVAR_score: Optional[Union[str, float, int]] = None
VEST4_score: Optional[Union[str, float, int]] = None
phyloP100way_vertebrate: Optional[Union[str, float, int]] = None
SIFT_score: Optional[Union[str, float, int]] = None
MutationTaster_score: Optional[Union[str, float, int]] = None
FATHMM_score: Optional[Union[str, float, int]] = None
PROVEAN_score: Optional[Union[str, float, int]] = None
VEST4_score: Optional[Union[str, float, int]] = None
MutPred_score: Optional[Union[str, float, int]] = None
PrimateAI_score: Optional[Union[str, float, int]] = None
HGVSc_ANNOVAR: Optional[str] = None
HGVSp_ANNOVAR: Optional[str] = None
HGVSc_snpEff: Optional[str] = None
Expand Down
14 changes: 14 additions & 0 deletions src/defs/auto_acmg.py
Original file line number Diff line number Diff line change
Expand Up @@ -550,6 +550,14 @@ class AutoACMGDbnsfp(AutoAcmgBaseModel):
bayesDel_noAF: Optional[float] = None
revel: Optional[float] = None
phyloP100: Optional[float] = None
sift: Optional[float] = None
polyphen2: Optional[float] = None
mutationTaster: Optional[float] = None
fathmm: Optional[float] = None
provean: Optional[float] = None
vest4: Optional[float] = None
mutpred: Optional[float] = None
primateAI: Optional[float] = None


class AutoACMGDbscsnv(AutoAcmgBaseModel):
Expand Down Expand Up @@ -590,10 +598,14 @@ class AutoACMGSeqVarTresholds(AutoAcmgBaseModel):
metaRNN_pathogenic: float = 0.841
#: BayesDel_noAF pathogenic threshold
bayesDel_noAF_pathogenic: float = 0.521
#: Revel pathogenic threshold
revel_pathogenic: float = 100.0
#: MetaRNN benign threshold
metaRNN_benign: float = 0.267
#: BayesDel_noAF benign threshold
bayesDel_noAF_benign: float = -0.476
#: Revel benign threshold
revel_benign: float = -100.0
#: PP2 and BP1 pathogenic threshold
pp2bp1_pathogenic: float = 0.808
#: PP2 and BP1 benign threshold
Expand All @@ -608,6 +620,8 @@ class AutoACMGSeqVarTresholds(AutoAcmgBaseModel):
pm2_pathogenic: float = 0.0001
#: Minimum number of alleles
an_min: int = 2000
#: PP3 and BP4 strategy
pp3bp4_strategy: str = "default"
#: BP7 donor position
bp7_donor: int = 1
#: BP7 acceptor position
Expand Down
201 changes: 143 additions & 58 deletions src/seqvar/auto_pp3_bp4.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,12 @@ def __init__(self):
#: Comment to store the prediction explanation.
self.comment_pp3bp4: str = ""

@staticmethod
def _splice_variant(var_data: AutoACMGSeqVarData) -> bool:
def _is_splice_variant(self, var_data: AutoACMGSeqVarData) -> bool:
"""
Check if the variant's consequence is a splice related.
Args:
var_data (AutoACMGData): The variant information.
var_data (AutoACMGSeqVarData): The variant information.
Returns:
bool: True if the variant is a splice variant, False otherwise.
Expand All @@ -43,66 +42,134 @@ def _splice_variant(var_data: AutoACMGSeqVarData) -> bool:
return True
return False

@staticmethod
def _is_pathogenic_score(var_data: AutoACMGSeqVarData) -> bool:
def _is_inframe_indel(self, var_data: AutoACMGSeqVarData) -> bool:
"""
Check if any of the pathogenic scores meet the threshold.
Check if any of the pathogenic scores meet the threshold. If the variant is pathogenic
based on the scores, return True.
Check if the variant's consequence is an inframe indel.
Args:
variant_info (VariantResult): Variant information.
var_data (AutoACMGSeqVarData): The variant information.
Returns:
bool: True if the variant is pathogenic, False otherwise.
Raises:
MissingDataError: If the variant information is missing.
bool: True if the variant is an inframe indel, False otherwise.
"""
if (
var_data.scores.dbnsfp.metaRNN
and var_data.scores.dbnsfp.metaRNN >= var_data.thresholds.metaRNN_pathogenic
):
if "inframe" in var_data.consequence.cadd:
return True
if (
var_data.scores.dbnsfp.bayesDel_noAF
and var_data.scores.dbnsfp.bayesDel_noAF >= var_data.thresholds.bayesDel_noAF_pathogenic
):
if any("inframe" in cons for cons in var_data.consequence.mehari):
return True
return False

@staticmethod
def _is_benign_score(var_data: AutoACMGSeqVarData) -> bool:
def _is_missense_variant(self, var_data: AutoACMGSeqVarData) -> bool:
"""
Check if any of the benign scores meet the threshold.
Check if any of the benign scores meet the threshold. If the variant is benign
based on the scores, return True.
Check if the variant's consequence is a missense variant.
Args:
var_data (AutoACMGSeqVarData): The variant information.
Returns:
bool: True if the variant is a missense variant, False otherwise.
"""
if "missense" in var_data.consequence.cadd:
return True
if "missense_variant" in var_data.consequence.mehari:
return True
return False

def _is_synonymous_variant(self, var_data: AutoACMGSeqVarData) -> bool:
"""
Check if the variant's consequence is a synonymous variant.
Args:
variant_info (VariantResult): Variant information.
var_data (AutoACMGSeqVarData): The variant information.
Returns:
bool: True if the variant is a synonymous variant, False otherwise.
"""
if "synonymous" in var_data.consequence.cadd:
return True
if "synonymous_variant" in var_data.consequence.mehari:
return True
return False

def _is_intron_variant(self, var_data: AutoACMGSeqVarData) -> bool:
"""
Check if the variant's consequence is an intron variant.
Args:
var_data (AutoACMGSeqVarData): The variant information.
Returns:
bool: True if the variant is benign, False otherwise.
bool: True if the variant is an intron variant, False otherwise.
"""
if "intron" in var_data.consequence.cadd:
return True
if any("intron" in cons for cons in var_data.consequence.mehari):
return True
return False

Raises:
MissingDataError: If the variant information is missing.
def _is_utr_variant(self, var_data: AutoACMGSeqVarData) -> bool:
"""
if (
var_data.scores.dbnsfp.metaRNN
and var_data.scores.dbnsfp.metaRNN <= var_data.thresholds.metaRNN_benign
):
Check if the variant's consequence is an UTR variant.
Args:
var_data (AutoACMGSeqVarData): The variant information.
Returns:
bool: True if the variant is an UTR variant, False otherwise.
"""
if (x in var_data.consequence.cadd for x in ["UTR", "utr"]):
return True
if (
var_data.scores.dbnsfp.bayesDel_noAF
and var_data.scores.dbnsfp.bayesDel_noAF <= var_data.thresholds.bayesDel_noAF_benign
if any("utr" in cons for cons in var_data.consequence.mehari) or any(
"UTR" in cons for cons in var_data.consequence.mehari
):
return True
return False

@staticmethod
def _is_pathogenic_splicing(var_data: AutoACMGSeqVarData) -> bool:
def _is_pathogenic_score(
self, var_data: AutoACMGSeqVarData, *score_threshold_pairs: Tuple[str, float]
) -> bool:
"""
Check if any of the specified scores meet their corresponding threshold.
Args:
var_data (AutoACMGSeqVarData): Variant data containing scores and thresholds.
score_threshold_pairs (Tuple[str, float]): Pairs of score attributes and their corresponding pathogenic thresholds.
Returns:
bool: True if any of the specified scores meet their corresponding threshold, False otherwise.
"""
for score_attr, threshold in score_threshold_pairs:
score_value = getattr(var_data.scores.dbnsfp, score_attr, None)
if score_value is not None and score_value >= threshold:
return True
return False

def _is_benign_score(
self, var_data: AutoACMGSeqVarData, *score_threshold_pairs: Tuple[str, float]
) -> bool:
"""
Check if any of the specified scores meet their corresponding threshold.
Args:
var_data (AutoACMGSeqVarData): Variant data containing scores and thresholds.
score_threshold_pairs (Tuple[str, float]): Pairs of score attributes and their corresponding benign thresholds.
Returns:
bool: True if any of the specified scores meet their corresponding threshold, False otherwise.
"""
for score_attr, threshold in score_threshold_pairs:
score_value = getattr(var_data.scores.dbnsfp, score_attr, None)
if score_value is not None and score_value <= threshold:
return True
return False

def _affect_spliceAI(self, var_data: AutoACMGSeqVarData) -> bool:
"""
Predict splice site alterations using SpliceAI.
If any of SpliceAI scores are greater than specific thresholds, the variant is considered a
splice site alteration. The thresholds are defined in the variant data thresholds.
Args:
var_data: The data containing variant scores and thresholds.
Returns:
bool: True if the variant is a splice site alteration, False otherwise.
"""
score_checks = {
"spliceAI_acceptor_gain": var_data.thresholds.spliceAI_acceptor_gain,
"spliceAI_acceptor_loss": var_data.thresholds.spliceAI_acceptor_loss,
"spliceAI_donor_gain": var_data.thresholds.spliceAI_donor_gain,
"spliceAI_donor_loss": var_data.thresholds.spliceAI_donor_loss,
}
return any(
(getattr(var_data.scores.cadd, score_name) or 0) > threshold
for score_name, threshold in score_checks.items()
)

def _is_pathogenic_splicing(self, var_data: AutoACMGSeqVarData) -> bool:
"""
Check if the variant is pathogenic based on splicing scores.
Expand All @@ -127,8 +194,7 @@ def _is_pathogenic_splicing(var_data: AutoACMGSeqVarData) -> bool:
return True
return False

@staticmethod
def _is_benign_splicing(var_data: AutoACMGSeqVarData) -> bool:
def _is_benign_splicing(self, var_data: AutoACMGSeqVarData) -> bool:
"""
Check if the variant is benign based on splicing scores.
Expand Down Expand Up @@ -166,26 +232,45 @@ def verify_pp3bp4(
self.prediction_pp3bp4.PP3, self.prediction_pp3bp4.BP4 = False, False
else:
try:
if self._splice_variant(var_data):
self.comment_pp3bp4 = "Variant is a splice variant."
self.prediction_pp3bp4.PP3 = self._is_pathogenic_splicing(var_data)
self.prediction_pp3bp4.BP4 = self._is_benign_splicing(var_data)
self.comment_pp3bp4 += (
f"Ada score: {var_data.scores.dbscsnv.ada}, "
f"Ada threshold: {var_data.thresholds.ada}. "
f"RF score: {var_data.scores.dbscsnv.rf}, "
f"RF threshold: {var_data.thresholds.rf}. "
if (score := var_data.thresholds.pp3bp4_strategy) == "default":
self.prediction_pp3bp4.PP3 = self._is_pathogenic_score(
var_data,
("metaRNN", var_data.thresholds.metaRNN_pathogenic),
("bayesDel_noAF", var_data.thresholds.bayesDel_noAF_pathogenic),
)
self.prediction_pp3bp4.BP4 = self._is_benign_score(
var_data,
("metaRNN", var_data.thresholds.metaRNN_benign),
("bayesDel_noAF", var_data.thresholds.bayesDel_noAF_benign),
)
else:
self.comment_pp3bp4 = "Variant is not a splice variant."
self.prediction_pp3bp4.PP3 = self._is_pathogenic_score(var_data)
self.prediction_pp3bp4.BP4 = self._is_benign_score(var_data)
self.comment_pp3bp4 += (
f"MetaRNN score: {var_data.scores.dbnsfp.metaRNN}, "
f"MetaRNN threshold: {var_data.thresholds.metaRNN_pathogenic}. "
f"BayesDel_noAF score: {var_data.scores.dbnsfp.bayesDel_noAF}, "
f"BayesDel_noAF threshold: {var_data.thresholds.bayesDel_noAF_pathogenic}. "
)
else:
self.prediction_pp3bp4.PP3 = self._is_pathogenic_score(
var_data,
(score, getattr(var_data.thresholds, f"{score}_pathogenic")),
)
self.prediction_pp3bp4.BP4 = self._is_benign_score(
var_data,
(score, getattr(var_data.thresholds, f"{score}_benign")),
)

self.prediction_pp3bp4.PP3 = (
self.prediction_pp3bp4.PP3 or self._is_pathogenic_splicing(var_data)
)
self.prediction_pp3bp4.BP4 = (
self.prediction_pp3bp4.BP4 or self._is_benign_splicing(var_data)
)
self.comment_pp3bp4 += (
f"Ada score: {var_data.scores.dbscsnv.ada}, "
f"Ada threshold: {var_data.thresholds.ada}. "
f"RF score: {var_data.scores.dbscsnv.rf}, "
f"RF threshold: {var_data.thresholds.rf}. "
)

except AutoAcmgBaseException as e:
self.comment_pp3bp4 = f"An error occurred during prediction. Error: {e}"
Expand Down
Loading

0 comments on commit 9e235fd

Please sign in to comment.