Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement custom PP3 and BP4 rules #196

Merged
merged 4 commits into from
Sep 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .vscode/launch.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
"type": "debugpy",
"request": "launch",
"module": "src.cli",
"args": ["NM_000314.7(PTEN):c.1133_1136del", "--genome-release", "grch38"],
"args": ["NM_004958.4(MTOR):c.4448G>A", "--genome-release", "grch38"],
"console": "integratedTerminal"
},
{
Expand Down
24 changes: 23 additions & 1 deletion src/auto_acmg.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,7 +254,7 @@ def _convert_score_val(self, score_value: Optional[Union[str, float, int]]) -> O
if isinstance(score_value, (float, int)):
return float(score_value)
try:
scores = [float(score) for score in score_value.split(";") if score != "."]
scores = [float(score) for score in score_value.split(";") if score not in [".", ""]]
return max(scores) if scores else None
except ValueError as e:
raise AlgorithmError("Failed to convert score value to float.") from e
Expand Down Expand Up @@ -375,6 +375,28 @@ def parse_seqvar_data(self, seqvar: SeqVar) -> AutoACMGSeqVarResult:
self.seqvar_result.data.scores.dbnsfp.phyloP100 = self._convert_score_val(
dbsnfp.phyloP100way_vertebrate
)
self.seqvar_result.data.scores.dbnsfp.sift = self._convert_score_val(dbsnfp.SIFT_score)
self.seqvar_result.data.scores.dbnsfp.polyphen2 = self._convert_score_val(
dbsnfp.Polyphen2_HVAR_score
)
self.seqvar_result.data.scores.dbnsfp.mutationTaster = self._convert_score_val(
dbsnfp.MutationTaster_score
)
self.seqvar_result.data.scores.dbnsfp.fathmm = self._convert_score_val(
dbsnfp.FATHMM_score
)
self.seqvar_result.data.scores.dbnsfp.provean = self._convert_score_val(
dbsnfp.PROVEAN_score
)
self.seqvar_result.data.scores.dbnsfp.vest4 = self._convert_score_val(
dbsnfp.VEST4_score
)
self.seqvar_result.data.scores.dbnsfp.mutpred = self._convert_score_val(
dbsnfp.MutPred_score
)
self.seqvar_result.data.scores.dbnsfp.primateAI = self._convert_score_val(
dbsnfp.PrimateAI_score
)
if dbscsnv := variant_info.dbscsnv:
self.seqvar_result.data.scores.dbscsnv.ada = dbscsnv.ada_score
self.seqvar_result.data.scores.dbscsnv.rf = dbscsnv.rf_score
Expand Down
9 changes: 7 additions & 2 deletions src/defs/annonars_variant.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,10 +52,15 @@ class Dbnsfp(BaseModel):
BayesDel_noAF_score: Optional[Union[str, float, int]] = None
REVEL_score: Optional[Union[str, float, int]] = None
CADD_raw: Optional[Union[str, float, int]] = None
PrimateAI_score: Optional[Union[str, float, int]] = None
Polyphen2_HVAR_score: Optional[Union[str, float, int]] = None
VEST4_score: Optional[Union[str, float, int]] = None
phyloP100way_vertebrate: Optional[Union[str, float, int]] = None
SIFT_score: Optional[Union[str, float, int]] = None
MutationTaster_score: Optional[Union[str, float, int]] = None
FATHMM_score: Optional[Union[str, float, int]] = None
PROVEAN_score: Optional[Union[str, float, int]] = None
VEST4_score: Optional[Union[str, float, int]] = None
MutPred_score: Optional[Union[str, float, int]] = None
PrimateAI_score: Optional[Union[str, float, int]] = None
HGVSc_ANNOVAR: Optional[str] = None
HGVSp_ANNOVAR: Optional[str] = None
HGVSc_snpEff: Optional[str] = None
Expand Down
18 changes: 18 additions & 0 deletions src/defs/auto_acmg.py
Original file line number Diff line number Diff line change
Expand Up @@ -550,6 +550,14 @@ class AutoACMGDbnsfp(AutoAcmgBaseModel):
bayesDel_noAF: Optional[float] = None
revel: Optional[float] = None
phyloP100: Optional[float] = None
sift: Optional[float] = None
polyphen2: Optional[float] = None
mutationTaster: Optional[float] = None
fathmm: Optional[float] = None
provean: Optional[float] = None
vest4: Optional[float] = None
mutpred: Optional[float] = None
primateAI: Optional[float] = None


class AutoACMGDbscsnv(AutoAcmgBaseModel):
Expand Down Expand Up @@ -590,10 +598,18 @@ class AutoACMGSeqVarTresholds(AutoAcmgBaseModel):
metaRNN_pathogenic: float = 0.841
#: BayesDel_noAF pathogenic threshold
bayesDel_noAF_pathogenic: float = 0.521
#: Revel pathogenic threshold
revel_pathogenic: float = 0.773
#: CADD pathogenic threshold
cadd_pathogenic: float = 20.0
#: MetaRNN benign threshold
metaRNN_benign: float = 0.267
#: BayesDel_noAF benign threshold
bayesDel_noAF_benign: float = -0.476
#: Revel benign threshold
revel_benign: float = 0.016
#: CADD benign threshold
cadd_benign: float = 10.0
#: PP2 and BP1 pathogenic threshold
pp2bp1_pathogenic: float = 0.808
#: PP2 and BP1 benign threshold
Expand All @@ -608,6 +624,8 @@ class AutoACMGSeqVarTresholds(AutoAcmgBaseModel):
pm2_pathogenic: float = 0.0001
#: Minimum number of alleles
an_min: int = 2000
#: PP3 and BP4 strategy
pp3bp4_strategy: str = "default"
#: BP7 donor position
bp7_donor: int = 1
#: BP7 acceptor position
Expand Down
231 changes: 155 additions & 76 deletions src/seqvar/auto_pp3_bp4.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,12 @@ def __init__(self):
#: Comment to store the prediction explanation.
self.comment_pp3bp4: str = ""

@staticmethod
def _splice_variant(var_data: AutoACMGSeqVarData) -> bool:
def _is_splice_variant(self, var_data: AutoACMGSeqVarData) -> bool:
"""
Check if the variant's consequence is a splice related.

Args:
var_data (AutoACMGData): The variant information.
var_data (AutoACMGSeqVarData): The variant information.

Returns:
bool: True if the variant is a splice variant, False otherwise.
Expand All @@ -43,66 +42,134 @@ def _splice_variant(var_data: AutoACMGSeqVarData) -> bool:
return True
return False

@staticmethod
def _is_pathogenic_score(var_data: AutoACMGSeqVarData) -> bool:
def _is_inframe_indel(self, var_data: AutoACMGSeqVarData) -> bool:
    """
    Check if the variant's consequence is an inframe indel.

    Only the consequence annotations are inspected; prediction scores and
    thresholds play no role in this check.

    Args:
        var_data (AutoACMGSeqVarData): The variant information.

    Returns:
        bool: True if "inframe" occurs in the CADD consequence or in any of
        the Mehari consequence terms, False otherwise.
    """
    if "inframe" in var_data.consequence.cadd:
        return True
    # Mehari consequences are iterated term by term and matched by substring.
    return any("inframe" in cons for cons in var_data.consequence.mehari)

@staticmethod
def _is_benign_score(var_data: AutoACMGSeqVarData) -> bool:
def _is_missense_variant(self, var_data: AutoACMGSeqVarData) -> bool:
    """
    Tell whether the variant is annotated as a missense variant.

    Args:
        var_data (AutoACMGSeqVarData): The variant information.

    Returns:
        bool: True if "missense" occurs in the CADD consequence or
        "missense_variant" is one of the Mehari consequences, False otherwise.
    """
    consequence = var_data.consequence
    # CADD is matched by substring, Mehari by exact membership of the term.
    return "missense" in consequence.cadd or "missense_variant" in consequence.mehari

def _is_synonymous_variant(self, var_data: AutoACMGSeqVarData) -> bool:
    """
    Tell whether the variant is annotated as a synonymous variant.

    Args:
        var_data (AutoACMGSeqVarData): The variant information.

    Returns:
        bool: True if "synonymous" occurs in the CADD consequence or
        "synonymous_variant" is one of the Mehari consequences, False otherwise.
    """
    consequence = var_data.consequence
    # CADD is matched by substring, Mehari by exact membership of the term.
    return "synonymous" in consequence.cadd or "synonymous_variant" in consequence.mehari

def _is_intron_variant(self, var_data: AutoACMGSeqVarData) -> bool:
    """
    Tell whether the variant is annotated as an intron variant.

    Args:
        var_data (AutoACMGSeqVarData): The variant information.

    Returns:
        bool: True if "intron" occurs in the CADD consequence or in any of
        the Mehari consequence terms, False otherwise.
    """
    consequence = var_data.consequence
    if "intron" in consequence.cadd:
        return True
    for term in consequence.mehari:
        if "intron" in term:
            return True
    return False

Raises:
MissingDataError: If the variant information is missing.
def _is_utr_variant(self, var_data: AutoACMGSeqVarData) -> bool:
    """
    Check if the variant's consequence is an UTR variant.

    Args:
        var_data (AutoACMGSeqVarData): The variant information.

    Returns:
        bool: True if "UTR" or "utr" occurs in the CADD consequence or in any
        of the Mehari consequence terms, False otherwise.
    """
    markers = ("UTR", "utr")
    # A bare generator expression is always truthy, so the markers must be
    # reduced with any(); otherwise every variant is reported as UTR.
    if any(marker in var_data.consequence.cadd for marker in markers):
        return True
    return any(
        marker in cons for cons in var_data.consequence.mehari for marker in markers
    )

@staticmethod
def _is_pathogenic_splicing(var_data: AutoACMGSeqVarData) -> bool:
def _is_pathogenic_score(
    self, var_data: AutoACMGSeqVarData, *score_threshold_pairs: Tuple[str, float]
) -> bool:
    """
    Report whether at least one named dbNSFP score reaches its threshold.

    Args:
        var_data (AutoACMGSeqVarData): Variant data; scores are read from
            ``var_data.scores.dbnsfp``.
        score_threshold_pairs (Tuple[str, float]): Pairs of a score attribute
            name and the pathogenic threshold to compare it against.

    Returns:
        bool: True if any named score is present (not None) and greater than
        or equal to its threshold, False otherwise. Unknown attribute names
        and missing scores are skipped.
    """
    return any(
        (value := getattr(var_data.scores.dbnsfp, name, None)) is not None
        and value >= cutoff
        for name, cutoff in score_threshold_pairs
    )

def _is_benign_score(
    self, var_data: AutoACMGSeqVarData, *score_threshold_pairs: Tuple[str, float]
) -> bool:
    """
    Report whether at least one named dbNSFP score is at or below its threshold.

    Args:
        var_data (AutoACMGSeqVarData): Variant data; scores are read from
            ``var_data.scores.dbnsfp``.
        score_threshold_pairs (Tuple[str, float]): Pairs of a score attribute
            name and the benign threshold to compare it against.

    Returns:
        bool: True if any named score is present (not None) and less than or
        equal to its threshold, False otherwise. Unknown attribute names and
        missing scores are skipped.
    """
    return any(
        (value := getattr(var_data.scores.dbnsfp, name, None)) is not None
        and value <= cutoff
        for name, cutoff in score_threshold_pairs
    )

def _affect_spliceAI(self, var_data: AutoACMGSeqVarData) -> bool:
    """
    Predict splice site alterations using SpliceAI.

    Each of the four SpliceAI scores stored under ``var_data.scores.cadd`` is
    compared with the threshold of the same name in ``var_data.thresholds``.

    Args:
        var_data: The data containing variant scores and thresholds.

    Returns:
        bool: True if any SpliceAI score is strictly greater than its
        threshold, False otherwise. A missing (falsy) score is treated as 0.
    """
    limits = var_data.thresholds
    cutoffs = (
        ("spliceAI_acceptor_gain", limits.spliceAI_acceptor_gain),
        ("spliceAI_acceptor_loss", limits.spliceAI_acceptor_loss),
        ("spliceAI_donor_gain", limits.spliceAI_donor_gain),
        ("spliceAI_donor_loss", limits.spliceAI_donor_loss),
    )
    for field, cutoff in cutoffs:
        # `or 0` lets an absent score compare as zero instead of raising.
        if (getattr(var_data.scores.cadd, field) or 0) > cutoff:
            return True
    return False

def _is_pathogenic_splicing(self, var_data: AutoACMGSeqVarData) -> bool:
"""
Check if the variant is pathogenic based on splicing scores.

Expand All @@ -127,8 +194,7 @@ def _is_pathogenic_splicing(var_data: AutoACMGSeqVarData) -> bool:
return True
return False

@staticmethod
def _is_benign_splicing(var_data: AutoACMGSeqVarData) -> bool:
def _is_benign_splicing(self, var_data: AutoACMGSeqVarData) -> bool:
"""
Check if the variant is benign based on splicing scores.

Expand Down Expand Up @@ -159,37 +225,50 @@ def verify_pp3bp4(
"""Predict PP3 and BP4 criteria."""
self.prediction_pp3bp4 = PP3BP4()
self.comment_pp3bp4 = ""
if seqvar.chrom == "MT":
self.comment_pp3bp4 = (
"Variant is in mitochondrial DNA. PP3 and BP4 criteria are not met."
)
self.prediction_pp3bp4.PP3, self.prediction_pp3bp4.BP4 = False, False
else:
try:
if self._splice_variant(var_data):
self.comment_pp3bp4 = "Variant is a splice variant."
self.prediction_pp3bp4.PP3 = self._is_pathogenic_splicing(var_data)
self.prediction_pp3bp4.BP4 = self._is_benign_splicing(var_data)
self.comment_pp3bp4 += (
f"Ada score: {var_data.scores.dbscsnv.ada}, "
f"Ada threshold: {var_data.thresholds.ada}. "
f"RF score: {var_data.scores.dbscsnv.rf}, "
f"RF threshold: {var_data.thresholds.rf}. "
)
else:
self.comment_pp3bp4 = "Variant is not a splice variant."
self.prediction_pp3bp4.PP3 = self._is_pathogenic_score(var_data)
self.prediction_pp3bp4.BP4 = self._is_benign_score(var_data)
self.comment_pp3bp4 += (
f"MetaRNN score: {var_data.scores.dbnsfp.metaRNN}, "
f"MetaRNN threshold: {var_data.thresholds.metaRNN_pathogenic}. "
f"BayesDel_noAF score: {var_data.scores.dbnsfp.bayesDel_noAF}, "
f"BayesDel_noAF threshold: {var_data.thresholds.bayesDel_noAF_pathogenic}. "
)

except AutoAcmgBaseException as e:
self.comment_pp3bp4 = f"An error occurred during prediction. Error: {e}"
self.prediction_pp3bp4 = None
try:
if (score := var_data.thresholds.pp3bp4_strategy) == "default":
self.prediction_pp3bp4.PP3 = self._is_pathogenic_score(
var_data,
("metaRNN", var_data.thresholds.metaRNN_pathogenic),
("bayesDel_noAF", var_data.thresholds.bayesDel_noAF_pathogenic),
)
self.prediction_pp3bp4.BP4 = self._is_benign_score(
var_data,
("metaRNN", var_data.thresholds.metaRNN_benign),
("bayesDel_noAF", var_data.thresholds.bayesDel_noAF_benign),
)
self.comment_pp3bp4 += (
f"MetaRNN score: {var_data.scores.dbnsfp.metaRNN}, "
f"MetaRNN threshold: {var_data.thresholds.metaRNN_pathogenic}. "
f"BayesDel_noAF score: {var_data.scores.dbnsfp.bayesDel_noAF}, "
f"BayesDel_noAF threshold: {var_data.thresholds.bayesDel_noAF_pathogenic}. "
)
else:
self.prediction_pp3bp4.PP3 = self._is_pathogenic_score(
var_data,
(score, getattr(var_data.thresholds, f"{score}_pathogenic")),
)
self.prediction_pp3bp4.BP4 = self._is_benign_score(
var_data,
(score, getattr(var_data.thresholds, f"{score}_benign")),
)

self.prediction_pp3bp4.PP3 = (
self.prediction_pp3bp4.PP3 or self._is_pathogenic_splicing(var_data)
)
self.prediction_pp3bp4.BP4 = self.prediction_pp3bp4.BP4 or self._is_benign_splicing(
var_data
)
self.comment_pp3bp4 += (
f"Ada score: {var_data.scores.dbscsnv.ada}, "
f"Ada threshold: {var_data.thresholds.ada}. "
f"RF score: {var_data.scores.dbscsnv.rf}, "
f"RF threshold: {var_data.thresholds.rf}. "
)

except AutoAcmgBaseException as e:
self.comment_pp3bp4 = f"An error occurred during prediction. Error: {e}"
self.prediction_pp3bp4 = None
return self.prediction_pp3bp4, self.comment_pp3bp4

def predict_pp3bp4(
Expand Down
Loading
Loading