Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement BA1, BS1, BS2, PS4, PM2 criteria #106

Merged
merged 5 commits into from
May 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .vscode/launch.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
"type": "debugpy",
"request": "launch",
"module": "src.cli",
"args": ["NM_000152.4:c.1A>G", "--genome-release", "grch38"],
"args": ["NM_000277.2(PAH):c.1278T>C", "--genome-release", "grch38"],
"console": "integratedTerminal"
}
]
Expand Down
6 changes: 6 additions & 0 deletions example_variants.txt
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,13 @@ NM_000546.5:c.-28-5118_672+241dup (Exon 2-6 duplication) (Gene TP53) (From R
- provem NOT in tandem:


PS1:
NM_004333.4(BRAF):c.739T>C (p.Phe247Leu) (Gene: BRAF) (Note: PS1) (From ClinGen)

PM4:
NC_000011.10:2847957:C:G === NM_000218.2:c.1986C>G (p.Tyr662Ter) (Gene: KCNQ1) (Note: PM4) (From Recommendation 07.09.2018)
NM_000314.6:c.1134_1135insCTGG (p.Tyr379LeufsTer4) (Gene: PTEN) (Note: PM4) (From Recommendation 07.09.2018) ### Debug
NM_000152.4:c.2842_2483insT (p.Leu948SerfsTer70) (Gene: GAA) (Note: PM4) (From Recommendation 07.09.2018) ### Debug

BA1:
NM_000277.2(PAH):c.1278T>C
2 changes: 1 addition & 1 deletion src/api/annonars.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def get_variant_from_range(
logger.exception("Validation failed: {}", e)
raise AnnonarsException("Annonars returned non-validating data.") from e

def get_variant_info(self, seqvar: SeqVar) -> Optional[AnnonarsVariantResponse]:
def get_variant_info(self, seqvar: SeqVar) -> AnnonarsVariantResponse:
"""Get variant information from Annonars.

Args:
Expand Down
172 changes: 172 additions & 0 deletions src/atuo_ba1_bs1_bs_2_pm2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
"""Implementation of BA1, BS1, BS2, PM2 prediction for sequence variants."""

import re
from typing import Optional

from loguru import logger

from src.api.annonars import AnnonarsClient
from src.core.config import Config
from src.defs.annonars_variant import AnnonarsVariantResponse, VariantResult
from src.defs.auto_acmg import BA1BS1BS2PM2
from src.defs.exceptions import AlgorithmError, AutoAcmgBaseException, MissingDataError
from src.defs.genome_builds import GenomeRelease
from src.defs.seqvar import SeqVar


class AutoBA1BS1BS2PM2:
"""Predicts BA1, BS1, BS2, PM2 criteria for sequence variants."""

def __init__(
self, seqvar: SeqVar, genome_release: GenomeRelease, *, config: Optional[Config] = None
):
#: Configuration to use.
self.config = config or Config()
# Attributes to be set
self.seqvar = seqvar
self.genome_release = genome_release
self.annonars_client = AnnonarsClient(api_base_url=self.config.api_base_url_annonars)
self.prediction: BA1BS1BS2PM2 | None = None

def _get_variant_info(self, seqvar: SeqVar) -> Optional[AnnonarsVariantResponse]:
"""Get variant information from Annonars.

Returns:
dict: Annonars response.
"""
try:
logger.debug("Getting variant information for {}.", seqvar)
return self.annonars_client.get_variant_info(seqvar)
except AutoAcmgBaseException as e:
logger.error("Failed to get variant information. Error: {}", e)
return None

def evaluate_ba1(self, seqvar: SeqVar, variant_data: VariantResult) -> bool:
if (
not variant_data.gnomad_exomes
or not variant_data.gnomad_exomes.alleleCounts
or not variant_data.gnomad_exomes.alleleCounts[0].raw
or not variant_data.gnomad_exomes.alleleCounts[0].raw.af
):
raise MissingDataError("No allele counts found in variant data")

chrom = seqvar.chrom
allele_freq = variant_data.gnomad_exomes.alleleCounts[0].raw.af

if chrom.startswith("chrM"):
return allele_freq > 0.01
else:
return allele_freq > 0.05

def evaluate_bs1(self, seqvar: SeqVar, variant_data: VariantResult, max_credible_freq) -> bool:
if (
not variant_data.gnomad_exomes
or not variant_data.gnomad_exomes.alleleCounts
or not variant_data.gnomad_exomes.alleleCounts[0].byAncestryGroup
or not variant_data.gnomad_exomes.alleleCounts[0].raw
or not variant_data.gnomad_exomes.alleleCounts[0].raw.af
):
raise MissingDataError("No allele counts found in variant data")
chrom = seqvar.chrom
faf = 0.0
for group in variant_data.gnomad_exomes.alleleCounts[0].byAncestryGroup:
if group.faf95 and group.faf95 > faf:
faf = group.faf95

if chrom.startswith("chrM"):
return variant_data.gnomad_exomes.alleleCounts[0].raw.af > 0.005
else:
return faf > max_credible_freq

def evaluate_bs2(self, variant_data: VariantResult) -> bool:
if (
not variant_data.gnomad_exomes
or not variant_data.gnomad_exomes.alleleCounts
or not variant_data.gnomad_exomes.alleleCounts[0].raw
or not variant_data.gnomad_exomes.alleleCounts[0].raw.ac
or not variant_data.gnomad_exomes.alleleCounts[0].raw.nhomalt
):
raise MissingDataError("No allele counts found in variant data")

gene_mode = "inplace" # Field to identify recessive/dominant/X-linked
allele_count = variant_data.gnomad_exomes.alleleCounts[0].raw.ac

if gene_mode in ["recessive", "X-linked"]:
return allele_count > 2
elif gene_mode == "dominant":
return allele_count > 5
return False

def evaluate_pm2(self, seqvar: SeqVar, variant_data: VariantResult) -> bool:
if (
not variant_data.gnomad_exomes
or not variant_data.gnomad_exomes.alleleCounts
or not variant_data.gnomad_exomes.alleleCounts[0].raw
or not variant_data.gnomad_exomes.alleleCounts[0].raw.af
or not variant_data.gnomad_exomes.alleleCounts[0].raw.ac
or not variant_data.gnomad_exomes.alleleCounts[0].raw.nhomalt
):
raise MissingDataError("No allele counts found in variant data")

chrom = seqvar.chrom
allele_freq = variant_data.gnomad_exomes.alleleCounts[0].raw.af
gene_mode = "inplace" # Field to identify recessive/dominant/X-linked

if chrom.startswith("chrM"):
return allele_freq < 0.00002
else:
if gene_mode in ["recessive", "X-linked"]:
allele_count = variant_data.gnomad_exomes.alleleCounts[0].raw.ac
if allele_count <= 4:
return True
elif gene_mode == "dominant":
homozygous_count = variant_data.gnomad_exomes.alleleCounts[0].raw.nhomalt
if homozygous_count <= 1:
return True
return allele_freq < 0.0001

def predict(self) -> Optional[BA1BS1BS2PM2]:
"""
Predicts the BA1, BS1, BS2, PM5 criteria for the sequence variant.


Note:
Rules:
BA1: Allele frequency is >5% in Exome Sequencing Project, 1000 Genomes Project, or Exome
Aggregation Consortium.

BS1: Allele frequency greater than expected for disorder.

BS2: Observed in a healthy adult individual for a recessive (homozygous), dominant
(heterozygous), or X-linked (hemizygous) disorder, with full penetrance expected at an
early age.


PM2: Absent from controls (or at extremely low frequency if recessive) in Exome
Sequencing Project, 1000 Genomes or ExAC.

Returns:
BA1BS1BS2PM2: The prediction result.
"""
try:
# Get variant information
variant_info = self._get_variant_info(self.seqvar)
if not variant_info:
raise MissingDataError("No variant information retrieved")

# Instantiate the prediction result
self.prediction = BA1BS1BS2PM2()

# Evaluate each criterion
self.prediction.BA1 = self.evaluate_ba1(self.seqvar, variant_info.result)
self.prediction.BS1 = self.evaluate_bs1(
self.seqvar, variant_info.result, max_credible_freq=0.01
) # Adjust max_credible_freq as needed
self.prediction.BS2 = self.evaluate_bs2(variant_info.result)
self.prediction.PM2 = self.evaluate_pm2(self.seqvar, variant_info.result)
except AutoAcmgBaseException as e:
logger.error("Error occurred during BA1, BS1, BS2, PM5 prediction. Error: {}", e)
self.prediction = None

# Return the prediction result
return self.prediction
31 changes: 31 additions & 0 deletions src/auto_acmg.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

from loguru import logger

from src.atuo_ba1_bs1_bs_2_pm2 import AutoBA1BS1BS2PM2
from src.auto_ps1_pm5 import AutoPS1PM5
from src.core.config import Config
from src.defs.auto_pvs1 import (
Expand Down Expand Up @@ -160,6 +161,36 @@ def predict(self):
except AutoAcmgBaseException as e:
logger.error("Failed to predict PS1 and PM5 criteria. Error: {}", e)

# BA1, BS1, BS2, PM2
try:
logger.info("Predicting BA1, BS1, BS2, and PM2.")
ba1bs1bs2pm2 = AutoBA1BS1BS2PM2(
self.seqvar, self.genome_release, config=self.config
)
ba1bs1bs2pm2_prediction = ba1bs1bs2pm2.predict()
if not ba1bs1bs2pm2_prediction:
logger.error("Failed to predict BA1, BS1, BS2, and PM2 criteria.")
else:
self.seqvar_ba1, self.seqvar_bs1, self.seqvar_bs2, self.seqvar_pm2 = (
ba1bs1bs2pm2_prediction.BA1,
ba1bs1bs2pm2_prediction.BS1,
ba1bs1bs2pm2_prediction.BS2,
ba1bs1bs2pm2_prediction.PM2,
)
logger.info(
"BA1 prediction for {}: {}.\n"
"BS1 prediction: {}.\n"
"BS2 prediction: {}.\n"
"PM2 prediction: {}.",
self.seqvar.user_repr,
self.seqvar_ba1,
self.seqvar_bs1,
self.seqvar_bs2,
self.seqvar_pm2,
)
except AutoAcmgBaseException as e:
logger.error("Failed to predict BA1, BS1, BS2, and PM2 criteria. Error: {}", e)

elif isinstance(variant, StrucVar):
logger.info("Currently only PVS1 prediction is implemented for structural variants!")
logger.info(
Expand Down
35 changes: 18 additions & 17 deletions src/auto_ps1_pm5.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""Implementation of PS1 prediction for sequence variants."""
"""Implementation of PS1 and PM5 prediction for sequence variants."""

import re
from typing import Optional
Expand Down Expand Up @@ -58,6 +58,9 @@ def _parse_HGVSp(pHGVSp: str) -> Optional[AminoAcid]:
AminoAcid: The new amino acid if the pHGVSp is valid, None otherwise.
"""
try:
# If multiple pHGVSp values are present, take the first one
if ";" in pHGVSp:
pHGVSp = pHGVSp.split(";")[0]
match = re.match(REGEX_HGVSP, pHGVSp)
if match:
return AminoAcid[match.group(3)]
Expand All @@ -77,28 +80,20 @@ def _is_pathogenic(variant_info: VariantResult) -> bool:
Returns:
bool: True if the variant is pathogenic
"""
if (
variant_info.clinvar
and variant_info.clinvar.referenceAssertions
and variant_info.clinvar.referenceAssertions[0].clinicalSignificance
):
return (
variant_info.clinvar.referenceAssertions[0].clinicalSignificance
== "CLINICAL_SIGNIFICANCE_PATHOGENIC"
)
if variant_info.clinvar and variant_info.clinvar.referenceAssertions:
for refAssertion in variant_info.clinvar.referenceAssertions:
if refAssertion.clinicalSignificance and refAssertion.clinicalSignificance in [
"CLINICAL_SIGNIFICANCE_PATHOGENIC",
"CLINICAL_SIGNIFICANCE_LIKELY_PATHOGENIC",
]:
return True
return False

def predict(self) -> Optional[PS1PM5]:
"""
Predicts the criteria PS1 and PM5 for the provided sequence variant.

Note:
Rule:
PS1: Same amino acid change as a previously established pathogenic variant regardless of nucleotide change.
PM5: Novel missense change at an amino acid residue where a different missense change determined to be
pathogenic has been seen before.

Implementation:
Implementation of the rule PS1 and PM5:
The method implements the rule by:
- Getting the primary variant information & parsing the primary amino acid change.
- Iterating over all possible alternative bases & getting the alternative variant information.
Expand All @@ -108,6 +103,12 @@ def predict(self) -> Optional[PS1PM5]:
- If the alternative variant is pathogenic and the amino acid change is different from the primary variant,
then PM5 is set to True.

Note:
Rules:
PS1: Same amino acid change as a previously established pathogenic variant regardless of nucleotide change.
PM5: Novel missense change at an amino acid residue where a different missense change determined to be
pathogenic has been seen before.

Returns:
PS1PM5: The prediction result.
"""
Expand Down
8 changes: 6 additions & 2 deletions src/defs/annonars_variant.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

from typing import Any, List, Optional

from pydantic import BaseModel, Field
from pydantic import BaseModel


class VariantQuery(BaseModel):
Expand All @@ -17,6 +17,10 @@ class VariantQuery(BaseModel):
alternative: str


class GnomadExomes(BaseModel):
alleleCounts: List[AlleleCount]


class Dbnsfp(BaseModel):
HGVSc_ANNOVAR: Optional[str] = None
HGVSp_ANNOVAR: Optional[str] = None
Expand Down Expand Up @@ -122,7 +126,7 @@ class VariantResult(BaseModel):
dbnsfp: Optional[Dbnsfp] = None
dbscsnv: Optional[Any] = None
gnomad_mtdna: Optional[Any] = None
gnomad_exomes: Optional[Any] = None
gnomad_exomes: Optional[GnomadExomes] = None
gnomad_genomes: Optional[GnomadGenomes] = None
helixmtdb: Optional[Any] = None
ucsc_conservation: Optional[Any] = None
Expand Down
9 changes: 9 additions & 0 deletions src/defs/auto_acmg.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,3 +55,12 @@ class PS1PM5(BaseModel):

PS1: bool = False
PM5: bool = False


class BA1BS1BS2PM2(BaseModel):
"""BA1, BS1, BS2, and PM2 criteria prediction."""

BA1: bool = False
BS1: bool = False
BS2: bool = False
PM2: bool = False
Loading