Skip to content

Commit

Permalink
example implementation of ba1, bs1, bs2, etc
Browse files Browse the repository at this point in the history
  • Loading branch information
gromdimon committed May 27, 2024
1 parent 47b12a3 commit 2f6e217
Show file tree
Hide file tree
Showing 9 changed files with 236 additions and 76 deletions.
2 changes: 1 addition & 1 deletion .vscode/launch.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
"type": "debugpy",
"request": "launch",
"module": "src.cli",
"args": ["NM_000152.4:c.1A>G", "--genome-release", "grch38"],
"args": ["NM_000277.2(PAH):c.1278T>C", "--genome-release", "grch38"],
"console": "integratedTerminal"
}
]
Expand Down
6 changes: 6 additions & 0 deletions example_variants.txt
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,13 @@ NM_000546.5:c.-28-5118_672+241dup (Exon 2-6 duplication) (Gene TP53) (From R
- provem NOT in tandem:


PS1:
NM_004333.4(BRAF):c.739T>C (p.Phe247Leu) (Gene: BRAF) (Note: PS1) (From ClinGen)

PM4:
NC_000011.10:2847957:C:G === NM_000218.2:c.1986C>G (p.Tyr662Ter) (Gene: KCNQ1) (Note: PM4) (From Recommendation 07.09.2018)
NM_000314.6:c.1134_1135insCTGG (p.Tyr379LeufsTer4) (Gene: PTEN) (Note: PM4) (From Recommendation 07.09.2018) ### Debug
NM_000152.4:c.2842_2483insT (p.Leu948SerfsTer70) (Gene: GAA) (Note: PM4) (From Recommendation 07.09.2018) ### Debug

BA1:
NM_000277.2(PAH):c.1278T>C
2 changes: 1 addition & 1 deletion src/api/annonars.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def get_variant_from_range(
logger.exception("Validation failed: {}", e)
raise AnnonarsException("Annonars returned non-validating data.") from e

def get_variant_info(self, seqvar: SeqVar) -> Optional[AnnonarsVariantResponse]:
def get_variant_info(self, seqvar: SeqVar) -> AnnonarsVariantResponse:
"""Get variant information from Annonars.
Args:
Expand Down
172 changes: 172 additions & 0 deletions src/atuo_ba1_bs1_bs_2_pm2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
"""Implementation of BA1, BS1, BS2, PM2 prediction for sequence variants."""

import re
from typing import Optional

from loguru import logger

from src.api.annonars import AnnonarsClient
from src.core.config import Config
from src.defs.annonars_variant import AnnonarsVariantResponse, VariantResult
from src.defs.auto_acmg import BA1BS1BS2PM2
from src.defs.exceptions import AlgorithmError, AutoAcmgBaseException, MissingDataError
from src.defs.genome_builds import GenomeRelease
from src.defs.seqvar import SeqVar


class AutoBA1BS1BS2PM2:
"""Predicts BA1, BS1, BS2, PM2 criteria for sequence variants."""

def __init__(
self, seqvar: SeqVar, genome_release: GenomeRelease, *, config: Optional[Config] = None
):
#: Configuration to use.
self.config = config or Config()
# Attributes to be set
self.seqvar = seqvar
self.genome_release = genome_release
self.annonars_client = AnnonarsClient(api_base_url=self.config.api_base_url_annonars)
self.prediction: BA1BS1BS2PM2 | None = None

def _get_variant_info(self, seqvar: SeqVar) -> Optional[AnnonarsVariantResponse]:
"""Get variant information from Annonars.
Returns:
dict: Annonars response.
"""
try:
logger.debug("Getting variant information for {}.", seqvar)
return self.annonars_client.get_variant_info(seqvar)
except AutoAcmgBaseException as e:
logger.error("Failed to get variant information. Error: {}", e)
return None

def evaluate_ba1(self, seqvar: SeqVar, variant_data: VariantResult) -> bool:
if (
not variant_data.gnomad_exomes
or not variant_data.gnomad_exomes.alleleCounts
or not variant_data.gnomad_exomes.alleleCounts[0].raw
or not variant_data.gnomad_exomes.alleleCounts[0].raw.af
):
raise MissingDataError("No allele counts found in variant data")

chrom = seqvar.chrom
allele_freq = variant_data.gnomad_exomes.alleleCounts[0].raw.af

if chrom.startswith("chrM"):
return allele_freq > 0.01
else:
return allele_freq > 0.05

def evaluate_bs1(self, seqvar: SeqVar, variant_data: VariantResult, max_credible_freq) -> bool:
if (
not variant_data.gnomad_exomes
or not variant_data.gnomad_exomes.alleleCounts
or not variant_data.gnomad_exomes.alleleCounts[0].byAncestryGroup
or not variant_data.gnomad_exomes.alleleCounts[0].raw
or not variant_data.gnomad_exomes.alleleCounts[0].raw.af
):
raise MissingDataError("No allele counts found in variant data")
chrom = seqvar.chrom
faf = 0.0
for group in variant_data.gnomad_exomes.alleleCounts[0].byAncestryGroup:
if group.faf95 and group.faf95 > faf:
faf = group.faf95

if chrom.startswith("chrM"):
return variant_data.gnomad_exomes.alleleCounts[0].raw.af > 0.005
else:
return faf > max_credible_freq

def evaluate_bs2(self, variant_data: VariantResult) -> bool:
if (
not variant_data.gnomad_exomes
or not variant_data.gnomad_exomes.alleleCounts
or not variant_data.gnomad_exomes.alleleCounts[0].raw
or not variant_data.gnomad_exomes.alleleCounts[0].raw.ac
or not variant_data.gnomad_exomes.alleleCounts[0].raw.nhomalt
):
raise MissingDataError("No allele counts found in variant data")

gene_mode = "inplace" # Field to identify recessive/dominant/X-linked
allele_count = variant_data.gnomad_exomes.alleleCounts[0].raw.ac

if gene_mode in ["recessive", "X-linked"]:
return allele_count > 2
elif gene_mode == "dominant":
return allele_count > 5
return False

def evaluate_pm2(self, seqvar: SeqVar, variant_data: VariantResult) -> bool:
if (
not variant_data.gnomad_exomes
or not variant_data.gnomad_exomes.alleleCounts
or not variant_data.gnomad_exomes.alleleCounts[0].raw
or not variant_data.gnomad_exomes.alleleCounts[0].raw.af
or not variant_data.gnomad_exomes.alleleCounts[0].raw.ac
or not variant_data.gnomad_exomes.alleleCounts[0].raw.nhomalt
):
raise MissingDataError("No allele counts found in variant data")

chrom = seqvar.chrom
allele_freq = variant_data.gnomad_exomes.alleleCounts[0].raw.af
gene_mode = "inplace" # Field to identify recessive/dominant/X-linked

if chrom.startswith("chrM"):
return allele_freq < 0.00002
else:
if gene_mode in ["recessive", "X-linked"]:
allele_count = variant_data.gnomad_exomes.alleleCounts[0].raw.ac
if allele_count <= 4:
return True
elif gene_mode == "dominant":
homozygous_count = variant_data.gnomad_exomes.alleleCounts[0].raw.nhomalt
if homozygous_count <= 1:
return True
return allele_freq < 0.0001

def predict(self) -> Optional[BA1BS1BS2PM2]:
"""
Predicts the BA1, BS1, BS2, PM5 criteria for the sequence variant.
Note:
Rules:
BA1: Allele frequency is >5% in Exome Sequencing Project, 1000 Genomes Project, or Exome
Aggregation Consortium.
BS1: Allele frequency greater than expected for disorder.
BS2: Observed in a healthy adult individual for a recessive (homozygous), dominant
(heterozygous), or X-linked (hemizygous) disorder, with full penetrance expected at an
early age.
PM2: Absent from controls (or at extremely low frequency if recessive) in Exome
Sequencing Project, 1000 Genomes or ExAC.
Returns:
BA1BS1BS2PM2: The prediction result.
"""
try:
# Get variant information
variant_info = self._get_variant_info(self.seqvar)
if not variant_info:
raise MissingDataError("No variant information retrieved")

# Instantiate the prediction result
self.prediction = BA1BS1BS2PM2()

# Evaluate each criterion
self.prediction.BA1 = self.evaluate_ba1(self.seqvar, variant_info.result)
self.prediction.BS1 = self.evaluate_bs1(
self.seqvar, variant_info.result, max_credible_freq=0.01
) # Adjust max_credible_freq as needed
self.prediction.BS2 = self.evaluate_bs2(variant_info.result)
self.prediction.PM2 = self.evaluate_pm2(self.seqvar, variant_info.result)
except AutoAcmgBaseException as e:
logger.error("Error occurred during BA1, BS1, BS2, PM5 prediction. Error: {}", e)
self.prediction = None

# Return the prediction result
return self.prediction
53 changes: 0 additions & 53 deletions src/atuo_ba1_bs1_bs_2_ps4_pm2.py

This file was deleted.

31 changes: 31 additions & 0 deletions src/auto_acmg.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

from loguru import logger

from src.atuo_ba1_bs1_bs_2_pm2 import AutoBA1BS1BS2PM2
from src.auto_ps1_pm5 import AutoPS1PM5
from src.core.config import Config
from src.defs.auto_pvs1 import (
Expand Down Expand Up @@ -160,6 +161,36 @@ def predict(self):
except AutoAcmgBaseException as e:
logger.error("Failed to predict PS1 and PM5 criteria. Error: {}", e)

# BA1, BS1, BS2, PM2
try:
logger.info("Predicting BA1, BS1, BS2, and PM2.")
ba1bs1bs2pm2 = AutoBA1BS1BS2PM2(
self.seqvar, self.genome_release, config=self.config
)
ba1bs1bs2pm2_prediction = ba1bs1bs2pm2.predict()
if not ba1bs1bs2pm2_prediction:
logger.error("Failed to predict BA1, BS1, BS2, and PM2 criteria.")
else:
self.seqvar_ba1, self.seqvar_bs1, self.seqvar_bs2, self.seqvar_pm2 = (
ba1bs1bs2pm2_prediction.BA1,
ba1bs1bs2pm2_prediction.BS1,
ba1bs1bs2pm2_prediction.BS2,
ba1bs1bs2pm2_prediction.PM2,
)
logger.info(
"BA1 prediction for {}: {}.\n"
"BS1 prediction: {}.\n"
"BS2 prediction: {}.\n"
"PM2 prediction: {}.",
self.seqvar.user_repr,
self.seqvar_ba1,
self.seqvar_bs1,
self.seqvar_bs2,
self.seqvar_pm2,
)
except AutoAcmgBaseException as e:
logger.error("Failed to predict BA1, BS1, BS2, and PM2 criteria. Error: {}", e)

elif isinstance(variant, StrucVar):
logger.info("Currently only PVS1 prediction is implemented for structural variants!")
logger.info(
Expand Down
33 changes: 17 additions & 16 deletions src/auto_ps1_pm5.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,9 @@ def _parse_HGVSp(pHGVSp: str) -> Optional[AminoAcid]:
AminoAcid: The new amino acid if the pHGVSp is valid, None otherwise.
"""
try:
# If multiple pHGVSp values are present, take the first one
if ";" in pHGVSp:
pHGVSp = pHGVSp.split(";")[0]
match = re.match(REGEX_HGVSP, pHGVSp)
if match:
return AminoAcid[match.group(3)]
Expand All @@ -77,28 +80,20 @@ def _is_pathogenic(variant_info: VariantResult) -> bool:
Returns:
bool: True if the variant is pathogenic
"""
if (
variant_info.clinvar
and variant_info.clinvar.referenceAssertions
and variant_info.clinvar.referenceAssertions[0].clinicalSignificance
):
return (
variant_info.clinvar.referenceAssertions[0].clinicalSignificance
== "CLINICAL_SIGNIFICANCE_PATHOGENIC"
)
if variant_info.clinvar and variant_info.clinvar.referenceAssertions:
for refAssertion in variant_info.clinvar.referenceAssertions:
if refAssertion.clinicalSignificance and refAssertion.clinicalSignificance in [
"CLINICAL_SIGNIFICANCE_PATHOGENIC",
"CLINICAL_SIGNIFICANCE_LIKELY_PATHOGENIC",
]:
return True
return False

def predict(self) -> Optional[PS1PM5]:
"""
Predicts the criteria PS1 and PM5 for the provided sequence variant.
Note:
Rule:
PS1: Same amino acid change as a previously established pathogenic variant regardless of nucleotide change.
PM5: Novel missense change at an amino acid residue where a different missense change determined to be
pathogenic has been seen before.
Implementation:
Implementation of the rule PS1 and PM5:
The method implements the rule by:
- Getting the primary variant information & parsing the primary amino acid change.
- Iterating over all possible alternative bases & getting the alternative variant information.
Expand All @@ -108,6 +103,12 @@ def predict(self) -> Optional[PS1PM5]:
- If the alternative variant is pathogenic and the amino acid change is different from the primary variant,
then PM5 is set to True.
Note:
Rules:
PS1: Same amino acid change as a previously established pathogenic variant regardless of nucleotide change.
PM5: Novel missense change at an amino acid residue where a different missense change determined to be
pathogenic has been seen before.
Returns:
PS1PM5: The prediction result.
"""
Expand Down
8 changes: 6 additions & 2 deletions src/defs/annonars_variant.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

from typing import Any, List, Optional

from pydantic import BaseModel, Field
from pydantic import BaseModel


class VariantQuery(BaseModel):
Expand All @@ -17,6 +17,10 @@ class VariantQuery(BaseModel):
alternative: str


class GnomadExomes(BaseModel):
alleleCounts: List[AlleleCount]


class Dbnsfp(BaseModel):
HGVSc_ANNOVAR: Optional[str] = None
HGVSp_ANNOVAR: Optional[str] = None
Expand Down Expand Up @@ -122,7 +126,7 @@ class VariantResult(BaseModel):
dbnsfp: Optional[Dbnsfp] = None
dbscsnv: Optional[Any] = None
gnomad_mtdna: Optional[Any] = None
gnomad_exomes: Optional[Any] = None
gnomad_exomes: Optional[GnomadExomes] = None
gnomad_genomes: Optional[GnomadGenomes] = None
helixmtdb: Optional[Any] = None
ucsc_conservation: Optional[Any] = None
Expand Down
Loading

0 comments on commit 2f6e217

Please sign in to comment.