Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement pm1 vcep rules #171

Merged
merged 8 commits into from
Aug 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@ src/bench/tmp/*.csv
cache/
cache/**

# Prediction file
prediction.json

# DS_Store file
.DS_Store

Expand Down
121 changes: 119 additions & 2 deletions src/auto_acmg.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
"""Implementations of the PVS1 algorithm."""

from typing import Optional, Type, Union
from typing import Dict, Optional, Type, Union

from loguru import logger

Expand All @@ -19,7 +19,7 @@
from src.defs.auto_acmg import AutoACMGResult, CdsInfo, GenomicStrand
from src.defs.exceptions import AlgorithmError, AutoAcmgBaseException, ParseError
from src.defs.genome_builds import GenomeRelease
from src.defs.mehari import ProteinPos, TxPos
from src.defs.mehari import CdsPos, ProteinPos, TxPos
from src.defs.seqvar import SeqVar, SeqVarResolver
from src.utils import SeqVarTranscriptsHelper
from src.vcep import (
Expand All @@ -29,6 +29,34 @@
CDH1Predictor,
CerebralCreatineDeficiencySyndromesPredictor,
CoagulationFactorDeficiencyPredictor,
CongenitalMyopathiesPredictor,
DICER1Predictor,
ENIGMAPredictor,
EpilepsySodiumChannelPredictor,
FamilialHypercholesterolemiaPredictor,
FBN1Predictor,
GlaucomaPredictor,
HBOPCPredictor,
HearingLossPredictor,
HHTPredictor,
InsightColorectalCancerPredictor,
LeberCongenitalAmaurosisPredictor,
LysosomalDiseasesPredictor,
MalignantHyperthermiaPredictor,
MitochondrialDiseasesPredictor,
MonogenicDiabetesPredictor,
MyeloidMalignancyPredictor,
PKUPredictor,
PlateletDisordersPredictor,
PTENPredictor,
PulmonaryHypertensionPredictor,
RASopathyPredictor,
RettAngelmanPredictor,
SCIDPredictor,
ThrombosisPredictor,
TP53Predictor,
VHLPredictor,
VonWillebrandDiseasePredictor,
)

#: Mapping of HGNC gene identifiers to predictor classes.
Expand All @@ -52,6 +80,92 @@
"HGNC:11055": CerebralCreatineDeficiencySyndromesPredictor, # SLC6A8
"HGNC:3546": CoagulationFactorDeficiencyPredictor, # F8
"HGNC:3551": CoagulationFactorDeficiencyPredictor, # F9
"HGNC:7720": CongenitalMyopathiesPredictor, # NEB
"HGNC:129": CongenitalMyopathiesPredictor, # ACTA1
"HGNC:2974": CongenitalMyopathiesPredictor, # DNM2
"HGNC:7448": CongenitalMyopathiesPredictor, # MTM1
"HGNC:10483": CongenitalMyopathiesPredictor, # RYR1
"HGNC:17098": DICER1Predictor, # DICER1
"HGNC:1100": ENIGMAPredictor, # BRCA1
"HGNC:1101": ENIGMAPredictor, # BRCA2
"HGNC:10585": EpilepsySodiumChannelPredictor, # SCN1A
"HGNC:10588": EpilepsySodiumChannelPredictor, # SCN2A
"HGNC:10590": EpilepsySodiumChannelPredictor, # SCN3A
"HGNC:10596": EpilepsySodiumChannelPredictor, # SCN8A
"HGNC:10586": EpilepsySodiumChannelPredictor, # SCN1B
"HGNC:6547": FamilialHypercholesterolemiaPredictor, # LDLR
"HGNC:3603": FBN1Predictor, # FBN1
"HGNC:7610": GlaucomaPredictor, # MYOC
"HGNC:13733": HearingLossPredictor, # CDH23
"HGNC:2180": HearingLossPredictor, # COCH
"HGNC:4284": HearingLossPredictor, # GJB2
"HGNC:6298": HearingLossPredictor, # KCNQ4
"HGNC:7605": HearingLossPredictor, # MYO6
"HGNC:7606": HearingLossPredictor, # MYO7A
"HGNC:8818": HearingLossPredictor, # SLC26A4
"HGNC:11720": HearingLossPredictor, # TECTA
"HGNC:12601": HearingLossPredictor, # USH2A
"HGNC:7594": HearingLossPredictor, # MYO15A
"HGNC:8515": HearingLossPredictor, # OTOF
"HGNC:795": HBOPCPredictor, # ATM
"HGNC:26144": HBOPCPredictor, # PALB2
"HGNC:175": HHTPredictor, # ACVRL1
"HGNC:3349": HHTPredictor, # ENG
"HGNC:583": InsightColorectalCancerPredictor, # APC
"HGNC:7127": InsightColorectalCancerPredictor, # MLH1
"HGNC:7325": InsightColorectalCancerPredictor, # MSH2
"HGNC:7329": InsightColorectalCancerPredictor, # MSH6
"HGNC:9122": InsightColorectalCancerPredictor, # PMS2
"HGNC:10294": LeberCongenitalAmaurosisPredictor, # RPE65
"HGNC:4065": LysosomalDiseasesPredictor, # GAA
"HGNC:10483": MalignantHyperthermiaPredictor, # GBA
"HGNC:23287": MitochondrialDiseasesPredictor, # ETHE1
"HGNC:8806": MitochondrialDiseasesPredictor, # PDHA1
"HGNC:9179": MitochondrialDiseasesPredictor, # POLG
"HGNC:16266": MitochondrialDiseasesPredictor, # SLC19A3
"HGNC:11621": MonogenicDiabetesPredictor, # HNF1A
"HGNC:5024": MonogenicDiabetesPredictor, # HNF4A
"HGNC:4195": MonogenicDiabetesPredictor, # GCK
"HGNC:10471": MyeloidMalignancyPredictor, # RUNX1
"HGNC:8582": PKUPredictor, # PAH
"HGNC:6138": PlateletDisordersPredictor, # ITGA2B
"HGNC:6156": PlateletDisordersPredictor, # ITGB3
"HGNC:9588": PTENPredictor, # PTEN
"HGNC:1078": PulmonaryHypertensionPredictor, # BMPR2
"HGNC:12726": VonWillebrandDiseasePredictor, # VWF
"HGNC:775": ThrombosisPredictor, # SERPINC1
"HGNC:11998": TP53Predictor, # TP53
"HGNC:12687": VHLPredictor, # VHL
"HGNC:15454": RASopathyPredictor, # SHOC2
"HGNC:7989": RASopathyPredictor, # NRAS
"HGNC:9829": RASopathyPredictor, # RAF1
"HGNC:11187": RASopathyPredictor, # SOS1
"HGNC:11188": RASopathyPredictor, # SOS2
"HGNC:9644": RASopathyPredictor, # PTPN11
"HGNC:6407": RASopathyPredictor, # KRAS
"HGNC:6840": RASopathyPredictor, # MAP2K1
"HGNC:5173": RASopathyPredictor, # HRAS
"HGNC:10023": RASopathyPredictor, # RIT1
"HGNC:6842": RASopathyPredictor, # MAP2K2
"HGNC:1097": RASopathyPredictor, # BRAF
"HGNC:7227": RASopathyPredictor, # MRAS
"HGNC:6742": RASopathyPredictor, # LZTR1
"HGNC:17271": RASopathyPredictor, # RRAS2
"HGNC:9282": RASopathyPredictor, # PPP1CB
"HGNC:11634": RettAngelmanPredictor, # TCF4
"HGNC:11079": RettAngelmanPredictor, # SLC9A6
"HGNC:11411": RettAngelmanPredictor, # CDKL5
"HGNC:3811": RettAngelmanPredictor, # FOXG1
"HGNC:6990": RettAngelmanPredictor, # MECP2
"HGNC:12496": RettAngelmanPredictor, # UBE3A
"HGNC:12765": SCIDPredictor, # FOXN1
"HGNC:186": SCIDPredictor, # ADA
"HGNC:17642": SCIDPredictor, # DCLRE1C
"HGNC:6024": SCIDPredictor, # IL7R
"HGNC:6193": SCIDPredictor, # JAK3
"HGNC:9831": SCIDPredictor, # RAG1
"HGNC:9832": SCIDPredictor, # RAG2
"HGNC:6010": SCIDPredictor, # IL2RG
}


Expand Down Expand Up @@ -183,6 +297,9 @@ def parse_data(self, seqvar: SeqVar) -> AutoACMGResult:
self.result.data.tx_pos_utr = (
seqvar_transcript.tx_pos.ord if isinstance(seqvar_transcript.tx_pos, TxPos) else -1
)
self.result.data.cds_pos = (
seqvar_transcript.cds_pos.ord if isinstance(seqvar_transcript.cds_pos, CdsPos) else 0
)
self.result.data.prot_pos = (
seqvar_transcript.protein_pos.ord
if isinstance(seqvar_transcript.protein_pos, ProteinPos)
Expand Down
3 changes: 2 additions & 1 deletion src/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,8 @@ def classify(
raise InvalidGenomeBuild("Invalid genome release")

auto_acmg = AutoACMG(variant, genome_release_enum)
auto_acmg.predict()
prediction = auto_acmg.predict()
prediction.save_to_file() if prediction else logger.error("No prediction was made.")
except AutoAcmgBaseException as e:
logger.error("Error occurred: {}", e)

Expand Down
39 changes: 39 additions & 0 deletions src/criteria/auto_pm1.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
AutoACMGData,
AutoACMGPrediction,
AutoACMGStrength,
GenomicStrand,
)
from src.defs.exceptions import AlgorithmError, AutoAcmgBaseException, InvalidAPIResposeError
from src.defs.genome_builds import GenomeRelease
Expand All @@ -30,6 +31,44 @@ def __init__(self):
#: comment_pm1 to store the prediction explanation.
self.comment_pm1: str = ""

@staticmethod
def _get_affected_exon(var_data: AutoACMGData, seqvar: SeqVar) -> int:
    """
    Return how many exons are counted up to and including the variant position.

    The exons of ``var_data`` are walked in stored order on the plus strand and
    in reversed order on the minus strand. Every exon lying entirely on the
    already-walked side of ``seqvar.pos`` is counted; the walk stops as soon as
    an exon containing the position has been counted, or when the next exon
    begins beyond the position.

    NOTE(review): this presumably relies on ``var_data.exons`` being sorted by
    ascending genomic coordinate -- confirm against the caller.

    Args:
        var_data: AutoACMGData object providing ``strand``, ``exons`` and ``hgnc_id``.
        seqvar: SeqVar object providing ``pos``.

    Returns:
        int: Number of exons counted; ``0`` if the position precedes the first
        exon in walking order (or there are no exons).

    Raises:
        AlgorithmError: If the strand is neither plus nor minus.
    """
    if var_data.strand == GenomicStrand.Plus:
        counted = 0
        for exon in var_data.exons:
            # Exon starts at or after the variant: nothing further to count.
            if seqvar.pos <= exon.altStartI:
                break
            counted += 1
            # Variant is inside this exon: it is the affected one.
            if seqvar.pos <= exon.altEndI:
                break
        return counted

    if var_data.strand == GenomicStrand.Minus:
        counted = 0
        for exon in reversed(var_data.exons):
            # Exon ends at or before the variant: nothing further to count.
            if seqvar.pos >= exon.altEndI:
                break
            counted += 1
            # Variant is inside this exon: it is the affected one.
            if seqvar.pos >= exon.altStartI:
                break
        return counted

    raise AlgorithmError(f"Invalid strand for {var_data.hgnc_id}: {var_data.strand}")

def _count_vars(self, seqvar: SeqVar, start_pos: int, end_pos: int) -> Tuple[int, int]:
"""
Counts pathogenic and benign variants in the specified range.
Expand Down
1 change: 1 addition & 0 deletions src/defs/auto_acmg.py
Original file line number Diff line number Diff line change
Expand Up @@ -397,6 +397,7 @@ class AutoACMGData(AutoAcmgBaseModel):
transcript_id: str = ""
transcript_tags: List[str] = []
tx_pos_utr: int = -1
cds_pos: int = 0
prot_pos: int = -1
prot_length: int = -1
cds_info: Dict[str, CdsInfo] = {}
Expand Down
5 changes: 5 additions & 0 deletions src/defs/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,8 @@ class AutoAcmgBaseModel(BaseModel):
# class Config:
# use_enum_values = True
# arbitrary_types_allowed = True

def save_to_file(self, file_path: str = "prediction.json") -> None:
    """Serialize the model to indented JSON and write it to ``file_path``.

    The file is opened in ``"w"`` mode, so an existing file at that path is
    overwritten.

    Args:
        file_path: Destination path. Defaults to ``prediction.json``.
    """
    # Pin the encoding: without it ``open`` falls back to the locale's preferred
    # encoding, which may be unable to represent non-ASCII characters in the
    # JSON and makes the output differ between platforms.
    with open(file_path, "w", encoding="utf-8") as file:
        file.write(self.model_dump_json(indent=4))
28 changes: 28 additions & 0 deletions src/vcep/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,31 @@
CerebralCreatineDeficiencySyndromesPredictor,
)
from src.vcep.coagulation_factor_deficiency import CoagulationFactorDeficiencyPredictor
from src.vcep.congenital_myopathies import CongenitalMyopathiesPredictor
from src.vcep.dicer1 import DICER1Predictor
from src.vcep.enigma import ENIGMAPredictor
from src.vcep.epilepsy_sodium_channel import EpilepsySodiumChannelPredictor
from src.vcep.familial_hypercholesterolemia import FamilialHypercholesterolemiaPredictor
from src.vcep.fbn1 import FBN1Predictor
from src.vcep.glaucoma import GlaucomaPredictor
from src.vcep.hbopc import HBOPCPredictor
from src.vcep.hearing_loss import HearingLossPredictor
from src.vcep.hhtp import HHTPredictor
from src.vcep.insight_colorectal_cancer import InsightColorectalCancerPredictor
from src.vcep.leber_congenital_amaurosis import LeberCongenitalAmaurosisPredictor
from src.vcep.lysosomal_diseases import LysosomalDiseasesPredictor
from src.vcep.malignant_hyperthermia_susceptibility import MalignantHyperthermiaPredictor
from src.vcep.mitochondrial_diseases import MitochondrialDiseasesPredictor
from src.vcep.monogenic_diabetes import MonogenicDiabetesPredictor
from src.vcep.myeloid_malignancy import MyeloidMalignancyPredictor
from src.vcep.pku import PKUPredictor
from src.vcep.platelet_disorders import PlateletDisordersPredictor
from src.vcep.pten import PTENPredictor
from src.vcep.pulmonary_hypertension import PulmonaryHypertensionPredictor
from src.vcep.rasopathy import RASopathyPredictor
from src.vcep.rett_angelman import RettAngelmanPredictor
from src.vcep.scid import SCIDPredictor
from src.vcep.thrombosis import ThrombosisPredictor
from src.vcep.tp53 import TP53Predictor
from src.vcep.vhl import VHLPredictor
from src.vcep.von_willebrand_disease import VonWillebrandDiseasePredictor
4 changes: 2 additions & 2 deletions src/vcep/acadvl.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from src.defs.auto_acmg import AutoACMGCriteria, AutoACMGData, AutoACMGPrediction, AutoACMGStrength
from src.defs.seqvar import SeqVar

PM1_CLUSTER_REGIONS = [
PM1_CLUSTER = [
(214, 223), # Nucleotide and substrate binding
(249, 251), # Nucleotide and substrate binding
(460, 466), # Nucleotide and substrate binding
Expand All @@ -25,7 +25,7 @@ def predict_pm1(self, seqvar: SeqVar, var_data: AutoACMGData) -> AutoACMGCriteri
"""Override predict_pm1 to include VCEP-specific logic for ACADVL."""
logger.info("Predict PM1")
# Check if variant falls within critical regions
for start, end in PM1_CLUSTER_REGIONS:
for start, end in PM1_CLUSTER:
if start <= var_data.prot_pos <= end:
comment = (
f"Variant falls within a critical region for ACADVL between positions "
Expand Down
2 changes: 1 addition & 1 deletion src/vcep/cardiomyopathy.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
"""
Predictor for Cardiomyopathy VCEP.
Included gene:
Included genes:
MYH7 (HGNC:7577),
MYBPC3 (HGNC:7551),
TNNI3 (HGNC:11947),
Expand Down
52 changes: 4 additions & 48 deletions src/vcep/coagulation_factor_deficiency.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,7 @@
from loguru import logger

from src.criteria.default_predictor import DefaultPredictor
from src.defs.auto_acmg import (
AutoACMGCriteria,
AutoACMGData,
AutoACMGPrediction,
AutoACMGStrength,
GenomicStrand,
)
from src.defs.auto_acmg import AutoACMGCriteria, AutoACMGData, AutoACMGPrediction, AutoACMGStrength
from src.defs.exceptions import AlgorithmError
from src.defs.seqvar import SeqVar

Expand All @@ -39,7 +33,7 @@
},
"moderate": {
"residues": [(1667, 1667), (1332, 1332)], # Residues affecting secretion
"regions": [(2267, 2304)], # FXa-binding residues, excluding Ser2283
"domains": [(2267, 2304)], # FXa-binding residues, excluding Ser2283
"excluded_residues": [(2283, 2283)], # Excluded residue in FXa-binding region
},
},
Expand All @@ -60,44 +54,6 @@

class CoagulationFactorDeficiencyPredictor(DefaultPredictor):

@staticmethod
def _get_affected_exon(var_data: AutoACMGData, seqvar: SeqVar) -> int:
    """
    Return how many exons are counted up to and including the variant position.

    The exons of ``var_data`` are walked in stored order on the plus strand and
    in reversed order on the minus strand. Every exon lying entirely on the
    already-walked side of ``seqvar.pos`` is counted; the walk stops as soon as
    an exon containing the position has been counted, or when the next exon
    begins beyond the position.

    NOTE(review): this presumably relies on ``var_data.exons`` being sorted by
    ascending genomic coordinate -- confirm against the caller.

    Args:
        var_data: AutoACMGData object providing ``strand``, ``exons`` and ``hgnc_id``.
        seqvar: SeqVar object providing ``pos``.

    Returns:
        int: Number of exons counted; ``0`` if the position precedes the first
        exon in walking order (or there are no exons).

    Raises:
        AlgorithmError: If the strand is neither plus nor minus.
    """
    if var_data.strand == GenomicStrand.Plus:
        counted = 0
        for exon in var_data.exons:
            # Exon starts at or after the variant: nothing further to count.
            if seqvar.pos <= exon.altStartI:
                break
            counted += 1
            # Variant is inside this exon: it is the affected one.
            if seqvar.pos <= exon.altEndI:
                break
        return counted

    if var_data.strand == GenomicStrand.Minus:
        counted = 0
        for exon in reversed(var_data.exons):
            # Exon ends at or before the variant: nothing further to count.
            if seqvar.pos >= exon.altEndI:
                break
            counted += 1
            # Variant is inside this exon: it is the affected one.
            if seqvar.pos >= exon.altStartI:
                break
        return counted

    raise AlgorithmError(f"Invalid strand for {var_data.hgnc_id}: {var_data.strand}")

def predict_pm1(self, seqvar: SeqVar, var_data: AutoACMGData) -> AutoACMGCriteria:
"""
Override predict_pm1 to include VCEP-specific logic for Coagulation Factor Deficiency.
Expand Down Expand Up @@ -139,8 +95,8 @@ def predict_pm1(self, seqvar: SeqVar, var_data: AutoACMGData) -> AutoACMGCriteri
summary=comment,
)

# Check moderate level criteria for regions, excluding specific residues
for start, end in gene_cluster.get("moderate", {}).get("regions", []):
# Check moderate level criteria for domains, excluding specific residues
for start, end in gene_cluster.get("moderate", {}).get("domains", []):
if start <= var_data.prot_pos <= end:
excluded_residues = gene_cluster.get("moderate", {}).get("excluded_residues", [])
if not any(start <= var_data.prot_pos <= end for start, end in excluded_residues):
Expand Down
Loading
Loading