Skip to content

Commit

Permalink
Implement methods
Browse files Browse the repository at this point in the history
  • Loading branch information
gromdimon committed Sep 9, 2024
1 parent ed94872 commit 28a5245
Show file tree
Hide file tree
Showing 4 changed files with 129 additions and 10 deletions.
2 changes: 2 additions & 0 deletions src/auto_acmg.py
Original file line number Diff line number Diff line change
Expand Up @@ -432,6 +432,8 @@ def parse_strucvar_data(self, strucvar: StrucVar) -> AutoACMGStrucVarResult:
gene_transcript.genomeAlignments[0].strand
)
self.strucvar_result.data.exons = gene_transcript.genomeAlignments[0].exons
self.strucvar_result.data.start_cdn = gene_transcript.startCodon
self.strucvar_result.data.stop_cdn = gene_transcript.stopCodon

return self.strucvar_result

Expand Down
4 changes: 2 additions & 2 deletions src/defs/auto_acmg.py
Original file line number Diff line number Diff line change
Expand Up @@ -672,10 +672,10 @@ class AutoACMGStrucVarData(AutoAcmgBaseModel):
hgnc_id: str = ""
transcript_id: str = ""
transcript_tags: List[str] = []
# prot_pos: int = -1
# prot_length: int = -1
strand: GenomicStrand = GenomicStrand.NotSet
exons: List[Exon] = []
start_cdn: int = 0
stop_cdn: int = 0


class AutoACMGStrucVarPred(AutoAcmgBaseModel):
Expand Down
2 changes: 1 addition & 1 deletion src/defs/mehari.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ class Exon(BaseModel):
altEndI: int
altCdsStartI: int
altCdsEndI: int
cigar: str
cigar: Optional[str] = None
ord: Optional[int] = None


Expand Down
131 changes: 124 additions & 7 deletions src/strucvar/auto_pvs1.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,8 @@ def _count_lof_vars(self, strucvar: StrucVar) -> Tuple[int, int]:
strucvar: The structural variant being analyzed.
Returns:
Tuple[int, int]: The number of frequent LoF variants and the total number of LoF variants.
Tuple[int, int]: The number of frequent LoF variants and the total number of LoF
variants.
Raises:
AlgorithmError: If the end position is less than the start position.
Expand Down Expand Up @@ -157,6 +158,67 @@ def _count_lof_vars(self, strucvar: StrucVar) -> Tuple[int, int]:
"Failed to get variant from range. No gnomAD genomes data."
)

def _calc_cds(
self, exons: List[Exon], strand: GenomicStrand, start_codon: int, stop_codon: int
) -> List[Exon]:
"""
Remove UTRs from exons.
Args:
exons: List of exons for the gene.
strand: The genomic strand of the gene.
start_codon: Position of the start codon.
stop_codon: Position of the stop codon.
Returns:
List[Exon]: List of exons without UTRs.
Raises:
MissingDataError: If the genomic strand is not set.
"""
if strand == GenomicStrand.NotSet:
raise MissingDataError("Genomic strand is not set. Cannot remove UTRs.")

if strand == GenomicStrand.Plus:
# Remove 5' UTR
for i, exon in enumerate(exons):
if exon.altEndI - exon.altStartI + 1 > start_codon:
exon.altStartI += start_codon
break
else:
del exons[i]
start_codon -= exon.altEndI - exon.altStartI + 1

# Remove 3' UTR
for i, exon in enumerate(exons[::-1]):
if exon.altEndI - exon.altStartI + 1 > stop_codon:
exon.altEndI -= stop_codon
break
else:
del exons[-i]
stop_codon -= exon.altEndI - exon.altStartI + 1

elif strand == GenomicStrand.Minus:
# Remove 3' UTR
for i, exon in enumerate(exons):
if exon.altEndI - exon.altStartI + 1 > stop_codon:
exon.altStartI += stop_codon
break
else:
del exons[i]
stop_codon -= exon.altEndI - exon.altStartI + 1

# Remove 5' UTR
for i, exon in enumerate(exons[::-1]):
if exon.altEndI - exon.altStartI + 1 > start_codon:
exon.altEndI -= start_codon
break
else:
del exons[-i]
start_codon -= exon.altEndI - exon.altStartI + 1

return exons

def full_gene_del(self, strucvar: StrucVar, exons: List[Exon]) -> bool:
"""
Check if the variant is a full gene deletion.
Expand Down Expand Up @@ -445,10 +507,53 @@ def lof_freq_in_pop(self, strucvar: StrucVar) -> bool:
except AutoAcmgBaseException as e:
raise AlgorithmError("Failed to predict LoF frequency for structural variant.") from e

@staticmethod
def lof_rm_gt_10pct_of_prot() -> bool:
"""Check if the loss-of-function removes more than 10% of the protein."""
return False
def lof_rm_gt_10pct_of_prot(
self,
strucvar: StrucVar,
exons: List[Exon],
strand: GenomicStrand,
start_codon: int,
stop_codon: int,
) -> bool:
"""
Determine if the deletion removes more than 10% of the protein-coding sequence.
First remove the UTRs from the exons. Then iterate through the CDS exons and calculate the
total CDS length and the length of the deleted region. Return True if the deletion removes
more than 10% of the protein-coding sequence, False otherwise.
Args:
strucvar: The structural variant being analyzed.
exons: List of exons for the gene.
strand: The genomic strand of the gene.
start_codon: Position of the start codon.
stop_codon: Position of the stop codon.
Returns:
bool: True if the deletion removes more than 10% of the protein, False otherwise.
"""
total_cds_length = 0
deleted_length = 0

cds = self._calc_cds(exons, strand, start_codon, stop_codon)

for exon in cds:
total_cds_length += exon.altEndI - exon.altStartI + 1

overlap_start = max(strucvar.start, exon.altStartI)
overlap_end = min(strucvar.stop, exon.altEndI)

if overlap_start <= overlap_end: # Check if there is an overlap
overlap_length = overlap_end - overlap_start + 1
deleted_length += overlap_length

if total_cds_length == 0:
raise AlgorithmError(
"Total CDS length is zero. Cannot determine if deletion removes "
"more than 10% of the protein."
)
deletion_percentage = (deleted_length // 3) / (total_cds_length // 3)
return deletion_percentage > 0.1

@staticmethod
def dup_disrupt_rf() -> bool:
Expand Down Expand Up @@ -518,7 +623,13 @@ def verify_pvs1( # pragma: no cover
self.prediction_path = PVS1PredictionStrucVarPath.DEL5_1
else:
self.comment_pvs1 += " =>"
if self.lof_rm_gt_10pct_of_prot():
if self.lof_rm_gt_10pct_of_prot(
strucvar,
var_data.exons,
var_data.strand,
var_data.start_cdn,
var_data.stop_cdn,
):
self.prediction = PVS1Prediction.PVS1_Strong
self.prediction_path = PVS1PredictionStrucVarPath.DEL6_1
else:
Expand All @@ -538,7 +649,13 @@ def verify_pvs1( # pragma: no cover
self.prediction_path = PVS1PredictionStrucVarPath.DEL5_2
else:
self.comment_pvs1 += " =>"
if self.lof_rm_gt_10pct_of_prot():
if self.lof_rm_gt_10pct_of_prot(
strucvar,
var_data.exons,
var_data.strand,
var_data.start_cdn,
var_data.stop_cdn,
):
self.prediction = PVS1Prediction.PVS1_Strong
self.prediction_path = PVS1PredictionStrucVarPath.DEL6_2
else:
Expand Down

0 comments on commit 28a5245

Please sign in to comment.