Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement crit4prot function for stucvars #201

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions docs/acmg_strucvar_implementation.rst
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
.. _acmg_strucvar_implementation:

===============================================================
Implementation of different ACMG criteria for sequence variants
===============================================================
=================================================================
Implementation of different ACMG criteria for structural variants
=================================================================

This section contains the internal building blocks for the
implementation of the different ACMG criteria for sequence variants.
implementation of the different ACMG criteria for structural variants.


----
Expand Down
19 changes: 11 additions & 8 deletions src/api/annonars.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
"""Annonars API client."""

from typing import List, Optional
from typing import List, Optional, Union

import httpx
from loguru import logger
Expand All @@ -13,6 +13,7 @@
from src.defs.annonars_variant import AnnonarsVariantResponse
from src.defs.exceptions import AnnonarsException
from src.defs.seqvar import SeqVar
from src.defs.strucvar import StrucVar

#: Annonars API base URL
ANNONARS_API_BASE_URL = f"{settings.API_REEV_URL}/annonars"
Expand All @@ -28,12 +29,12 @@ def __init__(self, *, api_base_url: Optional[str] = None):
self.cache = Cache()

def _get_variant_from_range(
self, seqvar: SeqVar, start: int, stop: int
self, variant: Union[SeqVar, StrucVar], start: int, stop: int
) -> AnnonarsRangeResponse:
"""Pull all variants within a range.

Args:
seqvar (SeqVar): Sequence variant.
variant (Union[SeqVar, StrucVar]): Sequence or structural variant.
start (int): Start position.
stop (int): Stop position.

Expand All @@ -43,10 +44,12 @@ def _get_variant_from_range(
if abs(stop - start) > 5000:
raise AnnonarsException("Range is too large for a single request.")

gr = variant.genome_release.name.lower()
chromosome = variant.chrom
url = (
f"{self.api_base_url}/annos/range?"
f"genome_release={seqvar.genome_release.name.lower()}"
f"&chromosome={seqvar.chrom}"
f"genome_release={gr}"
f"&chromosome={chromosome}"
f"&start={start}"
f"&stop={stop}"
)
Expand Down Expand Up @@ -76,12 +79,12 @@ def _get_variant_from_range(
raise AnnonarsException("Annonars returned non-validating data.") from e

def get_variant_from_range(
self, seqvar: SeqVar, start: int, stop: int
self, variant: Union[SeqVar, StrucVar], start: int, stop: int
) -> AnnonarsCustomRangeResult:
"""A wrapper for _get_variant_from_range that handles ranges larger than 5000.

Args:
seqvar (SeqVar): Sequence variant.
variant (Union[SeqVar, StrucVar]): Sequence or structural variant.
start (int): Start position.
stop (int): Stop position.

Expand All @@ -93,7 +96,7 @@ def get_variant_from_range(
current_start = start
while current_start < stop:
current_stop = min(current_start + MAX_RANGE_SIZE - 1, stop)
response = self._get_variant_from_range(seqvar, current_start, current_stop)
response = self._get_variant_from_range(variant, current_start, current_stop)
if res.gnomad_genomes is None:
res.gnomad_genomes = response.result.gnomad_genomes
else:
Expand Down
95 changes: 88 additions & 7 deletions src/strucvar/auto_pvs1.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,12 @@
GenomicStrand,
)
from src.defs.auto_pvs1 import PVS1Prediction, PVS1PredictionPathMapping, PVS1PredictionStrucVarPath
from src.defs.exceptions import AlgorithmError, MissingDataError
from src.defs.exceptions import (
AlgorithmError,
AutoAcmgBaseException,
InvalidAPIResposeError,
MissingDataError,
)
from src.defs.mehari import Exon
from src.defs.strucvar import StrucVar, StrucVarType
from src.utils import AutoACMGHelper
Expand Down Expand Up @@ -52,6 +57,50 @@ def _minimal_deletion(self, strucvar: StrucVar, exons: List[Exon]) -> bool:
return True
return False

def _count_pathogenic_vars(self, strucvar: StrucVar) -> Tuple[int, int]:
"""
Counts pathogenic variants in the range specified by the structural variant.

The method retrieves variants from the range defined by the structural variant's start and
stop positions and iterates through the ClinVar data of each variant to count the number of
pathogenic variants and the total number of variants.

Args:
strucvar: The structural variant being analyzed.

Returns:
Tuple[int, int]: The number of pathogenic variants and the total number of variants.

Raises:
InvalidAPIResposeError: If the API response is invalid or cannot be processed.
"""
logger.debug(
"Counting pathogenic variants from position {} to {}.", strucvar.start, strucvar.stop
)
if strucvar.stop < strucvar.start:
raise AlgorithmError("End position is less than the start position.")

response = self.annonars_client.get_variant_from_range(
strucvar, strucvar.start, strucvar.stop
)
if response and response.clinvar:
pathogenic_variants = [
v
for v in response.clinvar
if v.records
and (clf := v.records[0].classifications)
and (gc := clf.germlineClassification)
and gc.description in ["Pathogenic", "Likely pathogenic"]
]
logger.debug(
"Pathogenic variants: {}, Total variants: {}",
len(pathogenic_variants),
len(response.clinvar),
)
return len(pathogenic_variants), len(response.clinvar)
else:
raise InvalidAPIResposeError("Failed to get variant from range. No ClinVar data.")

def full_gene_del(self, strucvar: StrucVar, exons: List[Exon]) -> bool:
"""
Check if the variant is a full gene deletion.
Expand Down Expand Up @@ -244,10 +293,42 @@ def in_bio_relevant_tsx(self, transcript_tags: List[str]) -> bool:
self.comment_pvs1 += f"Transcript tags: {', '.join(transcript_tags)}."
return "TRANSCRIPT_TAG_MANE_SELECT" in transcript_tags

@staticmethod
def crit4prot_func() -> bool:
"""Check if the deletion is critical for protein function."""
return False
def crit4prot_func(self, strucvar: StrucVar) -> bool:
"""
Check if the deletion is critical for protein function.

This method is implemented by fetching variants from the start to the end of the structural
variant, then counting the number of pathogenic variants in that region, by iterating
through the clinvar data of each variant. Consider the region critical if the frequency of
pathogenic variants exceeds 5%.
"""
logger.debug("Checking if the deletion is critical for protein function.")

try:
pathogenic_variants, total_variants = self._count_pathogenic_vars(strucvar)
self.comment_pvs1 += (
f"Found {pathogenic_variants} pathogenic variants from {total_variants} total "
f"variants in the range {strucvar.start} - {strucvar.stop}. "
)
if total_variants == 0: # Avoid division by zero
self.comment_pvs1 += "No variants found. Predicted to be non-critical."
return False
if pathogenic_variants / total_variants > 0.05:
self.comment_pvs1 += (
"Frequency of pathogenic variants "
f"{pathogenic_variants/total_variants} exceeds 5%. "
"Predicted to be critical."
)
return True
else:
self.comment_pvs1 += (
"Frequency of pathogenic variants "
f"{pathogenic_variants/total_variants} does not exceed 5%. "
"Predicted to be non-critical."
)
return False
except AutoAcmgBaseException as e:
raise AlgorithmError("Failed to predict criticality for variant.") from e

@staticmethod
def lof_freq_in_pop() -> bool:
Expand Down Expand Up @@ -315,7 +396,7 @@ def verify_pvs1( # pragma: no cover
strucvar, var_data.exons, var_data.strand
) and not self.undergo_nmd(strucvar, var_data.exons, var_data.strand):
self.comment_pvs1 += " =>"
if self.crit4prot_func():
if self.crit4prot_func(strucvar):
self.prediction = PVS1Prediction.PVS1_Strong
self.prediction_path = PVS1PredictionStrucVarPath.DEL4
else:
Expand All @@ -335,7 +416,7 @@ def verify_pvs1( # pragma: no cover
self.prediction_path = PVS1PredictionStrucVarPath.DEL7_1
else:
self.comment_pvs1 += " =>"
if self.crit4prot_func():
if self.crit4prot_func(strucvar):
self.prediction = PVS1Prediction.PVS1_Strong
self.prediction_path = PVS1PredictionStrucVarPath.DEL8
else:
Expand Down
94 changes: 93 additions & 1 deletion tests/strucvar/test_auto_pvs1.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
GenomicStrand,
)
from src.defs.auto_pvs1 import PVS1Prediction, PVS1PredictionStrucVarPath
from src.defs.exceptions import AlgorithmError, MissingDataError
from src.defs.exceptions import AlgorithmError, InvalidAPIResposeError, MissingDataError
from src.defs.genome_builds import GenomeRelease
from src.defs.mehari import Exon
from src.defs.strucvar import StrucVar, StrucVarType
Expand Down Expand Up @@ -108,6 +108,62 @@ def test_minimal_deletion_non_deletion_variant(strucvar_helper, strucvar):
strucvar_helper._minimal_deletion(strucvar, exons)


# ---------- _count_pathogenic_vars ----------


def test_count_pathogenic_vars_valid_range(strucvar_helper, strucvar, monkeypatch):
"""Test counting pathogenic variants within a valid range."""
mock_response = MagicMock()
mock_response.clinvar = [
MagicMock(
records=[
MagicMock(
classifications=MagicMock(
germlineClassification=MagicMock(description="Pathogenic")
)
)
]
),
MagicMock(
records=[
MagicMock(
classifications=MagicMock(
germlineClassification=MagicMock(description="Likely pathogenic")
)
)
]
),
MagicMock(
records=[
MagicMock(
classifications=MagicMock(
germlineClassification=MagicMock(description="Benign")
)
)
]
),
]
monkeypatch.setattr(
strucvar_helper.annonars_client, "get_variant_from_range", lambda x, y, z: mock_response
)

pathogenic, total = strucvar_helper._count_pathogenic_vars(strucvar)
assert pathogenic == 2
assert total == 3


def test_count_pathogenic_vars_invalid_api_response(strucvar_helper, strucvar, monkeypatch):
"""Test handling invalid API response."""
monkeypatch.setattr(
strucvar_helper.annonars_client,
"get_variant_from_range",
MagicMock(side_effect=InvalidAPIResposeError),
)

with pytest.raises(InvalidAPIResposeError):
strucvar_helper._count_pathogenic_vars(strucvar)


# --------- full_gene_del ---------


Expand Down Expand Up @@ -395,6 +451,42 @@ def test_in_bio_relevant_tsx_multiple_mane_select(strucvar_helper):
), "Transcript with multiple Mane tags should be identified as biologically relevant"


# --------- crit4prot_func ---------


@pytest.mark.parametrize(
"pathogenic_variants, total_variants, expected_result",
[
(10, 100, True), # Pathogenic variants exceed the 5% threshold
(3, 100, False), # Pathogenic variants do not exceed the 5% threshold
(0, 0, False), # No variants found
(5, 100, False), # Exactly 5%, considered non-critical
(10, 0, False), # No total variants, handled by the no variants case
],
)
def test_crit4prot_func(
strucvar_helper, strucvar, pathogenic_variants, total_variants, expected_result, monkeypatch
):
"""Test the crit4prot_func method."""
mock_count_pathogenic = MagicMock(return_value=(pathogenic_variants, total_variants))
monkeypatch.setattr(strucvar_helper, "_count_pathogenic_vars", mock_count_pathogenic)

result = strucvar_helper.crit4prot_func(strucvar)
assert result == expected_result


def test_crit4prot_func_error_handling(strucvar_helper, strucvar, monkeypatch):
"""Test error handling in crit4prot_func."""
monkeypatch.setattr(
strucvar_helper,
"_count_pathogenic_vars",
MagicMock(side_effect=AlgorithmError("Test error")),
)

with pytest.raises(AlgorithmError):
strucvar_helper.crit4prot_func(strucvar)


# ========== AutoPVS1 ============


Expand Down
Loading