diff --git a/docs/acmg_strucvar_implementation.rst b/docs/acmg_strucvar_implementation.rst index d13fca3..95fd87f 100644 --- a/docs/acmg_strucvar_implementation.rst +++ b/docs/acmg_strucvar_implementation.rst @@ -1,11 +1,11 @@ .. _acmg_strucvar_implementation: -=============================================================== -Implementation of different ACMG criteria for sequence variants -=============================================================== +================================================================= +Implementation of different ACMG criteria for structural variants +================================================================= This section contains the internal building blocks for the -implementation of the different ACMG criteria for sequence variants. +implementation of the different ACMG criteria for structural variants. ---- diff --git a/src/api/annonars.py b/src/api/annonars.py index ec3a490..ebfba90 100644 --- a/src/api/annonars.py +++ b/src/api/annonars.py @@ -1,6 +1,6 @@ """Annonars API client.""" -from typing import List, Optional +from typing import List, Optional, Union import httpx from loguru import logger @@ -13,6 +13,7 @@ from src.defs.annonars_variant import AnnonarsVariantResponse from src.defs.exceptions import AnnonarsException from src.defs.seqvar import SeqVar +from src.defs.strucvar import StrucVar #: Annonars API base URL ANNONARS_API_BASE_URL = f"{settings.API_REEV_URL}/annonars" @@ -28,12 +29,12 @@ def __init__(self, *, api_base_url: Optional[str] = None): self.cache = Cache() def _get_variant_from_range( - self, seqvar: SeqVar, start: int, stop: int + self, variant: Union[SeqVar, StrucVar], start: int, stop: int ) -> AnnonarsRangeResponse: """Pull all variants within a range. Args: - seqvar (SeqVar): Sequence variant. + variant (Union[SeqVar, StrucVar]): Sequence or structural variant. start (int): Start position. stop (int): Stop position. @@ -43,10 +44,12 @@ def _get_variant_from_range( if abs(stop - start) > 5000: raise AnnonarsException("Range is too large for a single request.") + gr = variant.genome_release.name.lower() + chromosome = variant.chrom url = ( f"{self.api_base_url}/annos/range?" - f"genome_release={seqvar.genome_release.name.lower()}" - f"&chromosome={seqvar.chrom}" + f"genome_release={gr}" + f"&chromosome={chromosome}" f"&start={start}" f"&stop={stop}" ) @@ -76,12 +79,12 @@ def _get_variant_from_range( raise AnnonarsException("Annonars returned non-validating data.") from e def get_variant_from_range( - self, seqvar: SeqVar, start: int, stop: int + self, variant: Union[SeqVar, StrucVar], start: int, stop: int ) -> AnnonarsCustomRangeResult: """A wrapper for _get_variant_from_range that handles ranges larger than 5000. Args: - seqvar (SeqVar): Sequence variant. + variant (Union[SeqVar, StrucVar]): Sequence or structural variant. start (int): Start position. stop (int): Stop position. @@ -93,7 +96,7 @@ def get_variant_from_range( current_start = start while current_start < stop: current_stop = min(current_start + MAX_RANGE_SIZE - 1, stop) - response = self._get_variant_from_range(seqvar, current_start, current_stop) + response = self._get_variant_from_range(variant, current_start, current_stop) if res.gnomad_genomes is None: res.gnomad_genomes = response.result.gnomad_genomes else: diff --git a/src/strucvar/auto_pvs1.py b/src/strucvar/auto_pvs1.py index 8c2ac5a..2da7100 100644 --- a/src/strucvar/auto_pvs1.py +++ b/src/strucvar/auto_pvs1.py @@ -12,7 +12,12 @@ GenomicStrand, ) from src.defs.auto_pvs1 import PVS1Prediction, PVS1PredictionPathMapping, PVS1PredictionStrucVarPath -from src.defs.exceptions import AlgorithmError, MissingDataError +from src.defs.exceptions import ( + AlgorithmError, + AutoAcmgBaseException, + InvalidAPIResposeError, + MissingDataError, +) from src.defs.mehari import Exon from src.defs.strucvar import StrucVar, StrucVarType from src.utils import AutoACMGHelper @@ -52,6 +57,50 @@ def _minimal_deletion(self, strucvar: StrucVar, exons: List[Exon]) -> bool: return True return False + def _count_pathogenic_vars(self, strucvar: StrucVar) -> Tuple[int, int]: + """ + Counts pathogenic variants in the range specified by the structural variant. + + The method retrieves variants from the range defined by the structural variant's start and + stop positions and iterates through the ClinVar data of each variant to count the number of + pathogenic variants and the total number of variants. + + Args: + strucvar: The structural variant being analyzed. + + Returns: + Tuple[int, int]: The number of pathogenic variants and the total number of variants. + + Raises: + InvalidAPIResposeError: If the API response is invalid or cannot be processed. + """ + logger.debug( + "Counting pathogenic variants from position {} to {}.", strucvar.start, strucvar.stop + ) + if strucvar.stop < strucvar.start: + raise AlgorithmError("End position is less than the start position.") + + response = self.annonars_client.get_variant_from_range( + strucvar, strucvar.start, strucvar.stop + ) + if response and response.clinvar: + pathogenic_variants = [ + v + for v in response.clinvar + if v.records + and (clf := v.records[0].classifications) + and (gc := clf.germlineClassification) + and gc.description in ["Pathogenic", "Likely pathogenic"] + ] + logger.debug( + "Pathogenic variants: {}, Total variants: {}", + len(pathogenic_variants), + len(response.clinvar), + ) + return len(pathogenic_variants), len(response.clinvar) + else: + raise InvalidAPIResposeError("Failed to get variant from range. No ClinVar data.") + def full_gene_del(self, strucvar: StrucVar, exons: List[Exon]) -> bool: """ Check if the variant is a full gene deletion. @@ -244,10 +293,42 @@ def in_bio_relevant_tsx(self, transcript_tags: List[str]) -> bool: self.comment_pvs1 += f"Transcript tags: {', '.join(transcript_tags)}." return "TRANSCRIPT_TAG_MANE_SELECT" in transcript_tags - @staticmethod - def crit4prot_func() -> bool: - """Check if the deletion is critical for protein function.""" - return False + def crit4prot_func(self, strucvar: StrucVar) -> bool: + """ + Check if the deletion is critical for protein function. + + This method is implemented by fetching variants from the start to the end of the structural + variant, then counting the number of pathogenic variants in that region, by iterating + through the clinvar data of each variant. Consider the region critical if the frequency of + pathogenic variants exceeds 5%. + """ + logger.debug("Checking if the deletion is critical for protein function.") + + try: + pathogenic_variants, total_variants = self._count_pathogenic_vars(strucvar) + self.comment_pvs1 += ( + f"Found {pathogenic_variants} pathogenic variants from {total_variants} total " + f"variants in the range {strucvar.start} - {strucvar.stop}. " + ) + if total_variants == 0: # Avoid division by zero + self.comment_pvs1 += "No variants found. Predicted to be non-critical." + return False + if pathogenic_variants / total_variants > 0.05: + self.comment_pvs1 += ( + "Frequency of pathogenic variants " + f"{pathogenic_variants/total_variants} exceeds 5%. " + "Predicted to be critical." + ) + return True + else: + self.comment_pvs1 += ( + "Frequency of pathogenic variants " + f"{pathogenic_variants/total_variants} does not exceed 5%. " + "Predicted to be non-critical." + ) + return False + except AutoAcmgBaseException as e: + raise AlgorithmError("Failed to predict criticality for variant.") from e @staticmethod def lof_freq_in_pop() -> bool: @@ -315,7 +396,7 @@ def verify_pvs1( # pragma: no cover strucvar, var_data.exons, var_data.strand ) and not self.undergo_nmd(strucvar, var_data.exons, var_data.strand): self.comment_pvs1 += " =>" - if self.crit4prot_func(): + if self.crit4prot_func(strucvar): self.prediction = PVS1Prediction.PVS1_Strong self.prediction_path = PVS1PredictionStrucVarPath.DEL4 else: @@ -335,7 +416,7 @@ def verify_pvs1( # pragma: no cover self.prediction_path = PVS1PredictionStrucVarPath.DEL7_1 else: self.comment_pvs1 += " =>" - if self.crit4prot_func(): + if self.crit4prot_func(strucvar): self.prediction = PVS1Prediction.PVS1_Strong self.prediction_path = PVS1PredictionStrucVarPath.DEL8 else: diff --git a/tests/strucvar/test_auto_pvs1.py b/tests/strucvar/test_auto_pvs1.py index eafb914..3d98f50 100644 --- a/tests/strucvar/test_auto_pvs1.py +++ b/tests/strucvar/test_auto_pvs1.py @@ -11,7 +11,7 @@ GenomicStrand, ) from src.defs.auto_pvs1 import PVS1Prediction, PVS1PredictionStrucVarPath -from src.defs.exceptions import AlgorithmError, MissingDataError +from src.defs.exceptions import AlgorithmError, InvalidAPIResposeError, MissingDataError from src.defs.genome_builds import GenomeRelease from src.defs.mehari import Exon from src.defs.strucvar import StrucVar, StrucVarType @@ -108,6 +108,62 @@ def test_minimal_deletion_non_deletion_variant(strucvar_helper, strucvar): strucvar_helper._minimal_deletion(strucvar, exons) +# ---------- _count_pathogenic_vars ---------- + + +def test_count_pathogenic_vars_valid_range(strucvar_helper, strucvar, monkeypatch): + """Test counting pathogenic variants within a valid range.""" + mock_response = MagicMock() + mock_response.clinvar = [ + MagicMock( + records=[ + MagicMock( + classifications=MagicMock( + germlineClassification=MagicMock(description="Pathogenic") + ) + ) + ] + ), + MagicMock( + records=[ + MagicMock( + classifications=MagicMock( + germlineClassification=MagicMock(description="Likely pathogenic") + ) + ) + ] + ), + MagicMock( + records=[ + MagicMock( + classifications=MagicMock( + germlineClassification=MagicMock(description="Benign") + ) + ) + ] + ), + ] + monkeypatch.setattr( + strucvar_helper.annonars_client, "get_variant_from_range", lambda x, y, z: mock_response + ) + + pathogenic, total = strucvar_helper._count_pathogenic_vars(strucvar) + assert pathogenic == 2 + assert total == 3 + + +def test_count_pathogenic_vars_invalid_api_response(strucvar_helper, strucvar, monkeypatch): + """Test handling invalid API response.""" + monkeypatch.setattr( + strucvar_helper.annonars_client, + "get_variant_from_range", + MagicMock(side_effect=InvalidAPIResposeError), + ) + + with pytest.raises(InvalidAPIResposeError): + strucvar_helper._count_pathogenic_vars(strucvar) + + # --------- full_gene_del --------- @@ -395,6 +451,42 @@ def test_in_bio_relevant_tsx_multiple_mane_select(strucvar_helper): ), "Transcript with multiple Mane tags should be identified as biologically relevant" +# --------- crit4prot_func --------- + + +@pytest.mark.parametrize( + "pathogenic_variants, total_variants, expected_result", + [ + (10, 100, True), # Pathogenic variants exceed the 5% threshold + (3, 100, False), # Pathogenic variants do not exceed the 5% threshold + (0, 0, False), # No variants found + (5, 100, False), # Exactly 5%, considered non-critical + (10, 0, False), # No total variants, handled by the no variants case + ], +) +def test_crit4prot_func( + strucvar_helper, strucvar, pathogenic_variants, total_variants, expected_result, monkeypatch +): + """Test the crit4prot_func method.""" + mock_count_pathogenic = MagicMock(return_value=(pathogenic_variants, total_variants)) + monkeypatch.setattr(strucvar_helper, "_count_pathogenic_vars", mock_count_pathogenic) + + result = strucvar_helper.crit4prot_func(strucvar) + assert result == expected_result + + +def test_crit4prot_func_error_handling(strucvar_helper, strucvar, monkeypatch): + """Test error handling in crit4prot_func.""" + monkeypatch.setattr( + strucvar_helper, + "_count_pathogenic_vars", + MagicMock(side_effect=AlgorithmError("Test error")), + ) + + with pytest.raises(AlgorithmError): + strucvar_helper.crit4prot_func(strucvar) + + # ========== AutoPVS1 ============