Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Increase test coverage #162

Merged
merged 24 commits into from
Aug 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .env.dev
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
DEBUG=1
USE_CACHE=1
API_REEV_URL=https://reev.cubi.bihealth.org/internal/proxy
SEQREPO_DATA_DIR=/home/gromdimon/Custom/seqrepo/master
SEQREPO_DATA_DIR=<path_to_seqrepo_data_dir>
2 changes: 1 addition & 1 deletion .gitattributes
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,6 @@ tests/assets/**/*.json filter=lfs diff=lfs merge=lfs -text
lib/rmsk/**/*.gz filter=lfs diff=lfs merge=lfs -text
lib/rmsk/**/*.tbi filter=lfs diff=lfs merge=lfs -text
lib/uniprot/**/*.gz filter=lfs diff=lfs merge=lfs -text
lib/uniprot/**/*.tbi filter=lfs diff=lfs merge=lfs -text\
lib/uniprot/**/*.tbi filter=lfs diff=lfs merge=lfs -text
tests/e2e/cassettes/**/* filter=lfs diff=lfs merge=lfs -text
tests/integ/cassettes/**/**/* filter=lfs diff=lfs merge=lfs -text
2 changes: 2 additions & 0 deletions Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ msgpack = "*"
seqrepo = "*"
pytabix = "*"
httpx = "*"
yoyo-migrations = "*"

[dev-packages]
mypy = "*"
Expand Down Expand Up @@ -41,6 +42,7 @@ seaborn = "*"
pytest-env = "*"
pytest-httpx = "*"
pytest-recording = "*"
pytest-asyncio = "*"

[requires]
python_version = "3.12"
1,008 changes: 542 additions & 466 deletions Pipfile.lock

Large diffs are not rendered by default.

6 changes: 5 additions & 1 deletion codecov.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,14 @@ coverage:
# Ignoring Paths
# --------------
# which folders/files to ignore
ignore:
- "test/*"
- "src/bench/*"
- "src/core/cache.py"

# Pull request comments:
# ----------------------
# Diff is the Coverage Diff of the pull request.
# Files are the files impacted by the pull request
comment:
layout: diff, files # accepted in any order: reach, diff, flags, and/or files
layout: diff, files # accepted in any order: reach, diff, flags, and/or files
3 changes: 3 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@ env = [
"USE_CACHE=0",
"API_REEV_URL=https://reev.cubi.bihealth.org/internal/proxy",
]
filterwarnings = [
"ignore::DeprecationWarning"
]


[tool.mypy]
Expand Down
1 change: 1 addition & 0 deletions src/auto_acmg.py
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,7 @@ def predict(self) -> Optional[AutoACMGResult]:
"""
logger.info("Predicting ACMG criteria for variant: {}", self.variant_name)
self.seqvar = self.resolve_variant()
self.result.seqvar = self.seqvar
if not self.seqvar:
logger.error("Unable to make a prediction for the variant: {}", self.variant_name)
return None
Expand Down
34 changes: 11 additions & 23 deletions src/criteria/auto_pm1.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,30 +98,18 @@ def _get_uniprot_domain(seqvar: SeqVar) -> Optional[Tuple[int, int]]:
Returns:
Optional[Tuple[int, int]]: The start and end positions of the UniProt domain if found,
None otherwise.

Raises:
AlgorithmError: If an error occurs while checking if the variant is in a UniProt domain.
"""
try:
# Find path to the lib file
if seqvar.genome_release == GenomeRelease.GRCh37:
path = os.path.join(
settings.PATH_TO_ROOT, "lib", "uniprot", "grch37", "uniprot.bed.gz"
)
else:
path = os.path.join(
settings.PATH_TO_ROOT, "lib", "uniprot", "grch38", "uniprot.bed.gz"
)
tb = tabix.open(path)
records = tb.query(f"chr{seqvar.chrom}", seqvar.pos - 1, seqvar.pos)
# Return the first record
for record in records:
return int(record[1]), int(record[2])
return None

except tabix.TabixError as e:
logger.error("Failed to check if the variant is in a UniProt domain. Error: {}", e)
raise AlgorithmError("Failed to check if the variant is in a UniProt domain.") from e
# Find path to the lib file
if seqvar.genome_release == GenomeRelease.GRCh37:
path = os.path.join(settings.PATH_TO_ROOT, "lib", "uniprot", "grch37", "uniprot.bed.gz")
else:
path = os.path.join(settings.PATH_TO_ROOT, "lib", "uniprot", "grch38", "uniprot.bed.gz")
tb = tabix.open(path)
records = tb.query(f"chr{seqvar.chrom}", seqvar.pos - 1, seqvar.pos)
# Return the first record
for record in records:
return int(record[1]), int(record[2])
return None

def verify_pm1(self, seqvar: SeqVar, var_data: AutoACMGData) -> Tuple[Optional[PM1], str]:
"""Predict PM1 criteria."""
Expand Down
2 changes: 1 addition & 1 deletion src/criteria/auto_pm4_bp3.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import os
from typing import Optional, Tuple

import tabix # type: ignore
import tabix
from loguru import logger

from src.core.config import settings
Expand Down
46 changes: 20 additions & 26 deletions src/criteria/auto_pvs1.py
Original file line number Diff line number Diff line change
Expand Up @@ -280,7 +280,7 @@ def undergo_nmd(
self,
var_pos: int,
hgnc_id: str,
strand: Optional[GenomicStrand],
strand: GenomicStrand,
exons: List[Exon],
) -> bool:
"""
Expand Down Expand Up @@ -317,7 +317,7 @@ def undergo_nmd(
"Always predicted to undergo NMD."
)
return True
if not strand or not exons:
if strand == GenomicStrand.NotSet or not exons:
logger.error("Strand information or exons are not available. Cannot determine NMD.")
raise MissingDataError(
"Strand information or exons are not available. Cannot determine NMD."
Expand All @@ -344,7 +344,7 @@ def undergo_nmd(
)
return var_pos <= nmd_cutoff

def in_bio_relevant_tsx(self, transcript_tags: List[str]) -> bool:
def in_bio_relevant_tx(self, transcript_tags: List[str]) -> bool:
"""
Checks if the exon with SeqVar is in a biologically relevant transcript.

Expand All @@ -369,7 +369,7 @@ def crit4prot_func(
self,
seqvar: SeqVar,
exons: List[Exon],
strand: Optional[GenomicStrand],
strand: GenomicStrand,
) -> bool:
"""
Checks if the truncated or altered region is critical for the protein function.
Expand Down Expand Up @@ -404,7 +404,7 @@ def crit4prot_func(
InvalidAPIResponseError: If the API response is invalid or cannot be processed.
"""
logger.debug("Checking if the altered region is critical for the protein function.")
if not strand or not exons:
if strand == GenomicStrand.NotSet or not exons:
logger.error(
"Genomic strand or exons are not available. " "Cannot determine criticality."
)
Expand Down Expand Up @@ -446,7 +446,7 @@ def lof_freq_in_pop(
self,
seqvar: SeqVar,
exons: List[Exon],
strand: Optional[GenomicStrand],
strand: GenomicStrand,
) -> bool:
"""
Checks if the Loss-of-Function (LoF) variants in the exon are frequent in the general
Expand Down Expand Up @@ -479,7 +479,7 @@ def lof_freq_in_pop(
InvalidAPIResponseError: If the API response is invalid or cannot be processed.
"""
logger.debug("Checking if LoF variants are frequent in the general population.")
if not strand:
if strand == GenomicStrand.NotSet:
logger.error("Genomic strand is not available. Cannot determine LoF frequency.")
raise MissingDataError(
"Genomic strand position is not available. Cannot determine LoF frequency."
Expand Down Expand Up @@ -544,7 +544,7 @@ def exon_skip_or_cryptic_ss_disrupt(
seqvar: SeqVar,
exons: List[Exon],
consequences: List[str],
strand: Optional[GenomicStrand],
strand: GenomicStrand,
) -> bool:
"""
Check if the variant causes exon skipping or cryptic splice site disruption.
Expand Down Expand Up @@ -575,12 +575,12 @@ def exon_skip_or_cryptic_ss_disrupt(
logger.debug(
"Checking if the variant causes exon skipping or cryptic splice site disruption."
)
if not strand:
if strand == GenomicStrand.NotSet:
logger.error("Strand is not available. Cannot determine exon skipping.")
raise MissingDataError("Strand is not available. Cannot determine exon skipping.")
start_pos, end_pos = self._skipping_exon_pos(seqvar, exons)
self.comment_pvs1 += f"Variant's exon position: {start_pos} - {end_pos}."
if (end_pos - start_pos) % 3 != 0:
if (end_pos - start_pos + 1) % 3 != 0:
logger.debug("Exon length is not a multiple of 3. Predicted to cause exon skipping.")
self.comment_pvs1 += (
"Exon length is not a multiple of 3. Predicted to cause exon skipping."
Expand All @@ -599,14 +599,14 @@ def exon_skip_or_cryptic_ss_disrupt(
cryptic_sites = sp.get_cryptic_ss(refseq, splice_type)
if len(cryptic_sites) > 0:
for site in cryptic_sites:
if abs(site[0] - seqvar.pos) % 3 != 0:
if abs(site[0] - seqvar.pos + 1) % 3 != 0:
logger.debug("Cryptic splice site disruption predicted.")
self.comment_pvs1 += (
"Cryptic splice site disruption predicted. "
f"Cryptic splice site: position {site[0]}, splice context {site[1]}, "
f"maximnum entropy score {site[2]}. "
f"Cryptic splice site - variant position ({seqvar.pos}) = "
f"{abs(site[0] - seqvar.pos)} is not devisible by 3."
f"{abs(site[0] - seqvar.pos + 1)} is not devisible by 3."
)
return True
logger.debug("Cryptic splice site disruption not predicted.")
Expand All @@ -616,7 +616,7 @@ def exon_skip_or_cryptic_ss_disrupt(
f"Cryptic splice site {i}: position {site[0]}, splice context {site[1]}, "
f"maximnum entropy score {site[2]}. "
f"Cryptic splice site - variant position ({seqvar.pos}) = "
f"{abs(site[0] - seqvar.pos)} is devisible by 3."
f"{abs(site[0] - seqvar.pos + 1)} is devisible by 3."
)
else:
logger.debug("No cryptic splice site found. Predicted to preserve reading frame.")
Expand Down Expand Up @@ -662,9 +662,7 @@ def up_pathogenic_vars(
self,
seqvar: SeqVar,
exons: List[Exon],
strand: Optional[GenomicStrand],
cds_info: Dict[str, CdsInfo],
hgvs: str,
strand: GenomicStrand,
) -> bool:
"""
Look for pathogenic variants upstream of the closest potential in-frame start codon.
Expand All @@ -680,8 +678,6 @@ def up_pathogenic_vars(
cds_pos: The position of the variant in the coding sequence.
exons: A list of exons of the gene where the variant occurs.
strand: The genomic strand of the gene.
cds_info: A dictionary containing the CDS information for all transcripts.
hgvs: The main transcript ID.

Returns:
bool: True if pathogenic variants are found upstream of the closest potential in-frame
Expand All @@ -691,7 +687,7 @@ def up_pathogenic_vars(
"Checking for pathogenic variants upstream of the closest in-frame start codon."
)

if not strand:
if strand == GenomicStrand.NotSet:
logger.error("Strand is not available. Cannot determine upstream pathogenic variants.")
raise MissingDataError(
"Strand is not available. Cannot determine upstream pathogenic variants."
Expand Down Expand Up @@ -772,7 +768,7 @@ def verify_pvs1(
var_data.tx_pos_utr, var_data.hgnc_id, var_data.strand, var_data.exons
):
self.comment_pvs1 += " =>\n"
if self.in_bio_relevant_tsx(var_data.transcript_tags):
if self.in_bio_relevant_tx(var_data.transcript_tags):
self.prediction = PVS1Prediction.PVS1
self.prediction_path = PVS1PredictionSeqVarPath.NF1
else:
Expand All @@ -787,7 +783,7 @@ def verify_pvs1(
self.comment_pvs1 += " =>\n"
if self.lof_freq_in_pop(
seqvar, var_data.exons, var_data.strand
) or not self.in_bio_relevant_tsx(var_data.transcript_tags):
) or not self.in_bio_relevant_tx(var_data.transcript_tags):
self.prediction = PVS1Prediction.NotPVS1
self.prediction_path = PVS1PredictionSeqVarPath.NF4
else:
Expand All @@ -807,7 +803,7 @@ def verify_pvs1(
var_data.tx_pos_utr, var_data.hgnc_id, var_data.strand, var_data.exons
):
self.comment_pvs1 += " =>\n"
if self.in_bio_relevant_tsx(var_data.transcript_tags):
if self.in_bio_relevant_tx(var_data.transcript_tags):
self.prediction = PVS1Prediction.PVS1
self.prediction_path = PVS1PredictionSeqVarPath.SS1
else:
Expand All @@ -826,7 +822,7 @@ def verify_pvs1(
self.comment_pvs1 += " =>\n"
if self.lof_freq_in_pop(
seqvar, var_data.exons, var_data.strand
) or not self.in_bio_relevant_tsx(var_data.transcript_tags):
) or not self.in_bio_relevant_tx(var_data.transcript_tags):
self.prediction = PVS1Prediction.NotPVS1
self.prediction_path = PVS1PredictionSeqVarPath.SS4
else:
Expand All @@ -846,7 +842,7 @@ def verify_pvs1(
self.comment_pvs1 += " =>\n"
if self.lof_freq_in_pop(
seqvar, var_data.exons, var_data.strand
) or not self.in_bio_relevant_tsx(var_data.transcript_tags):
) or not self.in_bio_relevant_tx(var_data.transcript_tags):
self.prediction = PVS1Prediction.NotPVS1
self.prediction_path = PVS1PredictionSeqVarPath.SS7
else:
Expand All @@ -869,8 +865,6 @@ def verify_pvs1(
seqvar,
var_data.exons,
var_data.strand,
var_data.cds_info,
var_data.transcript_id,
):
self.prediction = PVS1Prediction.PVS1_Moderate
self.prediction_path = PVS1PredictionSeqVarPath.IC1
Expand Down
Loading
Loading