Skip to content

Commit

Permalink
Add support for structural variants (#36)
Browse files Browse the repository at this point in the history
* implement strucvar def

* create skeleton of strucvar_pvs1 file

* remove .DS_Store and clean up Makefile

* tests for strucvar

* add autoPVS1 tests
  • Loading branch information
gromdimon authored Apr 14, 2024
1 parent ea311da commit 1100975
Show file tree
Hide file tree
Showing 14 changed files with 486 additions and 83 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
/*.ipynb

.env
.DS_Store

# Created by https://www.toptal.com/developers/gitignore/api/python
# Edit at https://www.toptal.com/developers/gitignore?templates=python
Expand Down
35 changes: 4 additions & 31 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -12,20 +12,16 @@ help:
@echo " lint Run lint checks"
@echo " example_run Run example"
@echo " test Run tests"
@echo " ci-test Run tests in CI"
@echo " ci Install dependencies, run lints and tests"
@echo " docs Generate the documentation"
@echo " ci-docs Generate the documentation in CI"
@echo " mksuperuser Create a superuser"
@echo " serve Run the (development) server"
@echo " jupyterlab Run jupyterlab"
@echo " celery Run celery"
@echo " migrate Create alembic versions and upgrade"
@echo " ci-docs Generate the documentation in CI"

.PHONY: deps
deps:
pipenv install --dev

.PHONY: docs-deps
.PHONY: ci-docs-deps
ci-docs-deps:
python -m pip install --upgrade --no-cache-dir pip setuptools
python -m pip install --upgrade --no-cache-dir sphinx readthedocs-sphinx-ext
Expand Down Expand Up @@ -65,7 +61,7 @@ flake8:

.PHONY: lint-mypy
lint-mypy:
MYPYPATH=$(PWD)/stubs pipenv run mypy --check-untyped-defs $(DIRS_PYTHON)
pipenv run mypy --check-untyped-defs $(DIRS_PYTHON)

#pipenv run python -m src.main 4-113568536-G-GA --genome_release hg19
.PHONY: example_run
Expand Down Expand Up @@ -97,26 +93,3 @@ docs:
.PHONY: ci-docs
ci-docs:
make -C docs clean html

# .PHONY: mksuperuser
# mksuperuser:
# PYTHONPATH=. pipenv run python app/backend_pre_start.py
# PYTHONPATH=. pipenv run python app/initial_data.py

# .PHONY: serve
# serve:
# pipenv run uvicorn app.main:app --host 0.0.0.0 --port 8080 --reload --workers 8

# .PHONY: celery
# celery:
# PYTHONPATH=. pipenv run \
# watchmedo auto-restart --directory=./ --pattern=*.py --recursive -- \
# celery -A app.worker worker --loglevel=debug --beat -Q main-queue

# .PHONY: jupyterlab
# jupyterlab:
# cp utils/minimal.ipynb tmp.ipynb && \
# PYTHON=. pipenv run \
# jupyter lab \
# --ip=0.0.0.0 --allow-root --NotebookApp.custom_display_url=http://127.0.0.1:8888 \
# tmp.ipynb
74 changes: 58 additions & 16 deletions src/autoPVS1.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,19 @@
"""Implementations of the PVS1 algorithm."""

from typing import Union

import typer

from src.defs.autopvs1 import PVS1Prediction, PVS1PredictionSeqVarPath
from src.defs.autopvs1 import (
PVS1Prediction,
PVS1PredictionSeqVarPath,
PVS1PredictionStrucVarPath,
)
from src.defs.genome_builds import GenomeRelease
from src.defs.seqvar import SeqVar, SeqVarResolver
from src.defs.strucvar import StrucVar, StrucVarResolver
from src.seqvar_pvs1 import SeqVarPVS1
from src.strucvar_pvs1 import StrucVarPVS1


class AutoPVS1:
Expand All @@ -15,14 +23,23 @@ def __init__(self, variant_name: str, genome_release: GenomeRelease = GenomeRele
self.variant_name = variant_name
self.genome_release = genome_release

def resolve_variant(self) -> SeqVar | None:
def resolve_variant(self) -> SeqVar | StrucVar | None:
"""Resolve the variant."""
# TODO: Add resolve for Structure variants
try:
seqvar_resolver = SeqVarResolver()
seqvar: SeqVar = seqvar_resolver.resolve_seqvar(self.variant_name, self.genome_release)
typer.secho(f"Resolved variant: {seqvar}.", fg=typer.colors.BLUE)
return seqvar
try:
seqvar_resolver = SeqVarResolver()
seqvar: SeqVar = seqvar_resolver.resolve_seqvar(
self.variant_name, self.genome_release
)
typer.secho(f"Resolved variant: {seqvar}.", fg=typer.colors.BLUE)
return seqvar
except Exception as e:
strucvar_resolver = StrucVarResolver()
strucvar: StrucVar = strucvar_resolver.resolve_strucvar(
self.variant_name, self.genome_release
)
typer.secho(f"Resolved structural variant: {strucvar}.", fg=typer.colors.BLUE)
return strucvar
except Exception as e:
typer.secho(e, err=True, fg=typer.colors.RED)
return None
Expand All @@ -34,34 +51,59 @@ def predict(self):

if isinstance(variant, SeqVar):
self.seqvar: SeqVar = variant
self.prediction: PVS1Prediction = PVS1Prediction.NotPVS1
self.prediction_path: PVS1PredictionSeqVarPath = PVS1PredictionSeqVarPath.NotSet
self.seqvar_prediction: PVS1Prediction = PVS1Prediction.NotPVS1
self.seqvar_prediction_path: PVS1PredictionSeqVarPath = PVS1PredictionSeqVarPath.NotSet

try:
typer.secho(
f"Predicting PVS1 for variant {self.seqvar.user_representation}, genome release: {self.genome_release.name}.",
f"Predicting PVS1 for variant {self.seqvar.user_repr}, genome release: {self.genome_release.name}.",
fg=typer.colors.BLUE,
)
seqvar_pvs1 = SeqVarPVS1(self.seqvar)
seqvar_pvs1.initialize()
seqvar_pvs1.verify_PVS1()
self.prediction, self.prediction_path = seqvar_pvs1.get_prediction()
self.seqvar_prediction, self.seqvar_prediction_path = seqvar_pvs1.get_prediction()
typer.secho(
f"PVS1 prediction for {self.seqvar.user_representation}: {self.prediction.name}",
f"PVS1 prediction for {self.seqvar.user_repr}: {self.seqvar_prediction.name}",
fg=typer.colors.GREEN,
)
except Exception as e:
typer.secho(
f"Failed to predict PVS1 for variant {self.seqvar.user_representation}.",
f"Failed to predict PVS1 for variant {self.seqvar.user_repr}.",
err=True,
fg=typer.colors.RED,
)
typer.secho(e, err=True)
return

elif isinstance(variant, str):
# TODO: Add Structure variants PVS1 prediction
pass
elif isinstance(variant, StrucVar):
self.strucvar: StrucVar = variant
self.strucvar_prediction: PVS1Prediction = PVS1Prediction.NotPVS1 # type: ignore
self.strucvar_prediction_path: PVS1PredictionStrucVarPath = PVS1PredictionStrucVarPath.NotSet # type: ignore

try:
typer.secho(
f"Predicting PVS1 for structural variant {self.strucvar.user_repr}, genome release: {self.genome_release.name}.",
fg=typer.colors.BLUE,
)
strucvar_pvs1 = StrucVarPVS1(self.strucvar)
strucvar_pvs1.initialize()
strucvar_pvs1.verify_PVS1()
self.strucvar_prediction, self.strucvar_prediction_path = (
strucvar_pvs1.get_prediction()
)
typer.secho(
f"PVS1 prediction for {self.strucvar.user_repr}: {self.strucvar_prediction.name}",
fg=typer.colors.GREEN,
)
except Exception as e:
typer.secho(
f"Failed to predict PVS1 for structural variant {self.strucvar.user_repr}.",
err=True,
fg=typer.colors.RED,
)
typer.secho(e, err=True)
return
else:
typer.secho(
f"Failed to resolve variant {self.variant_name}.", err=True, fg=typer.colors.RED
Expand Down
4 changes: 3 additions & 1 deletion src/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,16 @@
ALLOWED_GENOME_RELEASES = ["GRCh37", "GRCh38", "hg19", "hg38", "grch37", "grch38"]
#: Allowed sequence variant formats; joined with ", " into the help text of the
#: ``variant`` argument of ``classify``.
ALLOWED_SEQVAR_FORMATS = ["Canonical SPDI", "gnomAD", "relaxed SPDI", "dbSNP", "ClinVar"]
#: Allowed structural variant formats; joined with ", " into the help text of the
#: ``variant`` argument of ``classify``.
# NOTE(review): these lists are only seen being used for help text here -- confirm
# whether they are also used for input validation elsewhere.
ALLOWED_STRUCVAR_FORMATS = ["Colon-separated", "Hyphen-separated"]


@app.command()
def classify(
variant: Annotated[
str,
typer.Argument(
help=f"Variant to be classified, e.g., 'NM_000038.3:c.797G>A'. Accepted formats: {', '.join(ALLOWED_SEQVAR_FORMATS)}"
help=f"Variant to be classified, e.g., 'NM_000038.3:c.797G>A'. Accepted sequence variants formats: {', '.join(ALLOWED_SEQVAR_FORMATS)}. Accepted structural variants formats: {', '.join(ALLOWED_STRUCVAR_FORMATS)}."
),
],
genome_release: Annotated[
Expand Down
19 changes: 19 additions & 0 deletions src/defs/autopvs1.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,25 @@ class PVS1PredictionSeqVarPath(Enum):
IC3 = auto()


#: Enumeration for PVS1 prediction path for structural variants
class PVS1PredictionStrucVarPath(Enum):
    """PVS1 prediction path for structural variants.

    Each member names one path that a structural-variant PVS1 evaluation can
    report. ``NotSet`` is the placeholder value assigned before a prediction
    has been made. Member values are generated with ``auto()``, so only the
    member names are meaningful.
    """

    # NOTE(review): a near-identically named enum, ``PVS1PredictionsStrucVarPath``
    # (note the extra "s"), is declared right after this class -- confirm whether
    # both are needed or one is a leftover.

    # Placeholder: no prediction path has been determined yet.
    NotSet = auto()
    # DEL1-DEL8: presumably the deletion branches of the PVS1 decision tree
    # -- TODO confirm against the StrucVarPVS1 implementation.
    DEL1 = auto()
    DEL2 = auto()
    DEL3 = auto()
    DEL4 = auto()
    DEL5 = auto()
    DEL6 = auto()
    DEL7 = auto()
    DEL8 = auto()
    # DUP1-DUP4: presumably the duplication branches of the PVS1 decision tree
    # -- TODO confirm against the StrucVarPVS1 implementation.
    DUP1 = auto()
    DUP2 = auto()
    DUP3 = auto()
    DUP4 = auto()


#: Enumeration for PVS1 prediction path for structure variants
class PVS1PredictionsStrucVarPath(Enum):
"""PVS1 prediction path for structure variants."""
Expand Down
23 changes: 12 additions & 11 deletions src/defs/seqvar.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,16 +45,16 @@ def __init__(
pos: int,
delete: str,
insert: str,
user_representation: Optional[str] = None,
user_repr: Optional[str] = None,
):
self.genome_release = genome_release
self.chrom = self._normalize_chromosome(chrom)
self.pos = pos
self.delete = delete.upper()
self.insert = insert.upper()
self.user_representation = (
user_representation
if user_representation is not None
self.user_repr = (
user_repr
if user_repr
else f"{genome_release.name}-{self.chrom}-{pos}-{delete}-{insert}"
)

Expand All @@ -64,10 +64,11 @@ def _normalize_chromosome(self, chrom: str) -> str:

def __repr__(self):
"""Return a user-friendly representation of the variant."""
return self.user_representation
return self.user_repr


class SeqVarResolver:
"""The class to resolve sequence variants."""

def __init__(self):
pass
Expand Down Expand Up @@ -100,7 +101,7 @@ def _normalize_chrom(self, value: str) -> str:
return value.lower().replace("chr", "").replace("m", "mt").upper()

def _parse_separated_seqvar(
self, value: str, default_genome_build: GenomeRelease = GenomeRelease.GRCh38
self, value: str, default_genome_release: GenomeRelease = GenomeRelease.GRCh38
) -> SeqVar:
"""
Parse a colon/hyphen separated sequence variant representation.
Expand All @@ -119,7 +120,7 @@ def _parse_separated_seqvar(

genome_build_value = match.group("genome_build")
genome_build = (
GenomeRelease[genome_build_value] if genome_build_value else default_genome_build
GenomeRelease[genome_build_value] if genome_build_value else default_genome_release
)
chrom = self._normalize_chrom(match.group("chrom"))
pos = int(match.group("pos"))
Expand All @@ -132,7 +133,7 @@ def _parse_separated_seqvar(
pos=pos,
delete=delete,
insert=insert,
user_representation=value,
user_repr=value,
)
return self._validate_seqvar(variant)

Expand Down Expand Up @@ -170,7 +171,7 @@ def _parse_canonical_spdi_seqvar(self, value: str) -> SeqVar:
pos=pos,
delete=delete,
insert=insert,
user_representation=value,
user_repr=value,
)
return self._validate_seqvar(variant)

Expand All @@ -188,7 +189,7 @@ def resolve_seqvar(self, value: str, genome_release: GenomeRelease) -> SeqVar:
:raises ParseError: If the variant representation is invalid
"""
try:
return self._parse_separated_seqvar(value, default_genome_build=genome_release)
return self._parse_separated_seqvar(value, default_genome_release=genome_release)
except ParseError:
pass

Expand All @@ -210,7 +211,7 @@ def resolve_seqvar(self, value: str, genome_release: GenomeRelease) -> SeqVar:
pos=spdi.value.pos,
delete=spdi.value.reference_deleted,
insert=spdi.value.alternate_inserted,
user_representation=value,
user_repr=value,
)
else:
raise ParseError(f"Unable to resolve seqvar: {value}")
Expand Down
Loading

0 comments on commit 1100975

Please sign in to comment.