Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support of Structural variants #36

Merged
merged 5 commits into from
Apr 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
/*.ipynb

.env
.DS_Store

# Created by https://www.toptal.com/developers/gitignore/api/python
# Edit at https://www.toptal.com/developers/gitignore?templates=python
Expand Down
35 changes: 4 additions & 31 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -12,20 +12,16 @@ help:
@echo " lint Run lint checks"
@echo " example_run Run example"
@echo " test Run tests"
@echo " ci-test Run tests in CI"
@echo " ci Install dependencies, run lints and tests"
@echo " docs Generate the documentation"
@echo " ci-docs Generate the documentation in CI"
@echo " mksuperuser Create a superuser"
@echo " serve Run the (development) server"
@echo " jupyterlab Run jupyterlab"
@echo " celery Run celery"
@echo " migrate Create alembic versions and upgrade"
@echo " ci-docs Generate the documentation in CI"

.PHONY: deps
deps:
pipenv install --dev

.PHONY: docs-deps
.PHONY: ci-docs-deps
ci-docs-deps:
python -m pip install --upgrade --no-cache-dir pip setuptools
python -m pip install --upgrade --no-cache-dir sphinx readthedocs-sphinx-ext
Expand Down Expand Up @@ -65,7 +61,7 @@ flake8:

.PHONY: lint-mypy
lint-mypy:
MYPYPATH=$(PWD)/stubs pipenv run mypy --check-untyped-defs $(DIRS_PYTHON)
pipenv run mypy --check-untyped-defs $(DIRS_PYTHON)

#pipenv run python -m src.main 4-113568536-G-GA --genome_release hg19
.PHONY: example_run
Expand Down Expand Up @@ -97,26 +93,3 @@ docs:
.PHONY: ci-docs
ci-docs:
make -C docs clean html

# .PHONY: mksuperuser
# mksuperuser:
# PYTHONPATH=. pipenv run python app/backend_pre_start.py
# PYTHONPATH=. pipenv run python app/initial_data.py

# .PHONY: serve
# serve:
# pipenv run uvicorn app.main:app --host 0.0.0.0 --port 8080 --reload --workers 8

# .PHONY: celery
# celery:
# PYTHONPATH=. pipenv run \
# watchmedo auto-restart --directory=./ --pattern=*.py --recursive -- \
# celery -A app.worker worker --loglevel=debug --beat -Q main-queue

# .PHONY: jupyterlab
# jupyterlab:
# cp utils/minimal.ipynb tmp.ipynb && \
# PYTHON=. pipenv run \
# jupyter lab \
# --ip=0.0.0.0 --allow-root --NotebookApp.custom_display_url=http://127.0.0.1:8888 \
# tmp.ipynb
74 changes: 58 additions & 16 deletions src/autoPVS1.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,19 @@
"""Implementations of the PVS1 algorithm."""

from typing import Union

import typer

from src.defs.autopvs1 import PVS1Prediction, PVS1PredictionSeqVarPath
from src.defs.autopvs1 import (
PVS1Prediction,
PVS1PredictionSeqVarPath,
PVS1PredictionStrucVarPath,
)
from src.defs.genome_builds import GenomeRelease
from src.defs.seqvar import SeqVar, SeqVarResolver
from src.defs.strucvar import StrucVar, StrucVarResolver
from src.seqvar_pvs1 import SeqVarPVS1
from src.strucvar_pvs1 import StrucVarPVS1


class AutoPVS1:
Expand All @@ -15,14 +23,23 @@ def __init__(self, variant_name: str, genome_release: GenomeRelease = GenomeRele
self.variant_name = variant_name
self.genome_release = genome_release

def resolve_variant(self) -> SeqVar | None:
def resolve_variant(self) -> SeqVar | StrucVar | None:
"""Resolve the variant."""
# TODO: Add resolve for Structure variants
try:
seqvar_resolver = SeqVarResolver()
seqvar: SeqVar = seqvar_resolver.resolve_seqvar(self.variant_name, self.genome_release)
typer.secho(f"Resolved variant: {seqvar}.", fg=typer.colors.BLUE)
return seqvar
try:
seqvar_resolver = SeqVarResolver()
seqvar: SeqVar = seqvar_resolver.resolve_seqvar(
self.variant_name, self.genome_release
)
typer.secho(f"Resolved variant: {seqvar}.", fg=typer.colors.BLUE)
return seqvar
except Exception as e:
strucvar_resolver = StrucVarResolver()
strucvar: StrucVar = strucvar_resolver.resolve_strucvar(
self.variant_name, self.genome_release
)
typer.secho(f"Resolved structural variant: {strucvar}.", fg=typer.colors.BLUE)
return strucvar
except Exception as e:
typer.secho(e, err=True, fg=typer.colors.RED)
return None
Expand All @@ -34,34 +51,59 @@ def predict(self):

if isinstance(variant, SeqVar):
self.seqvar: SeqVar = variant
self.prediction: PVS1Prediction = PVS1Prediction.NotPVS1
self.prediction_path: PVS1PredictionSeqVarPath = PVS1PredictionSeqVarPath.NotSet
self.seqvar_prediction: PVS1Prediction = PVS1Prediction.NotPVS1
self.seqvar_prediction_path: PVS1PredictionSeqVarPath = PVS1PredictionSeqVarPath.NotSet

try:
typer.secho(
f"Predicting PVS1 for variant {self.seqvar.user_representation}, genome release: {self.genome_release.name}.",
f"Predicting PVS1 for variant {self.seqvar.user_repr}, genome release: {self.genome_release.name}.",
fg=typer.colors.BLUE,
)
seqvar_pvs1 = SeqVarPVS1(self.seqvar)
seqvar_pvs1.initialize()
seqvar_pvs1.verify_PVS1()
self.prediction, self.prediction_path = seqvar_pvs1.get_prediction()
self.seqvar_prediction, self.seqvar_prediction_path = seqvar_pvs1.get_prediction()
typer.secho(
f"PVS1 prediction for {self.seqvar.user_representation}: {self.prediction.name}",
f"PVS1 prediction for {self.seqvar.user_repr}: {self.seqvar_prediction.name}",
fg=typer.colors.GREEN,
)
except Exception as e:
typer.secho(
f"Failed to predict PVS1 for variant {self.seqvar.user_representation}.",
f"Failed to predict PVS1 for variant {self.seqvar.user_repr}.",
err=True,
fg=typer.colors.RED,
)
typer.secho(e, err=True)
return

elif isinstance(variant, str):
# TODO: Add Structure variants PVS1 prediction
pass
elif isinstance(variant, StrucVar):
self.strucvar: StrucVar = variant
self.strucvar_prediction: PVS1Prediction = PVS1Prediction.NotPVS1 # type: ignore
self.strucvar_prediction_path: PVS1PredictionStrucVarPath = PVS1PredictionStrucVarPath.NotSet # type: ignore

try:
typer.secho(
f"Predicting PVS1 for structural variant {self.strucvar.user_repr}, genome release: {self.genome_release.name}.",
fg=typer.colors.BLUE,
)
strucvar_pvs1 = StrucVarPVS1(self.strucvar)
strucvar_pvs1.initialize()
strucvar_pvs1.verify_PVS1()
self.strucvar_prediction, self.strucvar_prediction_path = (
strucvar_pvs1.get_prediction()
)
typer.secho(
f"PVS1 prediction for {self.strucvar.user_repr}: {self.strucvar_prediction.name}",
fg=typer.colors.GREEN,
)
except Exception as e:
typer.secho(
f"Failed to predict PVS1 for structural variant {self.strucvar.user_repr}.",
err=True,
fg=typer.colors.RED,
)
typer.secho(e, err=True)
return
else:
typer.secho(
f"Failed to resolve variant {self.variant_name}.", err=True, fg=typer.colors.RED
Expand Down
4 changes: 3 additions & 1 deletion src/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,16 @@
ALLOWED_GENOME_RELEASES = ["GRCh37", "GRCh38", "hg19", "hg38", "grch37", "grch38"]
#: Allowed sequence variant formats
ALLOWED_SEQVAR_FORMATS = ["Canonical SPDI", "gnomAD", "relaxed SPDI", "dbSNP", "ClinVar"]
#: Allowed structural variant formats
ALLOWED_STRUCVAR_FORMATS = ["Colon-separated", "Hyphen-separated"]


@app.command()
def classify(
variant: Annotated[
str,
typer.Argument(
help=f"Variant to be classified, e.g., 'NM_000038.3:c.797G>A'. Accepted formats: {', '.join(ALLOWED_SEQVAR_FORMATS)}"
help=f"Variant to be classified, e.g., 'NM_000038.3:c.797G>A'. Accepted sequence variants formats: {', '.join(ALLOWED_SEQVAR_FORMATS)}. Accepted structural variants formats: {', '.join(ALLOWED_STRUCVAR_FORMATS)}."
),
],
genome_release: Annotated[
Expand Down
19 changes: 19 additions & 0 deletions src/defs/autopvs1.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,25 @@ class PVS1PredictionSeqVarPath(Enum):
IC3 = auto()


#: Enumeration for PVS1 prediction path for structural variants
class PVS1PredictionStrucVarPath(Enum):
"""PVS1 prediction path for structure variants."""

NotSet = auto()
DEL1 = auto()
DEL2 = auto()
DEL3 = auto()
DEL4 = auto()
DEL5 = auto()
DEL6 = auto()
DEL7 = auto()
DEL8 = auto()
DUP1 = auto()
DUP2 = auto()
DUP3 = auto()
DUP4 = auto()


#: Enumeration for PVS1 prediction path for structural variants
class PVS1PredictionsStrucVarPath(Enum):
"""PVS1 prediction path for structure variants."""
Expand Down
23 changes: 12 additions & 11 deletions src/defs/seqvar.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,16 +45,16 @@ def __init__(
pos: int,
delete: str,
insert: str,
user_representation: Optional[str] = None,
user_repr: Optional[str] = None,
):
self.genome_release = genome_release
self.chrom = self._normalize_chromosome(chrom)
self.pos = pos
self.delete = delete.upper()
self.insert = insert.upper()
self.user_representation = (
user_representation
if user_representation is not None
self.user_repr = (
user_repr
if user_repr
else f"{genome_release.name}-{self.chrom}-{pos}-{delete}-{insert}"
)

Expand All @@ -64,10 +64,11 @@ def _normalize_chromosome(self, chrom: str) -> str:

def __repr__(self):
"""Return a user-friendly representation of the variant."""
return self.user_representation
return self.user_repr


class SeqVarResolver:
"""The class to resolve sequence variants."""

def __init__(self):
pass
Expand Down Expand Up @@ -100,7 +101,7 @@ def _normalize_chrom(self, value: str) -> str:
return value.lower().replace("chr", "").replace("m", "mt").upper()

def _parse_separated_seqvar(
self, value: str, default_genome_build: GenomeRelease = GenomeRelease.GRCh38
self, value: str, default_genome_release: GenomeRelease = GenomeRelease.GRCh38
) -> SeqVar:
"""
Parse a colon/hyphen separated sequence variant representation.
Expand All @@ -119,7 +120,7 @@ def _parse_separated_seqvar(

genome_build_value = match.group("genome_build")
genome_build = (
GenomeRelease[genome_build_value] if genome_build_value else default_genome_build
GenomeRelease[genome_build_value] if genome_build_value else default_genome_release
)
chrom = self._normalize_chrom(match.group("chrom"))
pos = int(match.group("pos"))
Expand All @@ -132,7 +133,7 @@ def _parse_separated_seqvar(
pos=pos,
delete=delete,
insert=insert,
user_representation=value,
user_repr=value,
)
return self._validate_seqvar(variant)

Expand Down Expand Up @@ -170,7 +171,7 @@ def _parse_canonical_spdi_seqvar(self, value: str) -> SeqVar:
pos=pos,
delete=delete,
insert=insert,
user_representation=value,
user_repr=value,
)
return self._validate_seqvar(variant)

Expand All @@ -188,7 +189,7 @@ def resolve_seqvar(self, value: str, genome_release: GenomeRelease) -> SeqVar:
:raises ParseError: If the variant representation is invalid
"""
try:
return self._parse_separated_seqvar(value, default_genome_build=genome_release)
return self._parse_separated_seqvar(value, default_genome_release=genome_release)
except ParseError:
pass

Expand All @@ -210,7 +211,7 @@ def resolve_seqvar(self, value: str, genome_release: GenomeRelease) -> SeqVar:
pos=spdi.value.pos,
delete=spdi.value.reference_deleted,
insert=spdi.value.alternate_inserted,
user_representation=value,
user_repr=value,
)
else:
raise ParseError(f"Unable to resolve seqvar: {value}")
Expand Down
Loading