Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix dependency chain broken by newer Polars versions #293

Merged
merged 11 commits into from
Jan 10, 2024
Merged
3 changes: 1 addition & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,7 @@ PYTHON3 ?= python3
all: check

check:
./lint.sh
cd test && pytest
./lint-and-test.sh

#: Clean up temporary files
clean:
Expand Down
1 change: 1 addition & 0 deletions lint-and-test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
./lint.sh && ./test.sh
2 changes: 1 addition & 1 deletion lint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,6 @@ set -o errexit
find pyensembl -name '*.py' \
| xargs pylint \
--errors-only \
--disable=print-statement,unsubscriptable-object,not-an-iterable,no-member
--disable=unsubscriptable-object,not-an-iterable,no-member

echo 'Passes pylint check'
33 changes: 30 additions & 3 deletions pyensembl/species.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,8 @@ def __init__(self, latin_name, synonyms=[], reference_assemblies={}):
for i in range(start, end + 1):
if i in self._release_to_genome:
raise ValueError(
"Ensembl release %d already has an associated genome" % i
"Ensembl release %d for %s already has an associated genome"
% (i, latin_name)
)
self._release_to_genome[i] = genome_name

Expand Down Expand Up @@ -304,6 +305,18 @@ def check_species_object(species_name_or_object):
reference_assemblies={"Sscrofa11.1": (75, MAX_ENSEMBL_RELEASE)},
)

# Zebrafish: every assembly name is paired with the inclusive
# (first_release, last_release) span of Ensembl releases that use it.
zebrafish = Species.register(
    latin_name="danio_rerio",
    synonyms=["zebrafish"],
    reference_assemblies=dict(
        [
            ("ZFISH7", (47, 53)),
            ("Zv8", (54, 59)),
            ("Zv9", (60, 79)),
            ("GRCz10", (80, 91)),
            ("GRCz11", (92, MAX_ENSEMBL_RELEASE)),
        ]
    ),
)

fly = Species.register(
latin_name="drosophila_melanogaster",
synonyms=["drosophila", "fruit fly", "fly"],
Expand All @@ -316,10 +329,24 @@ def check_species_object(species_name_or_object):
},
)

# Nematode (C. elegans): every assembly name is paired with the inclusive
# (first_release, last_release) span of Ensembl releases that use it.
# The spans must be contiguous and non-overlapping: a release claimed twice
# raises ValueError at registration, and a release claimed by no assembly
# cannot be resolved to a genome.
nematode = Species.register(
    latin_name="caenorhabditis_elegans",
    synonyms=["nematode", "C_elegans"],
    reference_assemblies={
        "WS180": (47, 49),
        "WS190": (50, 54),
        "WS200": (55, 57),
        # Previously (58, 59), which left release 60 without any assembly.
        # NOTE(review): release 60 is assigned to WS210 here (WS220 starts
        # at 61) -- confirm against the Ensembl release 60 archive.
        "WS210": (58, 60),
        "WS220": (61, 66),
        "WBcel215": (67, 70),
        "WBcel235": (71, MAX_ENSEMBL_RELEASE),
    },
)

yeast = Species.register(
latin_name="saccharomyces_cerevisiae",
synonyms=["yeast","budding_yeast"],
synonyms=["yeast", "budding_yeast"],
reference_assemblies={
"R64-1-1": (76, MAX_ENSEMBL_RELEASE),
},
)
)
2 changes: 1 addition & 1 deletion pyensembl/version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "2.2.9"
__version__ = "2.3.0"
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ typechecks>=0.0.2
datacache>=1.1.4
memoized-property>=1.0.2
tinytimer
gtfparse>=1.3.0,<2.0.0
gtfparse>=2.1.0,<2.2.0
serializable
nose>=1.3.3
pylint>=1.4.4
Expand Down
1 change: 1 addition & 0 deletions test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
pytest --cov=pyensembl/ --cov-report=term-missing tests
60 changes: 0 additions & 60 deletions test/common.py

This file was deleted.

File renamed without changes.
88 changes: 88 additions & 0 deletions tests/common.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
import functools

from pyensembl import genome_for_reference_name, cached_release

import pytest


# Genome objects looked up by the reference names "GRCh37" and "GRCh38".
grch37 = genome_for_reference_name("GRCh37")
grch38 = genome_for_reference_name("GRCh38")

# Default genome list used by run_multiple_genomes when no explicit
# versions are requested.
major_releases = [grch37, grch38]

# Contig names as strings: "1" through "22", then "X", "Y" and "M".
contigs = list(map(str, range(1, 23))) + ["X", "Y", "M"]


def run_multiple_genomes(*versions):
    """Parametrize a test over several genomes via its ``genome`` argument.

    Supported call shapes:

    * ``@run_multiple_genomes`` (bare, i.e. the single positional argument
      is the callable being decorated): the callable is parametrized over
      ``major_releases`` and returned directly.
    * ``@run_multiple_genomes()``: returns a decorator that parametrizes
      over ``major_releases``.
    * ``@run_multiple_genomes(v1, v2, ...)``: returns a decorator that
      parametrizes over ``cached_release(v)`` for each given version.

    Note that a single callable argument is always treated as the bare
    decorator form.
    """
    used_as_bare_decorator = len(versions) == 1 and callable(versions[0])
    if used_as_bare_decorator:
        test_fn = versions[0]
        return pytest.mark.parametrize("genome", major_releases)(test_fn)

    if versions:
        genomes = [cached_release(v) for v in versions]
    else:
        genomes = major_releases

    def decorate(fn):
        return pytest.mark.parametrize("genome", genomes)(fn)

    return decorate


# tempfile.TemporaryDirectory was introduced in Python 3.2; on interpreters
# without it, fall back to a minimal context manager with the same name.
try:
    # pylint: disable=no-name-in-module
    from tempfile import TemporaryDirectory

except ImportError:
    # Building blocks for the stand-in implementation below.
    from tempfile import mkdtemp
    from shutil import rmtree

    class TemporaryDirectory(object):
        """Context manager that creates a directory and removes it on exit."""

        def __init__(self):
            # The directory is created eagerly, at construction time.
            self.name = mkdtemp()

        def __enter__(self, *args, **kwargs):
            """Return the path of the temporary directory."""
            return self.name

        def __exit__(self, type, value, traceback):
            """Delete the directory tree; never swallow an exception."""
            rmtree(self.name)
            # A falsy return lets any in-flight exception propagate.
            return False


def eq_(x, y, msg=None):
    """Assert that ``x == y``.

    If ``msg`` is not None it is used as the AssertionError message;
    otherwise the assertion is raised without a message.
    """
    if msg is not None:
        assert x == y, msg
        return
    assert x == y


def neq_(x, y, msg=None):
    """Assert that ``x != y``.

    If ``msg`` is not None it is used as the AssertionError message;
    otherwise the assertion is raised without a message.
    """
    if msg is not None:
        assert x != y, msg
        return
    assert x != y


def gt_(x, y, msg=None):
    """Assert that ``x > y``.

    If ``msg`` is not None it is used as the AssertionError message;
    otherwise the assertion is raised without a message.
    """
    if msg is not None:
        assert x > y, msg
        return
    assert x > y


def gte_(x, y, msg=None):
    """Assert that ``x >= y``.

    If ``msg`` is not None it is used as the AssertionError message;
    otherwise the assertion is raised without a message.
    """
    if msg is not None:
        assert x >= y, msg
        return
    assert x >= y


def lt_(x, y, msg=None):
    """Assert that ``x < y``.

    If ``msg`` is not None it is used as the AssertionError message;
    otherwise the assertion is raised without a message.
    """
    if msg is not None:
        assert x < y, msg
        return
    assert x < y


def lte_(x, y, msg=None):
    """Assert that ``x <= y``.

    If ``msg`` is not None it is used as the AssertionError message;
    otherwise the assertion is raised without a message.
    """
    if msg is not None:
        assert x <= y, msg
        return
    assert x <= y
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# expected format is <seqname> <source> <feature> <start> <end> <score> <strand> <frame> [attributes] [comments]
chr1 hg38_knownGene exon 17369 17436 0.000000 - . gene_id "uc031tla.1"; transcript_id "uc031tla.1";
chr1 hg38_knownGene exon 29554 30039 0.000000 + . gene_id "uc057aty.1"; transcript_id "uc057aty.1";
chr1 hg38_knownGene exon 30564 30667 0.000000 + . gene_id "uc057aty.1"; transcript_id "uc057aty.1";
Expand All @@ -12,4 +13,4 @@ chr1 hg38_knownGene exon 35245 35481 0.000000 - . gene_id "uc057aua.1"; transcri
chr1 hg38_knownGene exon 35721 36073 0.000000 - . gene_id "uc057aua.1"; transcript_id "uc057aua.1";
chr1 hg38_knownGene start_codon 69091 69093 0.000000 + . gene_id "uc001aal.1"; transcript_id "uc001aal.1";
chr1 hg38_knownGene CDS 69091 70005 0.000000 + 0 gene_id "uc001aal.1"; transcript_id "uc001aal.1";
chr1 hg38_knownGene stop_codon 70006 70008 0.000000 + . gene_id "uc001aal.1"; transcript_id "uc001aal.1";
chr1 hg38_knownGene stop_codon 70006 70008 0.000000 + . gene_id "uc001aal.1"; transcript_id "uc001aal.1";
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,4 @@ chr1 hg38_refGene CDS 67115352 67115464 0.000000 - 1 gene_id "NM_001276352"; tra
chr1 hg38_refGene exon 67115352 67115464 0.000000 - . gene_id "NM_001276352"; transcript_id "NM_001276352";
chr1 hg38_refGene CDS 67125752 67125909 0.000000 - 0 gene_id "NM_001276352"; transcript_id "NM_001276352";
chr1 hg38_refGene exon 67125752 67125909 0.000000 - . gene_id "NM_001276352"; transcript_id "NM_001276352";
chr1 hg38_refGene CDS 67127166 67127240 0.000000 - 0 gene_id "NM_001276352"; transcript_id "NM_001276352";
chr1 hg38_refGene CDS 67127166 67127240 0.000000 - 0 gene_id "NM_001276352"; transcript_id "NM_001276352";
File renamed without changes.
File renamed without changes.
5 changes: 3 additions & 2 deletions test/test_ensembl_gtf.py → tests/test_ensembl_gtf.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,10 @@
from os.path import exists


from .common import test_ensembl_releases
from .common import run_multiple_genomes

@test_ensembl_releases()

@run_multiple_genomes()
def gtf_path_endswith_gtf_gz(ensembl):
path = ensembl.gtf.gtf_path
assert exists(path)
Expand Down
File renamed without changes.
File renamed without changes.
58 changes: 37 additions & 21 deletions test/test_gene_ids.py → tests/test_gene_ids.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,14 @@
"""
from __future__ import absolute_import

from nose.tools import assert_raises, ok_
from pytest import raises
from pyensembl import ensembl_grch38, cached_release

from .common import test_ensembl_releases
from .common import run_multiple_genomes, eq_

ensembl77 = cached_release(77, "human")


def test_gene_ids_grch38_hla_a():
# chr6:29,945,884 is a position for HLA-A
# Gene ID = ENSG00000206503
Expand All @@ -21,40 +22,55 @@ def test_gene_ids_grch38_hla_a():
# Summary?db=core;g=ENSG00000206503;r=6:29941260-29945884
ids = ensembl_grch38.gene_ids_at_locus(6, 29945884)
expected = "ENSG00000206503"
assert ids == ["ENSG00000206503"], \
"Expected HLA-A, gene ID = %s, got: %s" % (expected, ids)
assert ids == ["ENSG00000206503"], "Expected HLA-A, gene ID = %s, got: %s" % (
expected,
ids,
)


def test_gene_ids_of_gene_name_hla_grch38():
hla_a_gene_ids = ensembl_grch38.gene_ids_of_gene_name("HLA-A")
assert 'ENSG00000206503' in hla_a_gene_ids, hla_a_gene_ids
assert "ENSG00000206503" in hla_a_gene_ids, hla_a_gene_ids

hla_b_gene_ids = ensembl_grch38.gene_ids_of_gene_name("HLA-B")
assert 'ENSG00000234745' in hla_b_gene_ids, hla_b_gene_ids
assert "ENSG00000234745" in hla_b_gene_ids, hla_b_gene_ids

hla_c_gene_ids = ensembl_grch38.gene_ids_of_gene_name("HLA-C")
assert 'ENSG00000204525' in hla_c_gene_ids, hla_c_gene_ids
assert "ENSG00000204525" in hla_c_gene_ids, hla_c_gene_ids


def test_gene_id_of_protein_id_release77():
gene_id = ensembl77.gene_id_of_protein_id("ENSP00000485677")
ok_('ENSG00000279634', gene_id)
eq_("ENSG00000279634", gene_id)


def test_gene_id_of_invalid_name():
with assert_raises(Exception):
ensembl_grch38.gene_ids_of_gene_name(
"A wonderous pony sees through your soul")
with raises(Exception):
ensembl_grch38.gene_ids_of_gene_name("A wonderous pony sees through your soul")


@test_ensembl_releases()
def test_gene_ids_on_contig(ensembl):
gene_ids_chr17 = ensembl.gene_ids(contig=17)
@run_multiple_genomes()
def test_gene_ids_on_contig(genome):
gene_ids_chr17 = genome.gene_ids(contig=17)
# gene ID of TP53
tp53 = "ENSG00000141510"
assert tp53 in gene_ids_chr17, \
"Missing %s from %s on chr17, example IDs: %s (total = %d)" % (
tp53, ensembl, gene_ids_chr17[:5], len(gene_ids_chr17))
assert (
tp53 in gene_ids_chr17
), "Missing %s from %s on chr17, example IDs: %s (total = %d)" % (
tp53,
genome,
gene_ids_chr17[:5],
len(gene_ids_chr17),
)

# gene ID of SMAD4
gene_ids_chr18 = ensembl.gene_ids(contig=18)
gene_ids_chr18 = genome.gene_ids(contig=18)
smad4 = "ENSG00000141646"
assert smad4 in gene_ids_chr18, \
"Missing %s from %s on chr18, example result: %s (total = %d)" % (
smad4, ensembl, gene_ids_chr18[:5], len(gene_ids_chr18))
assert (
smad4 in gene_ids_chr18
), "Missing %s from %s on chr18, example result: %s (total = %d)" % (
smad4,
genome,
gene_ids_chr18[:5],
len(gene_ids_chr18),
)
Loading