Add some missing logic for failed URIs in datasets and test_saving (#…
ascillitoe committed Nov 8, 2022
1 parent 2149e5d commit 455a11d
Showing 5 changed files with 35 additions and 7 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -8,6 +8,7 @@

### Development
- UTF-8 decoding is enforced when `README.md` is opened by `setup.py`. This is to prevent pip install errors on systems with `PYTHONIOENCODING` set to use other encoders ([#605](https://github.com/SeldonIO/alibi-detect/pull/605)).
+- Skip specific save/load tests that require downloading remote artefacts if the relevant URIs are down ([#607](https://github.com/SeldonIO/alibi-detect/pull/607)).

## [v0.10.3](https://github.com/SeldonIO/alibi-detect/tree/v0.10.3) (2022-08-17)
7 changes: 6 additions & 1 deletion alibi_detect/datasets.py
@@ -11,6 +11,7 @@
from alibi_detect.utils.data import Bunch
from alibi_detect.utils.url import _join_url
from requests import RequestException
+from urllib.error import URLError
from scipy.io import arff
from sklearn.datasets import fetch_kddcup99

@@ -59,7 +60,11 @@ def fetch_kdd(target: list = ['dos', 'r2l', 'u2r', 'probe'],
"""

# fetch raw data
data_raw = fetch_kddcup99(subset=None, data_home=None, percent10=percent10)
try:
data_raw = fetch_kddcup99(subset=None, data_home=None, percent10=percent10)
except URLError:
logger.exception("Could not connect, URL may be out of service")
raise

# specify columns
cols = ['duration', 'protocol_type', 'service', 'flag', 'src_bytes', 'dst_bytes',
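Since `fetch_kdd` now logs and re-raises `URLError`, downstream callers can distinguish an unreachable KDD Cup '99 mirror from other failures. A minimal sketch of such a caller (not part of this commit; the fallback shown is illustrative):

from urllib.error import URLError

from alibi_detect.datasets import fetch_kdd

try:
    kdd = fetch_kdd(percent10=True)
except URLError:
    # Upstream URL unreachable: fall back or abort gracefully.
    print("KDD Cup '99 source unreachable; retry later or use a cached copy.")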
7 changes: 6 additions & 1 deletion alibi_detect/saving/tests/datasets.py
@@ -1,6 +1,8 @@
import numpy as np
+import pytest
from alibi_testing.data import get_movie_sentiment_data
from pytest_cases import parametrize
+from requests import RequestException

# Note: If any of below cases become large, see https://smarie.github.io/python-pytest-cases/#c-caching-cases
FLOAT = np.float32
@@ -63,4 +65,7 @@ def data_synthetic_nd(data_shape):
class TextData:
    @staticmethod
    def movie_sentiment_data():
-        return get_movie_sentiment_data()
+        try:
+            return get_movie_sentiment_data()
+        except RequestException:
+            pytest.skip('Movie sentiment dataset URL down')
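`pytest.skip` raised inside a case function propagates to every test parametrized with that case, so a dead movie-sentiment URL now yields skips instead of failures. A generic sketch of the same pattern (hypothetical URL and helper name):

import pytest
import requests

def remote_case_data():
    # Tests consuming this case are skipped, not failed, when the URL is down.
    try:
        return requests.get('https://example.com/data.json', timeout=10).json()
    except requests.RequestException:
        pytest.skip('remote dataset URL down')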
16 changes: 13 additions & 3 deletions alibi_detect/saving/tests/test_saving.py
@@ -9,6 +9,7 @@
from functools import partial
from pathlib import Path
from typing import Callable
+from requests.exceptions import HTTPError

import toml
import dill
@@ -202,7 +203,10 @@ def nlp_embedding_and_tokenizer(model_name, max_len, uae, backend):
    backend = 'tf' if backend == 'tensorflow' else 'pt'

    # Load tokenizer
-    tokenizer = AutoTokenizer.from_pretrained(model_name)
+    try:
+        tokenizer = AutoTokenizer.from_pretrained(model_name)
+    except (OSError, HTTPError):
+        pytest.skip(f"Problem downloading {model_name} from huggingface.co")
    X = 'A dummy string'  # this will be padded to max_len
    tokens = tokenizer(list(X[:5]), pad_to_max_length=True,
                       max_length=max_len, return_tensors=backend)
@@ -214,13 +218,19 @@ def nlp_embedding_and_tokenizer(model_name, max_len, uae, backend):
    enc_dim = 32

    if backend == 'tf':
-        embedding = TransformerEmbedding_tf(model_name, emb_type, layers)
+        try:
+            embedding = TransformerEmbedding_tf(model_name, emb_type, layers)
+        except (OSError, HTTPError):
+            pytest.skip(f"Problem downloading {model_name} from huggingface.co")
        if uae:
            x_emb = embedding(tokens)
            shape = (x_emb.shape[1],)
            embedding = UAE_tf(input_layer=embedding, shape=shape, enc_dim=enc_dim)
    else:
-        embedding = TransformerEmbedding_pt(model_name, emb_type, layers)
+        try:
+            embedding = TransformerEmbedding_pt(model_name, emb_type, layers)
+        except (OSError, HTTPError):
+            pytest.skip(f"Problem downloading {model_name} from huggingface.co")
        if uae:
            x_emb = embedding(tokens)
            emb_dim = x_emb.shape[1]
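The same guard appears three times in this file; one possible refactor (an untested sketch, not part of this commit) wraps the Hugging Face download once:

import pytest
from requests.exceptions import HTTPError
from transformers import AutoTokenizer

def tokenizer_or_skip(model_name: str):
    # Skip the calling test when the hub is unreachable or the download fails;
    # from_pretrained raises OSError for missing or unfetchable models.
    try:
        return AutoTokenizer.from_pretrained(model_name)
    except (OSError, HTTPError):
        pytest.skip(f"Problem downloading {model_name} from huggingface.co")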
11 changes: 9 additions & 2 deletions alibi_detect/tests/test_datasets.py
@@ -2,6 +2,7 @@
import pandas as pd
import pytest
from requests import RequestException
+from urllib.error import URLError
from alibi_detect.datasets import fetch_kdd, fetch_ecg, corruption_types_cifar10c, fetch_cifar10c, \
    fetch_attack, fetch_nab, get_list_nab
from alibi_detect.utils.data import Bunch
from alibi_detect.utils.data import Bunch
@@ -24,7 +25,7 @@ def test_fetch_kdd(return_X_y):
    keep_cols = np.random.choice(keep_cols_list, 5, replace=False)
    try:
        data = fetch_kdd(target=target, keep_cols=keep_cols, percent10=True, return_X_y=return_X_y)
-    except RequestException:
+    except URLError:
        pytest.skip('KDD dataset URL down')
    if return_X_y:
        assert isinstance(data, tuple)
@@ -53,13 +54,19 @@ def test_fetch_ecg(return_X_y):


# CIFAR-10-C dataset
-corruption_list = corruption_types_cifar10c()
+try:
+    corruption_list = corruption_types_cifar10c()
+except RequestException:
+    corruption_list = None


+@pytest.mark.skipif(corruption_list is None, reason="CIFAR-10-C dataset URL is down")
def test_types_cifar10c():
    print(corruption_list)
    assert len(corruption_list) == 19


+@pytest.mark.skipif(corruption_list is None, reason="CIFAR-10-C dataset URL is down")
@pytest.mark.parametrize('return_X_y', [True, False])
def test_fetch_cifar10c(return_X_y):
    corruption = list(np.random.choice(corruption_list, 5, replace=False))
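The module-level try/except runs the network call once at collection time, and `skipif` then disables both CIFAR-10-C tests with a uniform reason; the tradeoff is that the request fires on import even when those tests are deselected. A self-contained sketch of the same idea (hypothetical URL):

import urllib.request
from urllib.error import URLError

import pytest

try:
    with urllib.request.urlopen('https://example.com/listing.txt', timeout=10) as resp:
        LISTING = resp.read().decode()
except URLError:
    LISTING = None  # collection still succeeds; the test below is skipped


@pytest.mark.skipif(LISTING is None, reason='listing URL is down')
def test_listing_nonempty():
    assert LISTING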
