Skip to content

Commit

Permalink
Add the ray dataset example to test_client (#141)
Browse files Browse the repository at this point in the history
* Add the ray dataset example to test_client

* Pin pyarrow

* Add comment

* Fix bad import for Ray dataset

* Add newline

* Skip dataset test on old ray

* Fix import
  • Loading branch information
Yard1 authored Jul 29, 2021
1 parent 430dd01 commit dc72afb
Show file tree
Hide file tree
Showing 4 changed files with 17 additions and 6 deletions.
3 changes: 2 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,6 @@
"distributed computing framework Ray.",
url="https://github.com/ray-project/xgboost_ray",
install_requires=[
"xgboost>=0.90", "ray", "numpy>=1.16,<1.20", "pandas", "pyarrow"
"xgboost>=0.90", "ray", "numpy>=1.16,<1.20", "pandas", "pyarrow<5.0.0"
])
# pyarrow<5.0.0 pinned until petastorm is updated
8 changes: 4 additions & 4 deletions xgboost_ray/data_sources/ray_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from xgboost_ray.data_sources.object_store import ObjectStore

try:
import ray.experimental.data # noqa: F401
import ray.experimental.data.dataset # noqa: F401
RAY_DATASET_AVAILABLE = True
except (ImportError, AttributeError):
RAY_DATASET_AVAILABLE = False
Expand Down Expand Up @@ -41,11 +41,11 @@ def is_data_type(data: Any,
if not RAY_DATASET_AVAILABLE:
return False

return isinstance(data, ray.experimental.data.Dataset)
return isinstance(data, ray.experimental.data.dataset.Dataset)

@staticmethod
def load_data(
data: Any, # ray.experimental.data.Dataset
data: Any, # ray.experimental.data.dataset.Dataset
ignore: Optional[Sequence[str]] = None,
indices: Optional[Union[Sequence[int], Sequence[
ObjectRef]]] = None,
Expand All @@ -70,7 +70,7 @@ def convert_to_series(data: Any) -> pd.Series:

@staticmethod
def get_actor_shards(
data: Any, # ray.experimental.data.Dataset
data: Any, # ray.experimental.data.dataset.Dataset
actors: Sequence[ActorHandle]) -> \
Tuple[Any, Optional[Dict[int, Any]]]:
_assert_ray_data_available()
Expand Down
2 changes: 1 addition & 1 deletion xgboost_ray/matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
MLDataset = Unavailable

try:
from ray.experimental.data import Dataset as RayDataset
from ray.experimental.data.dataset import Dataset as RayDataset
except (ImportError, ModuleNotFoundError):

class RayDataset:
Expand Down
10 changes: 10 additions & 0 deletions xgboost_ray/tests/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

import ray
from ray.util.client.ray_client_helpers import ray_start_client_server
from xgboost_ray.data_sources.ray_dataset import RAY_DATASET_AVAILABLE


@pytest.fixture
Expand Down Expand Up @@ -46,6 +47,15 @@ def test_simple_modin(start_client_server_5_cpus):
main(cpus_per_actor=1, num_actors=4)


# Client-mode test for the Ray dataset example: calls the example's main()
# after checking that a Ray client connection is active.
# Skipped entirely when RAY_DATASET_AVAILABLE is false.
@pytest.mark.skipif(
not RAY_DATASET_AVAILABLE,
reason="Ray datasets are not available in this version of Ray")
def test_simple_ray_dataset(start_client_server_5_cpus):
# NOTE(review): the fixture body is not shown in this hunk; presumably it
# starts a Ray client server with 5 CPUs -- confirm against its definition.
# Fail fast if the test is not actually running through a Ray client.
assert ray.util.client.ray.is_connected()
# Imported inside the test rather than at module level; presumably so the
# example module is only loaded when the test is not skipped -- confirm.
from xgboost_ray.examples.simple_ray_dataset import main
# Run the example with 4 actors at 1 CPU each.
main(cpus_per_actor=1, num_actors=4)


if __name__ == "__main__":
import pytest # noqa: F811
import sys
Expand Down

0 comments on commit dc72afb

Please sign in to comment.