Skip to content

Commit

Permalink
[attempt 2] Eager import pandas in ray data for python >= 3.7 (ray-project#33103)
Browse files Browse the repository at this point in the history

Signed-off-by: Jack He <[email protected]>
  • Loading branch information
ericl authored and ProjectsByJackHe committed May 4, 2023
1 parent 058085e commit 956aad1
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 7 deletions.
8 changes: 8 additions & 0 deletions python/ray/data/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,11 @@
import sys

# Short-term workaround for https://github.com/ray-project/ray/issues/32435.
# Datasets currently has a hard dependency on pandas, so its import does not
# need to be delayed: import it eagerly here, ahead of the ray.data submodules.
# The eager import is limited to Python >= 3.7 because on Python 3.6 ray.data
# itself is still imported eagerly by every `import ray`.
if sys.version_info >= (3, 7):
    import pandas  # noqa

from ray.data._internal.compute import ActorPoolStrategy
from ray.data._internal.progress_bar import set_progress_bars
from ray.data.dataset import Dataset
Expand Down
9 changes: 2 additions & 7 deletions python/ray/data/tests/test_dataset_image.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import os
import time
from typing import Dict

import numpy as np
Expand All @@ -18,6 +17,7 @@
from ray.data.tests.conftest import * # noqa
from ray.data.tests.mock_http_server import * # noqa
from ray.tests.conftest import * # noqa
from ray._private.test_utils import wait_for_condition


class TestReadImages:
Expand Down Expand Up @@ -193,12 +193,7 @@ def test_dynamic_block_split(ray_start_regular_shared):
ds.fully_executed()
# Verify dynamic block splitting taking effect to generate more blocks.
assert ds.num_blocks() == 3

# NOTE: Need to wait for 1 second before checking stats, because we report
# stats to stats actors asynchronously when returning the blocks metadata.
# TODO(chengsu): clean it up after refactoring lazy block list.
time.sleep(1)
assert "3 blocks executed" in ds.stats()
wait_for_condition(lambda: "3 blocks executed" in ds.stats(), timeout=20)

# Test union of same datasets
union_ds = ds.union(ds, ds, ds).fully_executed()
Expand Down

0 comments on commit 956aad1

Please sign in to comment.