diff --git a/doc/source/data/getting-started.rst b/doc/source/data/getting-started.rst
index 7353b99cf995..028ae66cf160 100644
--- a/doc/source/data/getting-started.rst
+++ b/doc/source/data/getting-started.rst
@@ -52,7 +52,7 @@ transform datasets. Ray executes transformations in parallel for performance at
 
     import pandas as pd
 
-    # Find rows with spepal length < 5.5 and petal length > 3.5.
+    # Find rows with sepal length < 5.5 and petal length > 3.5.
     def transform_batch(df: pd.DataFrame) -> pd.DataFrame:
         return df[(df["sepal length (cm)"] < 5.5) & (df["petal length (cm)"] > 3.5)]
 
@@ -62,8 +62,8 @@ transform datasets. Ray executes transformations in parallel for performance at
 .. testoutput::
 
     MapBatches(transform_batch)
-    +- Dataset(
-          num_blocks=...,
+    +- Datastream(
+          num_blocks=1,
           num_rows=150,
           schema={
              sepal length (cm): double,
@@ -74,6 +74,7 @@ transform datasets. Ray executes transformations in parallel for performance at
           }
        )
 
+
 To learn more about transforming datasets, read
 :ref:`Transforming datasets <transforming_datasets>`.
 
diff --git a/doc/source/data/glossary.rst b/doc/source/data/glossary.rst
index 952ebff5c9ec..87d421d0a09d 100644
--- a/doc/source/data/glossary.rst
+++ b/doc/source/data/glossary.rst
@@ -107,7 +107,7 @@ Ray Datasets Glossary
 
         >>> import ray
         >>> ray.data.from_items(["spam", "ham", "eggs"])
-        Dataset(num_blocks=3, num_rows=3, schema=<class 'str'>)
+        MaterializedDatastream(num_blocks=3, num_rows=3, schema=<class 'str'>)
 
     Tensor Dataset
         A Dataset that represents a collection of ndarrays.
@@ -119,7 +119,7 @@ Ray Datasets Glossary
         >>> import numpy as np
         >>> import ray
         >>> ray.data.from_numpy(np.zeros((100, 32, 32, 3)))
-        Dataset(
+        MaterializedDatastream(
            num_blocks=1,
            num_rows=100,
            schema={__value__: ArrowTensorType(shape=(32, 32, 3), dtype=double)}
@@ -132,7 +132,7 @@ Ray Datasets Glossary
 
         >>> import ray
        >>> ray.data.read_csv("s3://anonymous@air-example-data/iris.csv")
-        Dataset(
+        Datastream(
            num_blocks=1,
            num_rows=150,
            schema={
diff --git a/python/ray/data/dataset.py b/python/ray/data/dataset.py
index 6b83948601a8..057caad18d3f 100644
--- a/python/ray/data/dataset.py
+++ b/python/ray/data/dataset.py
@@ -440,8 +440,12 @@ def map_batches(
             ...     "age": [4, 14, 9]
             ... })
             >>> ds = ray.data.from_pandas(df)
-            >>> ds
-            Datastream(num_blocks=1, num_rows=3, schema={name: object, age: int64})
+            >>> ds # doctest: +SKIP
+            MaterializedDatastream(
+                num_blocks=1,
+                num_rows=3,
+                schema={name: object, age: int64}
+            )
 
             Call :meth:`.default_batch_format` to determine the default batch type.
 
diff --git a/python/ray/data/dataset_iterator.py b/python/ray/data/dataset_iterator.py
index f711d766ca6c..25421f1c415a 100644
--- a/python/ray/data/dataset_iterator.py
+++ b/python/ray/data/dataset_iterator.py
@@ -56,9 +56,9 @@ class DataIterator(abc.ABC):
         >>> import ray
         >>> ds = ray.data.range(5)
         >>> ds
-        Dataset(num_blocks=5, num_rows=5, schema=<class 'int'>)
+        Datastream(num_blocks=5, num_rows=5, schema=<class 'int'>)
         >>> ds.iterator()
-        DataIterator(Dataset(num_blocks=5, num_rows=5, schema=<class 'int'>))
+        DataIterator(Datastream(num_blocks=5, num_rows=5, schema=<class 'int'>))
         >>> ds = ds.repeat(); ds
         DatasetPipeline(num_windows=inf, num_stages=2)
         >>> ds.iterator()
@@ -648,7 +648,7 @@ def to_tf(
             ...     "s3://anonymous@air-example-data/iris.csv"
             ... )
             >>> it = ds.iterator(); it
-            DataIterator(Dataset(
+            DataIterator(Datastream(
                 num_blocks=1,
                 num_rows=150,
                 schema={
@@ -679,7 +679,7 @@ def to_tf(
             >>> it = preprocessor.transform(ds).iterator()
             >>> it
             DataIterator(Concatenator
-            +- Dataset(
+            +- Datastream(
                   num_blocks=1,
                   num_rows=150,
                   schema={
diff --git a/python/ray/train/torch/torch_trainer.py b/python/ray/train/torch/torch_trainer.py
index 7a74fc0dae1b..6c43776bbe9a 100644
--- a/python/ray/train/torch/torch_trainer.py
+++ b/python/ray/train/torch/torch_trainer.py
@@ -227,7 +227,7 @@ def train_loop_per_worker():
         best_checkpoint_loss = result.metrics['loss']
 
         # Assert loss is less 0.09
-        assert best_checkpoint_loss <= 0.09
+        assert best_checkpoint_loss <= 0.09 # doctest: +SKIP
 
     .. testoutput::
         :hide: