Rename all DataSet mentions to Dataset (excl. docs) (#3147)

Signed-off-by: Merel Theisen <[email protected]>
Co-authored-by: Deepyaman Datta <[email protected]>
kedro-org · Oct 10, 2023 · 2297d23
1 parent bb61b17
commit 2297d23
Show file tree

Hide file tree

Showing 32 changed files with 228 additions and 443 deletions.
diff --git a/.circleci/continue_config.yml b/.circleci/continue_config.yml
@@ -52,7 +52,7 @@ commands:
           name: Install venv for some pre-commit hooks
           command: conda install -y virtualenv
       - run:
-          # pytables does not work properly with python 3.9 to handle our HDFDataSet
+          # pytables does not work properly with python 3.9 to handle our HDFDataset
           # if pip-installed, so we install this dependency via conda
           name: Install pytables
           command: conda install -c conda-forge pytables -y

diff --git a/docs/source/kedro.io.rst b/docs/source/kedro.io.rst
@@ -13,16 +13,11 @@ kedro.io
 
    kedro.io.AbstractDataset
    kedro.io.AbstractVersionedDataset
-   kedro.io.CachedDataSet
    kedro.io.CachedDataset
    kedro.io.DataCatalog
-   kedro.io.IncrementalDataSet
    kedro.io.IncrementalDataset
-   kedro.io.LambdaDataSet
    kedro.io.LambdaDataset
-   kedro.io.MemoryDataSet
    kedro.io.MemoryDataset
-   kedro.io.PartitionedDataSet
    kedro.io.PartitionedDataset
    kedro.io.Version
 
@@ -32,9 +27,6 @@ kedro.io
    :toctree:
    :template: autosummary/class.rst
 
-   kedro.io.DataSetAlreadyExistsError
-   kedro.io.DataSetError
-   kedro.io.DataSetNotFoundError
    kedro.io.DatasetAlreadyExistsError
    kedro.io.DatasetError
    kedro.io.DatasetNotFoundError
diff --git a/docs/source/kedro_datasets.rst b/docs/source/kedro_datasets.rst
@@ -11,91 +11,48 @@ kedro_datasets
    :toctree:
    :template: autosummary/class.rst
 
-   kedro_datasets.api.APIDataSet
    kedro_datasets.api.APIDataset
-   kedro_datasets.biosequence.BioSequenceDataSet
    kedro_datasets.biosequence.BioSequenceDataset
-   kedro_datasets.dask.ParquetDataSet
    kedro_datasets.dask.ParquetDataset
-   kedro_datasets.databricks.ManagedTableDataSet
    kedro_datasets.databricks.ManagedTableDataset
-   kedro_datasets.email.EmailMessageDataSet
    kedro_datasets.email.EmailMessageDataset
-   kedro_datasets.geopandas.GeoJSONDataSet
    kedro_datasets.geopandas.GeoJSONDataset
    kedro_datasets.holoviews.HoloviewsWriter
-   kedro_datasets.json.JSONDataSet
    kedro_datasets.json.JSONDataset
    kedro_datasets.matplotlib.MatplotlibWriter
-   kedro_datasets.networkx.GMLDataSet
    kedro_datasets.networkx.GMLDataset
-   kedro_datasets.networkx.GraphMLDataSet
    kedro_datasets.networkx.GraphMLDataset
-   kedro_datasets.networkx.JSONDataSet
    kedro_datasets.networkx.JSONDataset
-   kedro_datasets.pandas.CSVDataSet
    kedro_datasets.pandas.CSVDataset
-   kedro_datasets.pandas.DeltaTableDataSet
    kedro_datasets.pandas.DeltaTableDataset
-   kedro_datasets.pandas.ExcelDataSet
    kedro_datasets.pandas.ExcelDataset
-   kedro_datasets.pandas.FeatherDataSet
    kedro_datasets.pandas.FeatherDataset
-   kedro_datasets.pandas.GBQQueryDataSet
    kedro_datasets.pandas.GBQQueryDataset
-   kedro_datasets.pandas.GBQTableDataSet
    kedro_datasets.pandas.GBQTableDataset
-   kedro_datasets.pandas.GenericDataSet
    kedro_datasets.pandas.GenericDataset
-   kedro_datasets.pandas.HDFDataSet
    kedro_datasets.pandas.HDFDataset
-   kedro_datasets.pandas.JSONDataSet
    kedro_datasets.pandas.JSONDataset
-   kedro_datasets.pandas.ParquetDataSet
    kedro_datasets.pandas.ParquetDataset
-   kedro_datasets.pandas.SQLQueryDataSet
    kedro_datasets.pandas.SQLQueryDataset
-   kedro_datasets.pandas.SQLTableDataSet
    kedro_datasets.pandas.SQLTableDataset
-   kedro_datasets.pandas.XMLDataSet
    kedro_datasets.pandas.XMLDataset
-   kedro_datasets.pickle.PickleDataSet
    kedro_datasets.pickle.PickleDataset
-   kedro_datasets.pillow.ImageDataSet
    kedro_datasets.pillow.ImageDataset
-   kedro_datasets.plotly.JSONDataSet
    kedro_datasets.plotly.JSONDataset
-   kedro_datasets.plotly.PlotlyDataSet
    kedro_datasets.plotly.PlotlyDataset
-   kedro_datasets.polars.CSVDataSet
    kedro_datasets.polars.CSVDataset
-   kedro_datasets.polars.GenericDataSet
    kedro_datasets.polars.GenericDataset
-   kedro_datasets.redis.PickleDataSet
    kedro_datasets.redis.PickleDataset
-   kedro_datasets.snowflake.SnowparkTableDataSet
    kedro_datasets.snowflake.SnowparkTableDataset
-   kedro_datasets.spark.DeltaTableDataSet
    kedro_datasets.spark.DeltaTableDataset
-   kedro_datasets.spark.SparkDataSet
    kedro_datasets.spark.SparkDataset
-   kedro_datasets.spark.SparkHiveDataSet
    kedro_datasets.spark.SparkHiveDataset
-   kedro_datasets.spark.SparkJDBCDataSet
    kedro_datasets.spark.SparkJDBCDataset
-   kedro_datasets.spark.SparkStreamingDataSet
    kedro_datasets.spark.SparkStreamingDataset
-   kedro_datasets.svmlight.SVMLightDataSet
    kedro_datasets.svmlight.SVMLightDataset
-   kedro_datasets.tensorflow.TensorFlowModelDataSet
    kedro_datasets.tensorflow.TensorFlowModelDataset
-   kedro_datasets.text.TextDataSet
    kedro_datasets.text.TextDataset
-   kedro_datasets.tracking.JSONDataSet
    kedro_datasets.tracking.JSONDataset
-   kedro_datasets.tracking.MetricsDataSet
    kedro_datasets.tracking.MetricsDataset
-   kedro_datasets.video.VideoDataSet
    kedro_datasets.video.VideoDataset
-   kedro_datasets.yaml.YAMLDataSet
    kedro_datasets.yaml.YAMLDataset
diff --git a/docs/source/tutorial/set_up_data.md b/docs/source/tutorial/set_up_data.md
@@ -28,11 +28,11 @@ Open `conf/base/catalog.yml` for the spaceflights project to inspect the content
 
 ```yaml
 companies:
-  type: pandas.CSVDataSet
+  type: pandas.CSVDataset
   filepath: data/01_raw/companies.csv
 
 reviews:
-  type: pandas.CSVDataSet
+  type: pandas.CSVDataset
   filepath: data/01_raw/reviews.csv
 ```
 </details> <br />
@@ -44,7 +44,7 @@ Likewise for the `xlsx` dataset:
 
 ```yaml
 shuttles:
-  type: pandas.ExcelDataSet
+  type: pandas.ExcelDataset
   filepath: data/01_raw/shuttles.xlsx
   load_args:
     engine: openpyxl # Use modern Excel engine (the default since Kedro 0.18.0)
@@ -75,7 +75,7 @@ companies.head()
 <summary><b>Click to expand</b></summary>
 
 ```
-INFO     Loading data from 'companies' (CSVDataSet)
+INFO     Loading data from 'companies' (CSVDataset)
 Out[1]:
       id company_rating       company_location  total_fleet_count iata_approved
 0  35029           100%                   Niue                4.0             f
@@ -100,7 +100,7 @@ You should see output such as the following:
 <summary><b>Click to expand</b></summary>
 
 ```
-INFO     Loading data from 'shuttles' (ExcelDataSet)
+INFO     Loading data from 'shuttles' (ExcelDataset)
 Out[1]:
       id       shuttle_location shuttle_type engine_type  ... d_check_complete  moon_clearance_complete     price company_id
 0  63561                   Niue      Type V5     Quantum  ...                f                        f  $1,325.0      35029

diff --git a/features/environment.py b/features/environment.py
@@ -118,6 +118,6 @@ def _install_project_requirements(context):
         .splitlines()
     )
     install_reqs = [req for req in install_reqs if "{" not in req and "#" not in req]
-    install_reqs.append("kedro-datasets[pandas.CSVDataSet]")
+    install_reqs.append("kedro-datasets[pandas.CSVDataset]")
     call([context.pip, "install", *install_reqs], env=context.env)
     return context
diff --git a/features/steps/e2e_test_catalog.yml b/features/steps/e2e_test_catalog.yml
@@ -1,20 +1,20 @@
 A:
-  type: pandas.CSVDataSet
+  type: pandas.CSVDataset
   filepath: data/01_raw/input_1.csv
   save_args:
     index: False
 C:
-  type: pandas.CSVDataSet
+  type: pandas.CSVDataset
   filepath: data/01_raw/input_2.csv
   save_args:
     index: False
 E:
-  type: pandas.CSVDataSet
+  type: pandas.CSVDataset
   filepath: data/02_intermediate/output_1.csv
   save_args:
     index: False
 F:
-  type: pandas.CSVDataSet
+  type: pandas.CSVDataset
   filepath: data/02_intermediate/output_2.csv
   save_args:
     index: False
diff --git a/features/steps/test_starter/{{ cookiecutter.repo_name }}/conf/base/catalog.yml b/features/steps/test_starter/{{ cookiecutter.repo_name }}/conf/base/catalog.yml
@@ -8,11 +8,11 @@
 # An example data set definition can look as follows:
 #
 #bikes:
-#  type: pandas.CSVDataSet
+#  type: pandas.CSVDataset
 #  filepath: "data/01_raw/bikes.csv"
 #
 #weather:
-#  type: spark.SparkDataSet
+#  type: spark.SparkDataset
 #  filepath: s3a://your_bucket/data/01_raw/weather*
 #  file_format: csv
 #  credentials: dev_s3
@@ -24,7 +24,7 @@
 #    header: True
 #
 #scooters:
-#  type: pandas.SQLTableDataSet
+#  type: pandas.SQLTableDataset
 #  credentials: scooters_credentials
 #  table_name: scooters
 #  load_args:
@@ -35,13 +35,13 @@
 #    # if_exists: 'fail'
 #    # if_exists: 'append'
 #
-# The Data Catalog supports being able to reference the same file using two different DataSet implementations
+# The Data Catalog supports being able to reference the same file using two different dataset implementations
 # (transcoding), templating and a way to reuse arguments that are frequently repeated. See more here:
 # https://kedro.readthedocs.io/en/stable/data/data_catalog.html
 #
 # This is a data set used by the "Hello World" example pipeline provided with the project
 # template. Please feel free to remove it once you remove the example pipeline.
 
 example_iris_data:
-  type: pandas.CSVDataSet
+  type: pandas.CSVDataset
   filepath: data/01_raw/iris.csv
diff --git a/features/steps/test_starter/{{ cookiecutter.repo_name }}/requirements.txt b/features/steps/test_starter/{{ cookiecutter.repo_name }}/requirements.txt
@@ -5,7 +5,7 @@ jupyter~=1.0
 jupyterlab_server>=2.11.1, <2.16.0
 jupyterlab~=3.0, <3.6.0
 kedro~={{ cookiecutter.kedro_version}}
-kedro-datasets[pandas.CSVDataSet]
+kedro-datasets[pandas.CSVDataset]
 kedro-telemetry~=0.2.0
 pytest-cov~=3.0
 pytest-mock>=1.7.1, <2.0

diff --git a/kedro/config/templated_config.py b/kedro/config/templated_config.py
@@ -63,8 +63,8 @@ class TemplatedConfigLoader(AbstractConfigLoader):
         environment: "dev"
 
         datasets:
-            csv: "pandas.CSVDataSet"
-            spark: "spark.SparkDataSet"
+            csv: "pandas.CSVDataset"
+            spark: "spark.SparkDataset"
 
         folders:
             raw: "01_raw"

diff --git a/kedro/io/__init__.py b/kedro/io/__init__.py
@@ -3,7 +3,7 @@
 """
 from __future__ import annotations
 
-from .cached_dataset import CachedDataSet, CachedDataset
+from .cached_dataset import CachedDataset
 from .core import (
     AbstractDataset,
     AbstractVersionedDataset,
@@ -13,52 +13,24 @@
     Version,
 )
 from .data_catalog import DataCatalog
-from .lambda_dataset import LambdaDataSet, LambdaDataset
-from .memory_dataset import MemoryDataSet, MemoryDataset
+from .lambda_dataset import LambdaDataset
+from .memory_dataset import MemoryDataset
 from .partitioned_dataset import (
-    IncrementalDataSet,
     IncrementalDataset,
-    PartitionedDataSet,
     PartitionedDataset,
 )
 
-# https://github.com/pylint-dev/pylint/issues/4300#issuecomment-1043601901
-DataSetError: type[DatasetError]
-DataSetNotFoundError: type[DatasetNotFoundError]
-DataSetAlreadyExistsError: type[DatasetAlreadyExistsError]
-AbstractDataSet: type[AbstractDataset]
-AbstractVersionedDataSet: type[AbstractVersionedDataset]
-
-
-def __getattr__(name):
-    import kedro.io.core  # noqa: import-outside-toplevel
-
-    if name in (kedro.io.core._DEPRECATED_CLASSES):  # noqa: protected-access
-        return getattr(kedro.io.core, name)
-    raise AttributeError(f"module {repr(__name__)} has no attribute {repr(name)}")
-
-
 __all__ = [
-    "AbstractDataSet",
     "AbstractDataset",
-    "AbstractVersionedDataSet",
     "AbstractVersionedDataset",
-    "CachedDataSet",
     "CachedDataset",
     "DataCatalog",
-    "DataSetAlreadyExistsError",
     "DatasetAlreadyExistsError",
-    "DataSetError",
     "DatasetError",
-    "DataSetNotFoundError",
     "DatasetNotFoundError",
-    "IncrementalDataSet",
     "IncrementalDataset",
-    "LambdaDataSet",
     "LambdaDataset",
-    "MemoryDataSet",
     "MemoryDataset",
-    "PartitionedDataSet",
     "PartitionedDataset",
     "Version",
 ]
diff --git a/kedro/io/cached_dataset.py b/kedro/io/cached_dataset.py
@@ -5,15 +5,11 @@
 from __future__ import annotations
 
 import logging
-import warnings
 from typing import Any
 
 from kedro.io.core import VERSIONED_FLAG_KEY, AbstractDataset, Version
 from kedro.io.memory_dataset import MemoryDataset
 
-# https://github.com/pylint-dev/pylint/issues/4300#issuecomment-1043601901
-CachedDataSet: type[CachedDataset]
-
 
 class CachedDataset(AbstractDataset):
     """``CachedDataset`` is a dataset wrapper which caches in memory the data saved,
@@ -121,16 +117,3 @@ def __getstate__(self):
         logging.getLogger(__name__).warning("%s: clearing cache to pickle.", str(self))
         self._cache.release()
         return self.__dict__
-
-
-def __getattr__(name):
-    if name == "CachedDataSet":
-        alias = CachedDataset
-        warnings.warn(
-            f"{repr(name)} has been renamed to {repr(alias.__name__)}, "
-            f"and the alias will be removed in Kedro 0.19.0",
-            DeprecationWarning,
-            stacklevel=2,
-        )
-        return alias
-    raise AttributeError(f"module {repr(__name__)} has no attribute {repr(name)}")
diff --git a/kedro/io/core.py b/kedro/io/core.py
@@ -29,13 +29,6 @@
 PROTOCOL_DELIMITER = "://"
 CLOUD_PROTOCOLS = ("s3", "s3n", "s3a", "gcs", "gs", "adl", "abfs", "abfss", "gdrive")
 
-# https://github.com/pylint-dev/pylint/issues/4300#issuecomment-1043601901
-DataSetError: type[DatasetError]
-DataSetNotFoundError: type[DatasetNotFoundError]
-DataSetAlreadyExistsError: type[DatasetAlreadyExistsError]
-AbstractDataSet: type[AbstractDataset]
-AbstractVersionedDataSet: type[AbstractVersionedDataset]
-
 
 class DatasetError(Exception):
     """``DatasetError`` raised by ``AbstractDataset`` implementations
@@ -757,25 +750,3 @@ def validate_on_forbidden_chars(**kwargs):
             raise DatasetError(
                 f"Neither white-space nor semicolon are allowed in '{key}'."
             )
-
-
-_DEPRECATED_CLASSES = {
-    "DataSetError": DatasetError,
-    "DataSetNotFoundError": DatasetNotFoundError,
-    "DataSetAlreadyExistsError": DatasetAlreadyExistsError,
-    "AbstractDataSet": AbstractDataset,
-    "AbstractVersionedDataSet": AbstractVersionedDataset,
-}
-
-
-def __getattr__(name):
-    if name in _DEPRECATED_CLASSES:
-        alias = _DEPRECATED_CLASSES[name]
-        warnings.warn(
-            f"{repr(name)} has been renamed to {repr(alias.__name__)}, "
-            f"and the alias will be removed in Kedro 0.19.0",
-            DeprecationWarning,
-            stacklevel=2,
-        )
-        return alias
-    raise AttributeError(f"module {repr(__name__)} has no attribute {repr(name)}")