From dc8a7e7b84ecb6519a37505f5cd4def030491d64 Mon Sep 17 00:00:00 2001 From: BenjaminLevyQB Date: Thu, 9 Sep 2021 11:20:01 -0400 Subject: [PATCH 01/12] creating sql_dataset branch --- kedro/extras/datasets/pandas/sql_dataset.py | 61 ++++++++++++++++--- .../datasets/pandas/test_sql_dataset.py | 30 ++++++++- 2 files changed, 81 insertions(+), 10 deletions(-) diff --git a/kedro/extras/datasets/pandas/sql_dataset.py b/kedro/extras/datasets/pandas/sql_dataset.py index 5744fb5b56..768c1bd403 100644 --- a/kedro/extras/datasets/pandas/sql_dataset.py +++ b/kedro/extras/datasets/pandas/sql_dataset.py @@ -29,13 +29,20 @@ import copy import re +from pathlib import PurePosixPath from typing import Any, Dict, Optional +import fsspec import pandas as pd from sqlalchemy import create_engine from sqlalchemy.exc import NoSuchModuleError -from kedro.io.core import AbstractDataSet, DataSetError +from kedro.io.core import ( + AbstractDataSet, + DataSetError, + get_filepath_str, + get_protocol_and_path, +) __all__ = ["SQLTableDataSet", "SQLQueryDataSet"] @@ -278,13 +285,19 @@ class SQLQueryDataSet(AbstractDataSet): """ - def __init__( - self, sql: str, credentials: Dict[str, Any], load_args: Dict[str, Any] = None + def __init__( # pylint: disable=too-many-arguments + self, + sql: str = None, + filepath: str = None, + credentials: Dict[str, Any] = None, + load_args: Dict[str, Any] = None, + fs_args: Dict[str, Any] = None, ) -> None: """Creates a new ``SQLQueryDataSet``. Args: sql: The sql query statement. + filepath: A path to a file with a sql query statement credentials: A dictionary with a ``SQLAlchemy`` connection string. Users are supposed to provide the connection string 'con' through credentials. 
It overwrites `con` parameter in @@ -297,14 +310,23 @@ def __init__( https://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_sql_query.html To find all supported connection string formats, see here: https://docs.sqlalchemy.org/en/13/core/engines.html#database-urls + fs_args: Extra arguments to pass into underlying filesystem class constructor + (e.g. `{"project": "my-project"}` for ``GCSFileSystem``), as well as + to pass to the filesystem's `open` method through nested keys + `open_args_load` and `open_args_save`. + Here you can find all available arguments for `open`: + https://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open + All defaults are preserved, except `mode`, which is set to `r` when loading + and to `w` when saving. Raises: DataSetError: When either ``sql`` or ``con`` parameters is emtpy. """ - if not sql: + if not (sql or filepath): raise DataSetError( - "`sql` argument cannot be empty. Please provide a sql query" + "`sql` and `filepath` arguments cannot both be empty." + "Please provide a sql query or path to a sql query file." 
) if not (credentials and "con" in credentials and credentials["con"]): @@ -321,7 +343,23 @@ def __init__( else default_load_args ) - self._load_args["sql"] = sql + # load sql query from file + if not sql: + # filesystem for loading sql file + _fs_args = copy.deepcopy(fs_args) or {} + _fs_open_args_load = _fs_args.pop("open_args_load", {}) + _fs_credentials = _fs_args.pop("credentials", {}) + protocol, path = get_protocol_and_path(str(filepath)) + + self._protocol = protocol + self._fs = fsspec.filesystem(self._protocol, **_fs_credentials, **_fs_args) + + _fs_open_args_load.setdefault("mode", "r") + self._fs_open_args_load = _fs_open_args_load + + self._load_args["filepath"] = path + else: + self._load_args["sql"] = sql self._load_args["con"] = credentials["con"] def _describe(self) -> Dict[str, Any]: @@ -331,8 +369,17 @@ def _describe(self) -> Dict[str, Any]: return dict(sql=self._load_args["sql"], load_args=load_args) def _load(self) -> pd.DataFrame: + load_args = self._load_args.copy() + + if "sql" not in load_args: + filepath = load_args.pop("filepath") + load_path = get_filepath_str(PurePosixPath(filepath), self._protocol) + + with self._fs.open(load_path, **self._fs_open_args_load) as fs_file: + load_args["sql"] = fs_file.read() + try: - return pd.read_sql_query(**self._load_args) + return pd.read_sql_query(**load_args) except ImportError as import_error: raise _get_missing_module_error(import_error) from import_error except NoSuchModuleError as exc: diff --git a/tests/extras/datasets/pandas/test_sql_dataset.py b/tests/extras/datasets/pandas/test_sql_dataset.py index 49764fe497..2f54a0f2a2 100644 --- a/tests/extras/datasets/pandas/test_sql_dataset.py +++ b/tests/extras/datasets/pandas/test_sql_dataset.py @@ -51,6 +51,14 @@ def dummy_dataframe(): return pd.DataFrame({"col1": [1, 2], "col2": [4, 5], "col3": [5, 6]}) +@pytest.fixture +def sql_file(tmp_path): + file = tmp_path / "test.sql" + with file.open("w") as f: + f.write(SQL_QUERY) + return f.as_posix() + + 
@pytest.fixture(params=[{}]) def table_data_set(request): kwargs = dict(table_name=TABLE_NAME, credentials=dict(con=CONNECTION)) @@ -65,6 +73,13 @@ def query_data_set(request): return SQLQueryDataSet(**kwargs) +@pytest.fixture(params=[{}]) +def query_file_data_set(request, sql_file): + kwargs = dict(filepath=sql_file, credentials=dict(con=CONNECTION)) + kwargs.update(request.param) + return SQLQueryDataSet(**kwargs) + + class TestSQLTableDataSetLoad: @staticmethod def _assert_pd_called_once(): @@ -244,10 +259,13 @@ def _assert_pd_called_once(): _callable.assert_called_once_with(sql=SQL_QUERY, con=CONNECTION) def test_empty_query_error(self): - """Check the error when instantiating with empty query""" - pattern = r"`sql` argument cannot be empty\. Please provide a sql query" + """Check the error when instantiating with empty query or file""" + pattern = ( + r"`sql` and `filepath` arguments cannot both be empty\." + r"Please provide a sql query or path to a sql query file\." + ) with pytest.raises(DataSetError, match=pattern): - SQLQueryDataSet(sql="", credentials=dict(con=CONNECTION)) + SQLQueryDataSet(sql="", filepath="", credentials=dict(con=CONNECTION)) def test_empty_con_error(self): """Check the error when instantiating with empty connection string""" @@ -264,6 +282,12 @@ def test_load(self, mocker, query_data_set): query_data_set.load() self._assert_pd_called_once() + def test_load_query_file(self, mocker, query_file_data_set): + """Test `load` method with a query file""" + mocker.patch("pandas.read_sql_query") + query_file_data_set.load() + self._assert_pd_called_once() + def test_load_driver_missing(self, mocker, query_data_set): """Test that if an unknown module/driver is encountered by SQLAlchemy then the error should contain the original error message""" From dd425af685af42ec2d40b09c541bc31c75728161 Mon Sep 17 00:00:00 2001 From: Ben Levy <79479484+BenjaminLevyQB@users.noreply.github.com> Date: Fri, 10 Sep 2021 11:28:44 -0400 Subject: [PATCH 02/12] Update 
kedro/extras/datasets/pandas/sql_dataset.py Co-authored-by: Merel Theisen <49397448+MerelTheisenQB@users.noreply.github.com> --- kedro/extras/datasets/pandas/sql_dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kedro/extras/datasets/pandas/sql_dataset.py b/kedro/extras/datasets/pandas/sql_dataset.py index 768c1bd403..c7a939ac6e 100644 --- a/kedro/extras/datasets/pandas/sql_dataset.py +++ b/kedro/extras/datasets/pandas/sql_dataset.py @@ -297,7 +297,7 @@ def __init__( # pylint: disable=too-many-arguments Args: sql: The sql query statement. - filepath: A path to a file with a sql query statement + filepath: A path to a file with a sql query statement. credentials: A dictionary with a ``SQLAlchemy`` connection string. Users are supposed to provide the connection string 'con' through credentials. It overwrites `con` parameter in From 7fce17a1bd2017c1499449847e1260c7f6760869 Mon Sep 17 00:00:00 2001 From: BenjaminLevyQB Date: Fri, 10 Sep 2021 11:47:22 -0400 Subject: [PATCH 03/12] improving desc for sql dataset --- kedro/extras/datasets/pandas/sql_dataset.py | 10 ++++++++-- tests/extras/datasets/pandas/test_sql_dataset.py | 12 ++++++++++-- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/kedro/extras/datasets/pandas/sql_dataset.py b/kedro/extras/datasets/pandas/sql_dataset.py index 768c1bd403..b6e2032afc 100644 --- a/kedro/extras/datasets/pandas/sql_dataset.py +++ b/kedro/extras/datasets/pandas/sql_dataset.py @@ -364,9 +364,15 @@ def __init__( # pylint: disable=too-many-arguments def _describe(self) -> Dict[str, Any]: load_args = self._load_args.copy() - del load_args["sql"] + desc = {} + if "sql" in load_args: + desc["sql"] = load_args.pop("sql") + if "filepath" in load_args: + desc["filepath"] = str(load_args.pop("filepath")) del load_args["con"] - return dict(sql=self._load_args["sql"], load_args=load_args) + desc["load_args"] = load_args + + return desc def _load(self) -> pd.DataFrame: load_args = self._load_args.copy() diff 
--git a/tests/extras/datasets/pandas/test_sql_dataset.py b/tests/extras/datasets/pandas/test_sql_dataset.py index 2f54a0f2a2..b1b370e060 100644 --- a/tests/extras/datasets/pandas/test_sql_dataset.py +++ b/tests/extras/datasets/pandas/test_sql_dataset.py @@ -56,7 +56,7 @@ def sql_file(tmp_path): file = tmp_path / "test.sql" with file.open("w") as f: f.write(SQL_QUERY) - return f.as_posix() + return file.as_posix() @pytest.fixture(params=[{}]) @@ -330,8 +330,16 @@ def test_save_error(self, query_data_set, dummy_dataframe): with pytest.raises(DataSetError, match=pattern): query_data_set.save(dummy_dataframe) - def test_str_representation_sql(self, query_data_set): + def test_str_representation_sql(self, query_data_set, sql_file): """Test the data set instance string representation""" str_repr = str(query_data_set) assert f"SQLQueryDataSet(load_args={{}}, sql={SQL_QUERY})" in str_repr assert CONNECTION not in str_repr + assert sql_file not in str_repr + + def test_str_representation_filepath(self, query_file_data_set, sql_file): + """Test the data set instance string representation with filepath arg.""" + str_repr = str(query_file_data_set) + assert f"SQLQueryDataSet(filepath={str(sql_file)}, load_args={{}}" in str_repr + assert CONNECTION not in str_repr + assert SQL_QUERY not in str_repr From 68e734e69a058da20cf07f8b4964983086d7ee99 Mon Sep 17 00:00:00 2001 From: BenjaminLevyQB Date: Fri, 10 Sep 2021 11:53:46 -0400 Subject: [PATCH 04/12] Updating RELEASE.md --- RELEASE.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/RELEASE.md b/RELEASE.md index 28a89a2a86..2169c0a9e4 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -20,6 +20,7 @@ * Bumped minimum required `fsspec` version to 2021.04. * Fixed the `kedro install` and `kedro build-reqs` flows when uninstalled dependencies are present in a project's `settings.py`, `context.py` or `hooks.py` ([Issue #829](https://github.com/quantumblacklabs/kedro/issues/829)). 
* Imports are now refactored at `kedro pipeline package` and `kedro pipeline pull` time, so that _aliasing_ a modular pipeline doesn't break it. +* Added option in `SQLQueryDataSet` to specify a `filepath` with a sql query, in addition to supplying the entire query in the constructor ## Minor breaking changes to the API @@ -30,6 +31,7 @@ ## Thanks for supporting contributions [Moussa Taifi](https://github.com/moutai), [Deepyaman Datta](https://github.com/deepyaman) +[Benjamin Levy](https://github.com/BenjaminLevyQB) # Release 0.17.4 From fdfa53ef4e0d878a42bbf9b6f51b10d25b869ba3 Mon Sep 17 00:00:00 2001 From: Ben Levy <79479484+BenjaminLevyQB@users.noreply.github.com> Date: Wed, 13 Oct 2021 11:40:01 -0400 Subject: [PATCH 05/12] Update RELEASE.md Co-authored-by: Antony Milne <49395058+AntonyMilneQB@users.noreply.github.com> --- RELEASE.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/RELEASE.md b/RELEASE.md index 996b93a11f..a6fe25bb93 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -37,7 +37,7 @@ * Bumped minimum required `fsspec` version to 2021.04. * Fixed the `kedro install` and `kedro build-reqs` flows when uninstalled dependencies are present in a project's `settings.py`, `context.py` or `hooks.py` ([Issue #829](https://github.com/quantumblacklabs/kedro/issues/829)). * Imports are now refactored at `kedro pipeline package` and `kedro pipeline pull` time, so that _aliasing_ a modular pipeline doesn't break it. -* Added option in `SQLQueryDataSet` to specify a `filepath` with a sql query, in addition to supplying the entire query in the constructor +* Added option to `pandas.SQLQueryDataSet` to specify a `filepath` with a SQL query, in addition to the current method of supplying the query itself in the `sql` argument. ## Minor breaking changes to the API * Pinned `dynaconf` to `<3.1.6` because the method signature for `_validate_items` changed which is used in Kedro. 
From 53dc89e032e7a86836c65f95fe9d4e70901a007b Mon Sep 17 00:00:00 2001 From: Ben Levy <79479484+BenjaminLevyQB@users.noreply.github.com> Date: Wed, 13 Oct 2021 11:40:13 -0400 Subject: [PATCH 06/12] Update RELEASE.md Co-authored-by: Antony Milne <49395058+AntonyMilneQB@users.noreply.github.com> --- RELEASE.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/RELEASE.md b/RELEASE.md index a6fe25bb93..6a6f517ede 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -48,7 +48,7 @@ ## Thanks for supporting contributions [Moussa Taifi](https://github.com/moutai), -[Deepyaman Datta](https://github.com/deepyaman) +[Deepyaman Datta](https://github.com/deepyaman), [Benjamin Levy](https://github.com/BenjaminLevyQB) # Release 0.17.4 From a64089b60510e24a26f9206b6f189f88758ac899 Mon Sep 17 00:00:00 2001 From: BenjaminLevyQB Date: Wed, 13 Oct 2021 14:09:02 -0400 Subject: [PATCH 07/12] changed dataset to raise error if both `sql` and `filepath` are provided --- kedro/extras/datasets/pandas/sql_dataset.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/kedro/extras/datasets/pandas/sql_dataset.py b/kedro/extras/datasets/pandas/sql_dataset.py index 02e4aa2208..22ef000f3f 100644 --- a/kedro/extras/datasets/pandas/sql_dataset.py +++ b/kedro/extras/datasets/pandas/sql_dataset.py @@ -322,6 +322,11 @@ def __init__( # pylint: disable=too-many-arguments Raises: DataSetError: When either ``sql`` or ``con`` parameters is emtpy. """ + if sql and filepath: + raise DataSetError( + "`sql` and `filepath` arguments cannot both be provided." + "Please only provide one." 
+ ) if not (sql or filepath): raise DataSetError( From 862fafcce60b802a7836a69d5fa973a5c0b312a6 Mon Sep 17 00:00:00 2001 From: BenjaminLevyQB Date: Wed, 13 Oct 2021 14:13:39 -0400 Subject: [PATCH 08/12] moved `filepath` argument so that this is no longer a breaking change --- kedro/extras/datasets/pandas/sql_dataset.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kedro/extras/datasets/pandas/sql_dataset.py b/kedro/extras/datasets/pandas/sql_dataset.py index 22ef000f3f..7858efae10 100644 --- a/kedro/extras/datasets/pandas/sql_dataset.py +++ b/kedro/extras/datasets/pandas/sql_dataset.py @@ -288,16 +288,15 @@ class SQLQueryDataSet(AbstractDataSet): def __init__( # pylint: disable=too-many-arguments self, sql: str = None, - filepath: str = None, credentials: Dict[str, Any] = None, load_args: Dict[str, Any] = None, fs_args: Dict[str, Any] = None, + filepath: str = None, ) -> None: """Creates a new ``SQLQueryDataSet``. Args: sql: The sql query statement. - filepath: A path to a file with a sql query statement. credentials: A dictionary with a ``SQLAlchemy`` connection string. Users are supposed to provide the connection string 'con' through credentials. It overwrites `con` parameter in @@ -318,6 +317,7 @@ def __init__( # pylint: disable=too-many-arguments https://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open All defaults are preserved, except `mode`, which is set to `r` when loading and to `w` when saving. + filepath: A path to a file with a sql query statement. Raises: DataSetError: When either ``sql`` or ``con`` parameters is emtpy. 
From 6b48a3e0215f056d97dc2011820ef3dae5efda01 Mon Sep 17 00:00:00 2001 From: BenjaminLevyQB Date: Mon, 25 Oct 2021 09:58:46 -0400 Subject: [PATCH 09/12] resolving small nits from PR conversation --- kedro/extras/datasets/pandas/sql_dataset.py | 37 ++++++++----------- .../datasets/pandas/test_sql_dataset.py | 27 +++++++++++--- 2 files changed, 37 insertions(+), 27 deletions(-) diff --git a/kedro/extras/datasets/pandas/sql_dataset.py b/kedro/extras/datasets/pandas/sql_dataset.py index 7858efae10..882201c922 100644 --- a/kedro/extras/datasets/pandas/sql_dataset.py +++ b/kedro/extras/datasets/pandas/sql_dataset.py @@ -315,8 +315,7 @@ def __init__( # pylint: disable=too-many-arguments `open_args_load` and `open_args_save`. Here you can find all available arguments for `open`: https://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open - All defaults are preserved, except `mode`, which is set to `r` when loading - and to `w` when saving. + All defaults are preserved, except `mode`, which is set to `r` when loading. filepath: A path to a file with a sql query statement. 
Raises: @@ -349,44 +348,38 @@ def __init__( # pylint: disable=too-many-arguments ) # load sql query from file - if not sql: + if sql: + self._load_args["sql"] = sql + self._filepath = None + else: # filesystem for loading sql file _fs_args = copy.deepcopy(fs_args) or {} - _fs_open_args_load = _fs_args.pop("open_args_load", {}) _fs_credentials = _fs_args.pop("credentials", {}) protocol, path = get_protocol_and_path(str(filepath)) self._protocol = protocol self._fs = fsspec.filesystem(self._protocol, **_fs_credentials, **_fs_args) - _fs_open_args_load.setdefault("mode", "r") - self._fs_open_args_load = _fs_open_args_load - - self._load_args["filepath"] = path - else: - self._load_args["sql"] = sql + # self._load_args["filepath"] = path + self._filepath = path self._load_args["con"] = credentials["con"] def _describe(self) -> Dict[str, Any]: - load_args = self._load_args.copy() + load_args = copy.deepcopy(self._load_args) desc = {} - if "sql" in load_args: - desc["sql"] = load_args.pop("sql") - if "filepath" in load_args: - desc["filepath"] = str(load_args.pop("filepath")) + desc["sql"] = str(load_args.pop("sql", None)) + desc["filepath"] = str(self._filepath) del load_args["con"] - desc["load_args"] = load_args + desc["load_args"] = str(load_args) return desc def _load(self) -> pd.DataFrame: - load_args = self._load_args.copy() - - if "sql" not in load_args: - filepath = load_args.pop("filepath") - load_path = get_filepath_str(PurePosixPath(filepath), self._protocol) + load_args = copy.deepcopy(self._load_args) - with self._fs.open(load_path, **self._fs_open_args_load) as fs_file: + if self._filepath: + load_path = get_filepath_str(PurePosixPath(self._filepath), self._protocol) + with self._fs.open(load_path, mode="r") as fs_file: load_args["sql"] = fs_file.read() try: diff --git a/tests/extras/datasets/pandas/test_sql_dataset.py b/tests/extras/datasets/pandas/test_sql_dataset.py index b1b370e060..676bdfea17 100644 --- 
a/tests/extras/datasets/pandas/test_sql_dataset.py +++ b/tests/extras/datasets/pandas/test_sql_dataset.py @@ -28,6 +28,7 @@ # pylint: disable=no-member +from pathlib import PosixPath from typing import Any import pandas as pd @@ -52,10 +53,11 @@ def dummy_dataframe(): @pytest.fixture -def sql_file(tmp_path): +def sql_file(tmp_path: PosixPath): file = tmp_path / "test.sql" - with file.open("w") as f: - f.write(SQL_QUERY) + # with file.open("w") as f: + # f.write(SQL_QUERY) + file.write_text(SQL_QUERY) return file.as_posix() @@ -333,13 +335,28 @@ def test_save_error(self, query_data_set, dummy_dataframe): def test_str_representation_sql(self, query_data_set, sql_file): """Test the data set instance string representation""" str_repr = str(query_data_set) - assert f"SQLQueryDataSet(load_args={{}}, sql={SQL_QUERY})" in str_repr + assert ( + f"SQLQueryDataSet(filepath=None, load_args={{}}, sql={SQL_QUERY})" + in str_repr + ) assert CONNECTION not in str_repr assert sql_file not in str_repr def test_str_representation_filepath(self, query_file_data_set, sql_file): """Test the data set instance string representation with filepath arg.""" str_repr = str(query_file_data_set) - assert f"SQLQueryDataSet(filepath={str(sql_file)}, load_args={{}}" in str_repr + assert ( + f"SQLQueryDataSet(filepath={str(sql_file)}, load_args={{}}, sql=None)" + in str_repr + ) assert CONNECTION not in str_repr assert SQL_QUERY not in str_repr + + def test_sql_and_filepath_args(self, sql_file): + """Test that an error is raised when both `sql` and `filepath` args are given.""" + pattern = ( + r"`sql` and `filepath` arguments cannot both be provided." + r"Please only provide one." 
+ ) + with pytest.raises(DataSetError, match=pattern): + SQLQueryDataSet(sql=SQL_QUERY, filepath=sql_file) From 7c507b9a5b6abf05ec5e53b8202c40a04c99050c Mon Sep 17 00:00:00 2001 From: Antony Milne <49395058+AntonyMilneQB@users.noreply.github.com> Date: Mon, 25 Oct 2021 16:01:46 +0100 Subject: [PATCH 10/12] Update kedro/extras/datasets/pandas/sql_dataset.py --- kedro/extras/datasets/pandas/sql_dataset.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/kedro/extras/datasets/pandas/sql_dataset.py b/kedro/extras/datasets/pandas/sql_dataset.py index 882201c922..818162c798 100644 --- a/kedro/extras/datasets/pandas/sql_dataset.py +++ b/kedro/extras/datasets/pandas/sql_dataset.py @@ -359,8 +359,6 @@ def __init__( # pylint: disable=too-many-arguments self._protocol = protocol self._fs = fsspec.filesystem(self._protocol, **_fs_credentials, **_fs_args) - - # self._load_args["filepath"] = path self._filepath = path self._load_args["con"] = credentials["con"] From d36d998a5394e5e081886d3ea0df98f6bf4f35c3 Mon Sep 17 00:00:00 2001 From: Antony Milne <49395058+AntonyMilneQB@users.noreply.github.com> Date: Mon, 25 Oct 2021 16:02:06 +0100 Subject: [PATCH 11/12] Update kedro/extras/datasets/pandas/sql_dataset.py --- kedro/extras/datasets/pandas/sql_dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kedro/extras/datasets/pandas/sql_dataset.py b/kedro/extras/datasets/pandas/sql_dataset.py index 818162c798..d4a3d134d0 100644 --- a/kedro/extras/datasets/pandas/sql_dataset.py +++ b/kedro/extras/datasets/pandas/sql_dataset.py @@ -319,7 +319,7 @@ def __init__( # pylint: disable=too-many-arguments filepath: A path to a file with a sql query statement. Raises: - DataSetError: When either ``sql`` or ``con`` parameters is emtpy. + DataSetError: When either the ``sql`` or ``con`` parameter is empty. 
""" if sql and filepath: raise DataSetError( From a90a1def72ed0c463d887ddd3f9fb4ac6039a519 Mon Sep 17 00:00:00 2001 From: Antony Milne <49395058+AntonyMilneQB@users.noreply.github.com> Date: Mon, 25 Oct 2021 16:26:04 +0100 Subject: [PATCH 12/12] Update tests/extras/datasets/pandas/test_sql_dataset.py --- tests/extras/datasets/pandas/test_sql_dataset.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/extras/datasets/pandas/test_sql_dataset.py b/tests/extras/datasets/pandas/test_sql_dataset.py index 676bdfea17..10ef0d3b84 100644 --- a/tests/extras/datasets/pandas/test_sql_dataset.py +++ b/tests/extras/datasets/pandas/test_sql_dataset.py @@ -55,8 +55,6 @@ def dummy_dataframe(): @pytest.fixture def sql_file(tmp_path: PosixPath): file = tmp_path / "test.sql" - # with file.open("w") as f: - # f.write(SQL_QUERY) file.write_text(SQL_QUERY) return file.as_posix()