From dc8a7e7b84ecb6519a37505f5cd4def030491d64 Mon Sep 17 00:00:00 2001
From: BenjaminLevyQB <Benjamin.Levy@quantumblack.com>
Date: Thu, 9 Sep 2021 11:20:01 -0400
Subject: [PATCH 01/12] Add `filepath` option to SQLQueryDataSet to load the query from a file

---
 kedro/extras/datasets/pandas/sql_dataset.py   | 61 ++++++++++++++++---
 .../datasets/pandas/test_sql_dataset.py       | 30 ++++++++-
 2 files changed, 81 insertions(+), 10 deletions(-)

diff --git a/kedro/extras/datasets/pandas/sql_dataset.py b/kedro/extras/datasets/pandas/sql_dataset.py
index 5744fb5b56..768c1bd403 100644
--- a/kedro/extras/datasets/pandas/sql_dataset.py
+++ b/kedro/extras/datasets/pandas/sql_dataset.py
@@ -29,13 +29,20 @@
 
 import copy
 import re
+from pathlib import PurePosixPath
 from typing import Any, Dict, Optional
 
+import fsspec
 import pandas as pd
 from sqlalchemy import create_engine
 from sqlalchemy.exc import NoSuchModuleError
 
-from kedro.io.core import AbstractDataSet, DataSetError
+from kedro.io.core import (
+    AbstractDataSet,
+    DataSetError,
+    get_filepath_str,
+    get_protocol_and_path,
+)
 
 __all__ = ["SQLTableDataSet", "SQLQueryDataSet"]
 
@@ -278,13 +285,19 @@ class SQLQueryDataSet(AbstractDataSet):
 
     """
 
-    def __init__(
-        self, sql: str, credentials: Dict[str, Any], load_args: Dict[str, Any] = None
+    def __init__(  # pylint: disable=too-many-arguments
+        self,
+        sql: str = None,
+        filepath: str = None,
+        credentials: Dict[str, Any] = None,
+        load_args: Dict[str, Any] = None,
+        fs_args: Dict[str, Any] = None,
     ) -> None:
         """Creates a new ``SQLQueryDataSet``.
 
         Args:
             sql: The sql query statement.
+            filepath: A path to a file with a sql query statement
             credentials: A dictionary with a ``SQLAlchemy`` connection string.
                 Users are supposed to provide the connection string 'con'
                 through credentials. It overwrites `con` parameter in
@@ -297,14 +310,23 @@ def __init__(
                 https://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_sql_query.html
                 To find all supported connection string formats, see here:
                 https://docs.sqlalchemy.org/en/13/core/engines.html#database-urls
+            fs_args: Extra arguments to pass into underlying filesystem class constructor
+                (e.g. `{"project": "my-project"}` for ``GCSFileSystem``), as well as
+                to pass to the filesystem's `open` method through nested keys
+                `open_args_load` and `open_args_save`.
+                Here you can find all available arguments for `open`:
+                https://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open
+                All defaults are preserved, except `mode`, which is set to `r` when loading
+                and to `w` when saving.
 
         Raises:
             DataSetError: When either ``sql`` or ``con`` parameters is emtpy.
         """
 
-        if not sql:
+        if not (sql or filepath):
             raise DataSetError(
-                "`sql` argument cannot be empty. Please provide a sql query"
+                "`sql` and `filepath` arguments cannot both be empty."
+                "Please provide a sql query or path to a sql query file."
             )
 
         if not (credentials and "con" in credentials and credentials["con"]):
@@ -321,7 +343,23 @@ def __init__(
             else default_load_args
         )
 
-        self._load_args["sql"] = sql
+        # load sql query from file
+        if not sql:
+            # filesystem for loading sql file
+            _fs_args = copy.deepcopy(fs_args) or {}
+            _fs_open_args_load = _fs_args.pop("open_args_load", {})
+            _fs_credentials = _fs_args.pop("credentials", {})
+            protocol, path = get_protocol_and_path(str(filepath))
+
+            self._protocol = protocol
+            self._fs = fsspec.filesystem(self._protocol, **_fs_credentials, **_fs_args)
+
+            _fs_open_args_load.setdefault("mode", "r")
+            self._fs_open_args_load = _fs_open_args_load
+
+            self._load_args["filepath"] = path
+        else:
+            self._load_args["sql"] = sql
         self._load_args["con"] = credentials["con"]
 
     def _describe(self) -> Dict[str, Any]:
@@ -331,8 +369,17 @@ def _describe(self) -> Dict[str, Any]:
         return dict(sql=self._load_args["sql"], load_args=load_args)
 
     def _load(self) -> pd.DataFrame:
+        load_args = self._load_args.copy()
+
+        if "sql" not in load_args:
+            filepath = load_args.pop("filepath")
+            load_path = get_filepath_str(PurePosixPath(filepath), self._protocol)
+
+            with self._fs.open(load_path, **self._fs_open_args_load) as fs_file:
+                load_args["sql"] = fs_file.read()
+
         try:
-            return pd.read_sql_query(**self._load_args)
+            return pd.read_sql_query(**load_args)
         except ImportError as import_error:
             raise _get_missing_module_error(import_error) from import_error
         except NoSuchModuleError as exc:
diff --git a/tests/extras/datasets/pandas/test_sql_dataset.py b/tests/extras/datasets/pandas/test_sql_dataset.py
index 49764fe497..2f54a0f2a2 100644
--- a/tests/extras/datasets/pandas/test_sql_dataset.py
+++ b/tests/extras/datasets/pandas/test_sql_dataset.py
@@ -51,6 +51,14 @@ def dummy_dataframe():
     return pd.DataFrame({"col1": [1, 2], "col2": [4, 5], "col3": [5, 6]})
 
 
+@pytest.fixture
+def sql_file(tmp_path):
+    file = tmp_path / "test.sql"
+    with file.open("w") as f:
+        f.write(SQL_QUERY)
+    return f.as_posix()
+
+
 @pytest.fixture(params=[{}])
 def table_data_set(request):
     kwargs = dict(table_name=TABLE_NAME, credentials=dict(con=CONNECTION))
@@ -65,6 +73,13 @@ def query_data_set(request):
     return SQLQueryDataSet(**kwargs)
 
 
+@pytest.fixture(params=[{}])
+def query_file_data_set(request, sql_file):
+    kwargs = dict(filepath=sql_file, credentials=dict(con=CONNECTION))
+    kwargs.update(request.param)
+    return SQLQueryDataSet(**kwargs)
+
+
 class TestSQLTableDataSetLoad:
     @staticmethod
     def _assert_pd_called_once():
@@ -244,10 +259,13 @@ def _assert_pd_called_once():
         _callable.assert_called_once_with(sql=SQL_QUERY, con=CONNECTION)
 
     def test_empty_query_error(self):
-        """Check the error when instantiating with empty query"""
-        pattern = r"`sql` argument cannot be empty\. Please provide a sql query"
+        """Check the error when instantiating with empty query or file"""
+        pattern = (
+            r"`sql` and `filepath` arguments cannot both be empty\."
+            r"Please provide a sql query or path to a sql query file\."
+        )
         with pytest.raises(DataSetError, match=pattern):
-            SQLQueryDataSet(sql="", credentials=dict(con=CONNECTION))
+            SQLQueryDataSet(sql="", filepath="", credentials=dict(con=CONNECTION))
 
     def test_empty_con_error(self):
         """Check the error when instantiating with empty connection string"""
@@ -264,6 +282,12 @@ def test_load(self, mocker, query_data_set):
         query_data_set.load()
         self._assert_pd_called_once()
 
+    def test_load_query_file(self, mocker, query_file_data_set):
+        """Test `load` method with a query file"""
+        mocker.patch("pandas.read_sql_query")
+        query_file_data_set.load()
+        self._assert_pd_called_once()
+
     def test_load_driver_missing(self, mocker, query_data_set):
         """Test that if an unknown module/driver is encountered by SQLAlchemy
         then the error should contain the original error message"""

From dd425af685af42ec2d40b09c541bc31c75728161 Mon Sep 17 00:00:00 2001
From: Ben Levy <79479484+BenjaminLevyQB@users.noreply.github.com>
Date: Fri, 10 Sep 2021 11:28:44 -0400
Subject: [PATCH 02/12] Update kedro/extras/datasets/pandas/sql_dataset.py

Co-authored-by: Merel Theisen <49397448+MerelTheisenQB@users.noreply.github.com>
---
 kedro/extras/datasets/pandas/sql_dataset.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kedro/extras/datasets/pandas/sql_dataset.py b/kedro/extras/datasets/pandas/sql_dataset.py
index 768c1bd403..c7a939ac6e 100644
--- a/kedro/extras/datasets/pandas/sql_dataset.py
+++ b/kedro/extras/datasets/pandas/sql_dataset.py
@@ -297,7 +297,7 @@ def __init__(  # pylint: disable=too-many-arguments
 
         Args:
             sql: The sql query statement.
-            filepath: A path to a file with a sql query statement
+            filepath: A path to a file with a sql query statement.
             credentials: A dictionary with a ``SQLAlchemy`` connection string.
                 Users are supposed to provide the connection string 'con'
                 through credentials. It overwrites `con` parameter in

From 7fce17a1bd2017c1499449847e1260c7f6760869 Mon Sep 17 00:00:00 2001
From: BenjaminLevyQB <Benjamin.Levy@quantumblack.com>
Date: Fri, 10 Sep 2021 11:47:22 -0400
Subject: [PATCH 03/12] Handle `filepath` in SQLQueryDataSet._describe and fix sql_file fixture

---
 kedro/extras/datasets/pandas/sql_dataset.py      | 10 ++++++++--
 tests/extras/datasets/pandas/test_sql_dataset.py | 12 ++++++++++--
 2 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/kedro/extras/datasets/pandas/sql_dataset.py b/kedro/extras/datasets/pandas/sql_dataset.py
index 768c1bd403..b6e2032afc 100644
--- a/kedro/extras/datasets/pandas/sql_dataset.py
+++ b/kedro/extras/datasets/pandas/sql_dataset.py
@@ -364,9 +364,15 @@ def __init__(  # pylint: disable=too-many-arguments
 
     def _describe(self) -> Dict[str, Any]:
         load_args = self._load_args.copy()
-        del load_args["sql"]
+        desc = {}
+        if "sql" in load_args:
+            desc["sql"] = load_args.pop("sql")
+        if "filepath" in load_args:
+            desc["filepath"] = str(load_args.pop("filepath"))
         del load_args["con"]
-        return dict(sql=self._load_args["sql"], load_args=load_args)
+        desc["load_args"] = load_args
+
+        return desc
 
     def _load(self) -> pd.DataFrame:
         load_args = self._load_args.copy()
diff --git a/tests/extras/datasets/pandas/test_sql_dataset.py b/tests/extras/datasets/pandas/test_sql_dataset.py
index 2f54a0f2a2..b1b370e060 100644
--- a/tests/extras/datasets/pandas/test_sql_dataset.py
+++ b/tests/extras/datasets/pandas/test_sql_dataset.py
@@ -56,7 +56,7 @@ def sql_file(tmp_path):
     file = tmp_path / "test.sql"
     with file.open("w") as f:
         f.write(SQL_QUERY)
-    return f.as_posix()
+    return file.as_posix()
 
 
 @pytest.fixture(params=[{}])
@@ -330,8 +330,16 @@ def test_save_error(self, query_data_set, dummy_dataframe):
         with pytest.raises(DataSetError, match=pattern):
             query_data_set.save(dummy_dataframe)
 
-    def test_str_representation_sql(self, query_data_set):
+    def test_str_representation_sql(self, query_data_set, sql_file):
         """Test the data set instance string representation"""
         str_repr = str(query_data_set)
         assert f"SQLQueryDataSet(load_args={{}}, sql={SQL_QUERY})" in str_repr
         assert CONNECTION not in str_repr
+        assert sql_file not in str_repr
+
+    def test_str_representation_filepath(self, query_file_data_set, sql_file):
+        """Test the data set instance string representation with filepath arg."""
+        str_repr = str(query_file_data_set)
+        assert f"SQLQueryDataSet(filepath={str(sql_file)}, load_args={{}}" in str_repr
+        assert CONNECTION not in str_repr
+        assert SQL_QUERY not in str_repr

From 68e734e69a058da20cf07f8b4964983086d7ee99 Mon Sep 17 00:00:00 2001
From: BenjaminLevyQB <Benjamin.Levy@quantumblack.com>
Date: Fri, 10 Sep 2021 11:53:46 -0400
Subject: [PATCH 04/12] Updating RELEASE.md

---
 RELEASE.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/RELEASE.md b/RELEASE.md
index 28a89a2a86..2169c0a9e4 100644
--- a/RELEASE.md
+++ b/RELEASE.md
@@ -20,6 +20,7 @@
 * Bumped minimum required `fsspec` version to 2021.04.
 * Fixed the `kedro install` and `kedro build-reqs` flows when uninstalled dependencies are present in a project's `settings.py`, `context.py` or `hooks.py` ([Issue #829](https://github.com/quantumblacklabs/kedro/issues/829)).
 * Imports are now refactored at `kedro pipeline package` and `kedro pipeline pull` time, so that _aliasing_ a modular pipeline doesn't break it.
+* Added option in `SQLQueryDataSet` to specify a `filepath` with a sql query, in addition to supplying the entire query in the constructor
 
 ## Minor breaking changes to the API
 
@@ -30,6 +31,7 @@
 ## Thanks for supporting contributions
 [Moussa Taifi](https://github.com/moutai),
 [Deepyaman Datta](https://github.com/deepyaman)
+[Benjamin Levy](https://github.com/BenjaminLevyQB)
 
 # Release 0.17.4
 

From fdfa53ef4e0d878a42bbf9b6f51b10d25b869ba3 Mon Sep 17 00:00:00 2001
From: Ben Levy <79479484+BenjaminLevyQB@users.noreply.github.com>
Date: Wed, 13 Oct 2021 11:40:01 -0400
Subject: [PATCH 05/12] Update RELEASE.md

Co-authored-by: Antony Milne <49395058+AntonyMilneQB@users.noreply.github.com>
---
 RELEASE.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/RELEASE.md b/RELEASE.md
index 996b93a11f..a6fe25bb93 100644
--- a/RELEASE.md
+++ b/RELEASE.md
@@ -37,7 +37,7 @@
 * Bumped minimum required `fsspec` version to 2021.04.
 * Fixed the `kedro install` and `kedro build-reqs` flows when uninstalled dependencies are present in a project's `settings.py`, `context.py` or `hooks.py` ([Issue #829](https://github.com/quantumblacklabs/kedro/issues/829)).
 * Imports are now refactored at `kedro pipeline package` and `kedro pipeline pull` time, so that _aliasing_ a modular pipeline doesn't break it.
-* Added option in `SQLQueryDataSet` to specify a `filepath` with a sql query, in addition to supplying the entire query in the constructor
+* Added option to `pandas.SQLQueryDataSet` to specify a `filepath` with a SQL query, in addition to the current method of supplying the query itself in the `sql` argument.
 
 ## Minor breaking changes to the API
 * Pinned `dynaconf` to `<3.1.6` because the method signature for `_validate_items` changed which is used in Kedro.

From 53dc89e032e7a86836c65f95fe9d4e70901a007b Mon Sep 17 00:00:00 2001
From: Ben Levy <79479484+BenjaminLevyQB@users.noreply.github.com>
Date: Wed, 13 Oct 2021 11:40:13 -0400
Subject: [PATCH 06/12] Update RELEASE.md

Co-authored-by: Antony Milne <49395058+AntonyMilneQB@users.noreply.github.com>
---
 RELEASE.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/RELEASE.md b/RELEASE.md
index a6fe25bb93..6a6f517ede 100644
--- a/RELEASE.md
+++ b/RELEASE.md
@@ -48,7 +48,7 @@
 
 ## Thanks for supporting contributions
 [Moussa Taifi](https://github.com/moutai),
-[Deepyaman Datta](https://github.com/deepyaman)
+[Deepyaman Datta](https://github.com/deepyaman),
 [Benjamin Levy](https://github.com/BenjaminLevyQB)
 
 # Release 0.17.4

From a64089b60510e24a26f9206b6f189f88758ac899 Mon Sep 17 00:00:00 2001
From: BenjaminLevyQB <Benjamin.Levy@quantumblack.com>
Date: Wed, 13 Oct 2021 14:09:02 -0400
Subject: [PATCH 07/12] changed dataset to raise error if both `sql` and
 `filepath` are provided

---
 kedro/extras/datasets/pandas/sql_dataset.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/kedro/extras/datasets/pandas/sql_dataset.py b/kedro/extras/datasets/pandas/sql_dataset.py
index 02e4aa2208..22ef000f3f 100644
--- a/kedro/extras/datasets/pandas/sql_dataset.py
+++ b/kedro/extras/datasets/pandas/sql_dataset.py
@@ -322,6 +322,11 @@ def __init__(  # pylint: disable=too-many-arguments
         Raises:
             DataSetError: When either ``sql`` or ``con`` parameters is emtpy.
         """
+        if sql and filepath:
+            raise DataSetError(
+                "`sql` and `filepath` arguments cannot both be provided."
+                "Please only provide one."
+            )
 
         if not (sql or filepath):
             raise DataSetError(

From 862fafcce60b802a7836a69d5fa973a5c0b312a6 Mon Sep 17 00:00:00 2001
From: BenjaminLevyQB <Benjamin.Levy@quantumblack.com>
Date: Wed, 13 Oct 2021 14:13:39 -0400
Subject: [PATCH 08/12] moved `filepath` argument so that this is no longer a
 breaking change

---
 kedro/extras/datasets/pandas/sql_dataset.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kedro/extras/datasets/pandas/sql_dataset.py b/kedro/extras/datasets/pandas/sql_dataset.py
index 22ef000f3f..7858efae10 100644
--- a/kedro/extras/datasets/pandas/sql_dataset.py
+++ b/kedro/extras/datasets/pandas/sql_dataset.py
@@ -288,16 +288,15 @@ class SQLQueryDataSet(AbstractDataSet):
     def __init__(  # pylint: disable=too-many-arguments
         self,
         sql: str = None,
-        filepath: str = None,
         credentials: Dict[str, Any] = None,
         load_args: Dict[str, Any] = None,
         fs_args: Dict[str, Any] = None,
+        filepath: str = None,
     ) -> None:
         """Creates a new ``SQLQueryDataSet``.
 
         Args:
             sql: The sql query statement.
-            filepath: A path to a file with a sql query statement.
             credentials: A dictionary with a ``SQLAlchemy`` connection string.
                 Users are supposed to provide the connection string 'con'
                 through credentials. It overwrites `con` parameter in
@@ -318,6 +317,7 @@ def __init__(  # pylint: disable=too-many-arguments
                 https://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open
                 All defaults are preserved, except `mode`, which is set to `r` when loading
                 and to `w` when saving.
+            filepath: A path to a file with a sql query statement.
 
         Raises:
             DataSetError: When either ``sql`` or ``con`` parameters is emtpy.

From 6b48a3e0215f056d97dc2011820ef3dae5efda01 Mon Sep 17 00:00:00 2001
From: BenjaminLevyQB <Benjamin.Levy@quantumblack.com>
Date: Mon, 25 Oct 2021 09:58:46 -0400
Subject: [PATCH 09/12] resolving small nits from PR conversation

---
 kedro/extras/datasets/pandas/sql_dataset.py   | 37 ++++++++-----------
 .../datasets/pandas/test_sql_dataset.py       | 27 +++++++++++---
 2 files changed, 37 insertions(+), 27 deletions(-)

diff --git a/kedro/extras/datasets/pandas/sql_dataset.py b/kedro/extras/datasets/pandas/sql_dataset.py
index 7858efae10..882201c922 100644
--- a/kedro/extras/datasets/pandas/sql_dataset.py
+++ b/kedro/extras/datasets/pandas/sql_dataset.py
@@ -315,8 +315,7 @@ def __init__(  # pylint: disable=too-many-arguments
                 `open_args_load` and `open_args_save`.
                 Here you can find all available arguments for `open`:
                 https://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open
-                All defaults are preserved, except `mode`, which is set to `r` when loading
-                and to `w` when saving.
+                All defaults are preserved, except `mode`, which is set to `r` when loading.
             filepath: A path to a file with a sql query statement.
 
         Raises:
@@ -349,44 +348,38 @@ def __init__(  # pylint: disable=too-many-arguments
         )
 
         # load sql query from file
-        if not sql:
+        if sql:
+            self._load_args["sql"] = sql
+            self._filepath = None
+        else:
             # filesystem for loading sql file
             _fs_args = copy.deepcopy(fs_args) or {}
-            _fs_open_args_load = _fs_args.pop("open_args_load", {})
             _fs_credentials = _fs_args.pop("credentials", {})
             protocol, path = get_protocol_and_path(str(filepath))
 
             self._protocol = protocol
             self._fs = fsspec.filesystem(self._protocol, **_fs_credentials, **_fs_args)
 
-            _fs_open_args_load.setdefault("mode", "r")
-            self._fs_open_args_load = _fs_open_args_load
-
-            self._load_args["filepath"] = path
-        else:
-            self._load_args["sql"] = sql
+            # self._load_args["filepath"] = path
+            self._filepath = path
         self._load_args["con"] = credentials["con"]
 
     def _describe(self) -> Dict[str, Any]:
-        load_args = self._load_args.copy()
+        load_args = copy.deepcopy(self._load_args)
         desc = {}
-        if "sql" in load_args:
-            desc["sql"] = load_args.pop("sql")
-        if "filepath" in load_args:
-            desc["filepath"] = str(load_args.pop("filepath"))
+        desc["sql"] = str(load_args.pop("sql", None))
+        desc["filepath"] = str(self._filepath)
         del load_args["con"]
-        desc["load_args"] = load_args
+        desc["load_args"] = str(load_args)
 
         return desc
 
     def _load(self) -> pd.DataFrame:
-        load_args = self._load_args.copy()
-
-        if "sql" not in load_args:
-            filepath = load_args.pop("filepath")
-            load_path = get_filepath_str(PurePosixPath(filepath), self._protocol)
+        load_args = copy.deepcopy(self._load_args)
 
-            with self._fs.open(load_path, **self._fs_open_args_load) as fs_file:
+        if self._filepath:
+            load_path = get_filepath_str(PurePosixPath(self._filepath), self._protocol)
+            with self._fs.open(load_path, mode="r") as fs_file:
                 load_args["sql"] = fs_file.read()
 
         try:
diff --git a/tests/extras/datasets/pandas/test_sql_dataset.py b/tests/extras/datasets/pandas/test_sql_dataset.py
index b1b370e060..676bdfea17 100644
--- a/tests/extras/datasets/pandas/test_sql_dataset.py
+++ b/tests/extras/datasets/pandas/test_sql_dataset.py
@@ -28,6 +28,7 @@
 
 # pylint: disable=no-member
 
+from pathlib import PosixPath
 from typing import Any
 
 import pandas as pd
@@ -52,10 +53,11 @@ def dummy_dataframe():
 
 
 @pytest.fixture
-def sql_file(tmp_path):
+def sql_file(tmp_path: PosixPath):
     file = tmp_path / "test.sql"
-    with file.open("w") as f:
-        f.write(SQL_QUERY)
+    # with file.open("w") as f:
+    #     f.write(SQL_QUERY)
+    file.write_text(SQL_QUERY)
     return file.as_posix()
 
 
@@ -333,13 +335,28 @@ def test_save_error(self, query_data_set, dummy_dataframe):
     def test_str_representation_sql(self, query_data_set, sql_file):
         """Test the data set instance string representation"""
         str_repr = str(query_data_set)
-        assert f"SQLQueryDataSet(load_args={{}}, sql={SQL_QUERY})" in str_repr
+        assert (
+            f"SQLQueryDataSet(filepath=None, load_args={{}}, sql={SQL_QUERY})"
+            in str_repr
+        )
         assert CONNECTION not in str_repr
         assert sql_file not in str_repr
 
     def test_str_representation_filepath(self, query_file_data_set, sql_file):
         """Test the data set instance string representation with filepath arg."""
         str_repr = str(query_file_data_set)
-        assert f"SQLQueryDataSet(filepath={str(sql_file)}, load_args={{}}" in str_repr
+        assert (
+            f"SQLQueryDataSet(filepath={str(sql_file)}, load_args={{}}, sql=None)"
+            in str_repr
+        )
         assert CONNECTION not in str_repr
         assert SQL_QUERY not in str_repr
+
+    def test_sql_and_filepath_args(self, sql_file):
+        """Test that an error is raised when both `sql` and `filepath` args are given."""
+        pattern = (
+            r"`sql` and `filepath` arguments cannot both be provided."
+            r"Please only provide one."
+        )
+        with pytest.raises(DataSetError, match=pattern):
+            SQLQueryDataSet(sql=SQL_QUERY, filepath=sql_file)

From 7c507b9a5b6abf05ec5e53b8202c40a04c99050c Mon Sep 17 00:00:00 2001
From: Antony Milne <49395058+AntonyMilneQB@users.noreply.github.com>
Date: Mon, 25 Oct 2021 16:01:46 +0100
Subject: [PATCH 10/12] Update kedro/extras/datasets/pandas/sql_dataset.py

---
 kedro/extras/datasets/pandas/sql_dataset.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/kedro/extras/datasets/pandas/sql_dataset.py b/kedro/extras/datasets/pandas/sql_dataset.py
index 882201c922..818162c798 100644
--- a/kedro/extras/datasets/pandas/sql_dataset.py
+++ b/kedro/extras/datasets/pandas/sql_dataset.py
@@ -359,8 +359,6 @@ def __init__(  # pylint: disable=too-many-arguments
 
             self._protocol = protocol
             self._fs = fsspec.filesystem(self._protocol, **_fs_credentials, **_fs_args)
-
-            # self._load_args["filepath"] = path
             self._filepath = path
         self._load_args["con"] = credentials["con"]
 

From d36d998a5394e5e081886d3ea0df98f6bf4f35c3 Mon Sep 17 00:00:00 2001
From: Antony Milne <49395058+AntonyMilneQB@users.noreply.github.com>
Date: Mon, 25 Oct 2021 16:02:06 +0100
Subject: [PATCH 11/12] Update kedro/extras/datasets/pandas/sql_dataset.py

---
 kedro/extras/datasets/pandas/sql_dataset.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kedro/extras/datasets/pandas/sql_dataset.py b/kedro/extras/datasets/pandas/sql_dataset.py
index 818162c798..d4a3d134d0 100644
--- a/kedro/extras/datasets/pandas/sql_dataset.py
+++ b/kedro/extras/datasets/pandas/sql_dataset.py
@@ -319,7 +319,7 @@ def __init__(  # pylint: disable=too-many-arguments
             filepath: A path to a file with a sql query statement.
 
         Raises:
-            DataSetError: When either ``sql`` or ``con`` parameters is emtpy.
+            DataSetError: When either ``sql`` or ``con`` parameters is empty.
         """
         if sql and filepath:
             raise DataSetError(

From a90a1def72ed0c463d887ddd3f9fb4ac6039a519 Mon Sep 17 00:00:00 2001
From: Antony Milne <49395058+AntonyMilneQB@users.noreply.github.com>
Date: Mon, 25 Oct 2021 16:26:04 +0100
Subject: [PATCH 12/12] Update tests/extras/datasets/pandas/test_sql_dataset.py

---
 tests/extras/datasets/pandas/test_sql_dataset.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/tests/extras/datasets/pandas/test_sql_dataset.py b/tests/extras/datasets/pandas/test_sql_dataset.py
index 676bdfea17..10ef0d3b84 100644
--- a/tests/extras/datasets/pandas/test_sql_dataset.py
+++ b/tests/extras/datasets/pandas/test_sql_dataset.py
@@ -55,8 +55,6 @@ def dummy_dataframe():
 @pytest.fixture
 def sql_file(tmp_path: PosixPath):
     file = tmp_path / "test.sql"
-    # with file.open("w") as f:
-    #     f.write(SQL_QUERY)
     file.write_text(SQL_QUERY)
     return file.as_posix()