From eaf51cbcad1465ace8e0a170f3c95fea00f2673e Mon Sep 17 00:00:00 2001 From: Devin Petersohn Date: Thu, 5 Jul 2018 02:46:50 -0700 Subject: [PATCH 1/9] Implement simple SQL start and docs --- README.rst | 31 +++++++++++++++++++++++++ docs/index.rst | 37 ++++++++++++++++++++++++++++++ docs/sql_on_ray.rst | 5 ++++ modin/sql/__init__.py | 5 ++++ modin/sql/connection.py | 51 +++++++++++++++++++++++++++++++++++++++++ 5 files changed, 129 insertions(+) create mode 100644 docs/sql_on_ray.rst create mode 100644 modin/sql/__init__.py create mode 100644 modin/sql/connection.py diff --git a/README.rst b/README.rst index 22e5deebe0b..ede9a0283ee 100644 --- a/README.rst +++ b/README.rst @@ -35,6 +35,37 @@ Pandas on Ray **Pandas on Ray is currently for experimental use only. Requests and contributions are welcome!** +SQL on Ray +---------- + +*SQL on Ray is currently under development. Coming Soon!* + +**We have implemented a simple example that can be found below. Feedback welcome!** + +.. code-block:: python + + >>> import modin.sql as sql + Process STDOUT and STDERR is being redirected to /tmp/raylogs/. + Waiting for redis server at 127.0.0.1:46487 to respond... + Waiting for redis server at 127.0.0.1:23966 to respond... + Starting local scheduler with the following resources: {'GPU': 0, 'CPU': 8}. + + ====================================================================== + View the web UI at http://localhost:8892/notebooks/ray_ui78522.ipynb?token=02776ac38ddf5756b29da5b06ad06c491dc9ddca324b1f0a + ====================================================================== + + >>> conn = sql.connect("db_name") + >>> c = conn.cursor() + >>> c.execute("CREATE TABLE example (col1, col2, column 3, col4)") + >>> c.execute("INSERT INTO example VALUES ('1', 2.0, 'A String of information', True)") + col1 col2 column 3 col4 + 0 1 2.0 A String of information True + + >>> c.execute("INSERT INTO example VALUES ('6', 17.0, 'A String of different information', False)") + col1 col2 column 3 col4 + 0 1 2.0 A String of information True + 1 6 17.0 A String of different information False + More information and Getting Involved ------------------------------------- diff --git a/docs/index.rst b/docs/index.rst index 1eb5cb42a89..186b2b555e3 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -33,6 +33,37 @@ Pandas on Ray **Pandas on Ray is currently for experimental use only. Requests and contributions are welcome!** +SQL on Ray +---------- + +*SQL on Ray is currently under development. Coming Soon!* + +**We have implemented a simple example that can be found below. Feedback welcome!** + +.. code-block:: python + + >>> import modin.sql as sql + Process STDOUT and STDERR is being redirected to /tmp/raylogs/. + Waiting for redis server at 127.0.0.1:46487 to respond... + Waiting for redis server at 127.0.0.1:23966 to respond... + Starting local scheduler with the following resources: {'GPU': 0, 'CPU': 8}. + + ====================================================================== + View the web UI at http://localhost:8892/notebooks/ray_ui78522.ipynb?token=02776ac38ddf5756b29da5b06ad06c491dc9ddca324b1f0a + ====================================================================== + + >>> conn = sql.connect("db_name") + >>> c = conn.cursor() + >>> c.execute("CREATE TABLE example (col1, col2, column 3, col4)") + >>> c.execute("INSERT INTO example VALUES ('1', 2.0, 'A String of information', True)") + col1 col2 column 3 col4 + 0 1 2.0 A String of information True + + >>> c.execute("INSERT INTO example VALUES ('6', 17.0, 'A String of different information', False)") + col1 col2 column 3 col4 + 0 1 2.0 A String of information True + 1 6 17.0 A String of different information False + .. toctree:: :maxdepth: 1 :caption: Installation @@ -45,3 +76,9 @@ Pandas on Ray pandas_on_ray.rst pandas_supported.rst + +.. toctree:: + :maxdepth: 1 + :caption: SQL on Ray + + sql_on_ray.rst diff --git a/docs/sql_on_ray.rst b/docs/sql_on_ray.rst new file mode 100644 index 00000000000..6285ed5f0cd --- /dev/null +++ b/docs/sql_on_ray.rst @@ -0,0 +1,5 @@ +SQL on Ray +========== + +**SQL on Ray is currently under development. Coming Soon!** + diff --git a/modin/sql/__init__.py b/modin/sql/__init__.py new file mode 100644 index 00000000000..9b929c40f81 --- /dev/null +++ b/modin/sql/__init__.py @@ -0,0 +1,5 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from .connection import connect diff --git a/modin/sql/connection.py b/modin/sql/connection.py new file mode 100644 index 00000000000..3c00aeec82a --- /dev/null +++ b/modin/sql/connection.py @@ -0,0 +1,51 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from ..pandas import Series, DataFrame + + +class Connection(object): + + def __init__(self, name): + self._name = name + self._cursor = None + + def cursor(self): + self._cursor = Cursor() + return self._cursor + + def commit(self): + pass + + def close(self): + self._cursor = None + + +class Cursor(object): + + def __init__(self): + self._tables = {} + + def execute(self, query): + split_query = query.split(" ") + if split_query[:2] == ["CREATE", "TABLE"]: + column_names = " ".join(split_query[3:])\ + .replace("(", "").replace(")", "").split(", ") + columns = Series(column_names) + self._tables[split_query[2]] = DataFrame(columns=columns) + + elif split_query[:2] == ["INSERT", "INTO"]: + table = self._tables[split_query[2]] + values = " ".join(split_query[4:])\ + .replace("(", "").replace(")", "").split(", ") + to_append = Series([eval(i) for i in values], index=table.columns) + self._tables[split_query[2]] =\ + table.append(to_append, ignore_index=True) + print(self._tables[split_query[2]]) + else: + print("ERROR") + + +def connect(name): + return Connection(name) From 168fbd616640cd2ae12402dd3dc9aef08a72045c Mon Sep 17 00:00:00 2001 From: Devin Petersohn Date: Thu, 5 Jul 2018 02:49:21 -0700 Subject: [PATCH 2/9] Fix format --- README.rst | 2 +- docs/index.rst | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index ede9a0283ee..2df34298d5a 100644 --- a/README.rst +++ b/README.rst @@ -42,7 +42,7 @@ SQL on Ray **We have implemented a simple example that can be found below. Feedback welcome!** -.. code-block:: python +.. code-block:: >>> import modin.sql as sql Process STDOUT and STDERR is being redirected to /tmp/raylogs/. diff --git a/docs/index.rst b/docs/index.rst index 186b2b555e3..2a0585e6749 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -40,7 +40,7 @@ SQL on Ray **We have implemented a simple example that can be found below. Feedback welcome!** -.. code-block:: python +.. code-block:: >>> import modin.sql as sql Process STDOUT and STDERR is being redirected to /tmp/raylogs/. From 06422a250d2a81c83254de0631625e96164c2188 Mon Sep 17 00:00:00 2001 From: Devin Petersohn Date: Thu, 5 Jul 2018 02:52:25 -0700 Subject: [PATCH 3/9] Fix format --- README.rst | 3 ++- docs/index.rst | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index 2df34298d5a..b1820cfbc6e 100644 --- a/README.rst +++ b/README.rst @@ -42,7 +42,8 @@ SQL on Ray **We have implemented a simple example that can be found below. Feedback welcome!** -.. code-block:: +.. code-block:: python + :emphasize-lines: 1, 11, 12, 13, 14, 18 >>> import modin.sql as sql Process STDOUT and STDERR is being redirected to /tmp/raylogs/. diff --git a/docs/index.rst b/docs/index.rst index 2a0585e6749..705dc37b5e9 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -40,7 +40,8 @@ SQL on Ray **We have implemented a simple example that can be found below. Feedback welcome!** -.. code-block:: +.. code-block:: python + :emphasize-lines: 1, 11, 12, 13, 14, 18 >>> import modin.sql as sql Process STDOUT and STDERR is being redirected to /tmp/raylogs/. From 2c24bbc68096f066cdb9532d2ffd60f87f987459 Mon Sep 17 00:00:00 2001 From: Devin Petersohn Date: Thu, 5 Jul 2018 02:54:59 -0700 Subject: [PATCH 4/9] Fix format --- README.rst | 40 ++++++++++++++++++++-------------------- docs/index.rst | 40 ++++++++++++++++++++-------------------- 2 files changed, 40 insertions(+), 40 deletions(-) diff --git a/README.rst b/README.rst index b1820cfbc6e..38a341b8440 100644 --- a/README.rst +++ b/README.rst @@ -43,29 +43,29 @@ SQL on Ray **We have implemented a simple example that can be found below. Feedback welcome!** .. code-block:: python - :emphasize-lines: 1, 11, 12, 13, 14, 18 - - >>> import modin.sql as sql - Process STDOUT and STDERR is being redirected to /tmp/raylogs/. - Waiting for redis server at 127.0.0.1:46487 to respond... - Waiting for redis server at 127.0.0.1:23966 to respond... - Starting local scheduler with the following resources: {'GPU': 0, 'CPU': 8}. - - ====================================================================== - View the web UI at http://localhost:8892/notebooks/ray_ui78522.ipynb?token=02776ac38ddf5756b29da5b06ad06c491dc9ddca324b1f0a - ====================================================================== - - >>> conn = sql.connect("db_name") - >>> c = conn.cursor() - >>> c.execute("CREATE TABLE example (col1, col2, column 3, col4)") - >>> c.execute("INSERT INTO example VALUES ('1', 2.0, 'A String of information', True)") + :emphasize-lines: 1, 11, 12, 13, 14, 18 + + >>> import modin.sql as sql + Process STDOUT and STDERR is being redirected to /tmp/raylogs/. + Waiting for redis server at 127.0.0.1:46487 to respond... + Waiting for redis server at 127.0.0.1:23966 to respond... + Starting local scheduler with the following resources: {'GPU': 0, 'CPU': 8}. + + ====================================================================== + View the web UI at http://localhost:8892/notebooks/ray_ui78522.ipynb?token=02776ac38ddf5756b29da5b06ad06c491dc9ddca324b1f0a + ====================================================================== + + >>> conn = sql.connect("db_name") + >>> c = conn.cursor() + >>> c.execute("CREATE TABLE example (col1, col2, column 3, col4)") + >>> c.execute("INSERT INTO example VALUES ('1', 2.0, 'A String of information', True)") col1 col2 column 3 col4 - 0 1 2.0 A String of information True + 0 1 2.0 A String of information True - >>> c.execute("INSERT INTO example VALUES ('6', 17.0, 'A String of different information', False)") + >>> c.execute("INSERT INTO example VALUES ('6', 17.0, 'A String of different information', False)") col1 col2 column 3 col4 - 0 1 2.0 A String of information True - 1 6 17.0 A String of different information False + 0 1 2.0 A String of information True + 1 6 17.0 A String of different information False More information and Getting Involved ------------------------------------- diff --git a/docs/index.rst b/docs/index.rst index 705dc37b5e9..a8bef2e652f 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -41,29 +41,29 @@ SQL on Ray **We have implemented a simple example that can be found below. Feedback welcome!** .. code-block:: python - :emphasize-lines: 1, 11, 12, 13, 14, 18 - - >>> import modin.sql as sql - Process STDOUT and STDERR is being redirected to /tmp/raylogs/. - Waiting for redis server at 127.0.0.1:46487 to respond... - Waiting for redis server at 127.0.0.1:23966 to respond... - Starting local scheduler with the following resources: {'GPU': 0, 'CPU': 8}. - - ====================================================================== - View the web UI at http://localhost:8892/notebooks/ray_ui78522.ipynb?token=02776ac38ddf5756b29da5b06ad06c491dc9ddca324b1f0a - ====================================================================== - - >>> conn = sql.connect("db_name") - >>> c = conn.cursor() - >>> c.execute("CREATE TABLE example (col1, col2, column 3, col4)") - >>> c.execute("INSERT INTO example VALUES ('1', 2.0, 'A String of information', True)") + :emphasize-lines: 1, 11, 12, 13, 14, 18 + + >>> import modin.sql as sql + Process STDOUT and STDERR is being redirected to /tmp/raylogs/. + Waiting for redis server at 127.0.0.1:46487 to respond... + Waiting for redis server at 127.0.0.1:23966 to respond... + Starting local scheduler with the following resources: {'GPU': 0, 'CPU': 8}. + + ====================================================================== + View the web UI at http://localhost:8892/notebooks/ray_ui78522.ipynb?token=02776ac38ddf5756b29da5b06ad06c491dc9ddca324b1f0a + ====================================================================== + + >>> conn = sql.connect("db_name") + >>> c = conn.cursor() + >>> c.execute("CREATE TABLE example (col1, col2, column 3, col4)") + >>> c.execute("INSERT INTO example VALUES ('1', 2.0, 'A String of information', True)") col1 col2 column 3 col4 - 0 1 2.0 A String of information True + 0 1 2.0 A String of information True - >>> c.execute("INSERT INTO example VALUES ('6', 17.0, 'A String of different information', False)") + >>> c.execute("INSERT INTO example VALUES ('6', 17.0, 'A String of different information', False)") col1 col2 column 3 col4 - 0 1 2.0 A String of information True - 1 6 17.0 A String of different information False + 0 1 2.0 A String of information True + 1 6 17.0 A String of different information False .. toctree:: :maxdepth: 1 From 086a362b66b6472b79bddd77afd2aaa6aaee8825 Mon Sep 17 00:00:00 2001 From: Devin Petersohn Date: Thu, 5 Jul 2018 02:56:49 -0700 Subject: [PATCH 5/9] Fix format --- README.rst | 11 +---------- docs/index.rst | 11 +---------- 2 files changed, 2 insertions(+), 20 deletions(-) diff --git a/README.rst b/README.rst index 38a341b8440..36e9d810e74 100644 --- a/README.rst +++ b/README.rst @@ -43,18 +43,9 @@ SQL on Ray **We have implemented a simple example that can be found below. Feedback welcome!** .. code-block:: python - :emphasize-lines: 1, 11, 12, 13, 14, 18 >>> import modin.sql as sql - Process STDOUT and STDERR is being redirected to /tmp/raylogs/. - Waiting for redis server at 127.0.0.1:46487 to respond... - Waiting for redis server at 127.0.0.1:23966 to respond... - Starting local scheduler with the following resources: {'GPU': 0, 'CPU': 8}. - - ====================================================================== - View the web UI at http://localhost:8892/notebooks/ray_ui78522.ipynb?token=02776ac38ddf5756b29da5b06ad06c491dc9ddca324b1f0a - ====================================================================== - + >>> >>> conn = sql.connect("db_name") >>> c = conn.cursor() >>> c.execute("CREATE TABLE example (col1, col2, column 3, col4)") diff --git a/docs/index.rst b/docs/index.rst index a8bef2e652f..25e410ce301 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -41,18 +41,9 @@ SQL on Ray **We have implemented a simple example that can be found below. Feedback welcome!** .. code-block:: python - :emphasize-lines: 1, 11, 12, 13, 14, 18 >>> import modin.sql as sql - Process STDOUT and STDERR is being redirected to /tmp/raylogs/. - Waiting for redis server at 127.0.0.1:46487 to respond... - Waiting for redis server at 127.0.0.1:23966 to respond... - Starting local scheduler with the following resources: {'GPU': 0, 'CPU': 8}. - - ====================================================================== - View the web UI at http://localhost:8892/notebooks/ray_ui78522.ipynb?token=02776ac38ddf5756b29da5b06ad06c491dc9ddca324b1f0a - ====================================================================== - + >>> >>> conn = sql.connect("db_name") >>> c = conn.cursor() >>> c.execute("CREATE TABLE example (col1, col2, column 3, col4)") From bf77c3aceeab45aa9812d50bf7b2036e4e48f496 Mon Sep 17 00:00:00 2001 From: Devin Petersohn Date: Thu, 5 Jul 2018 02:57:47 -0700 Subject: [PATCH 6/9] Fix format --- README.rst | 4 ++-- docs/index.rst | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/README.rst b/README.rst index 36e9d810e74..857915439cf 100644 --- a/README.rst +++ b/README.rst @@ -50,11 +50,11 @@ SQL on Ray >>> c = conn.cursor() >>> c.execute("CREATE TABLE example (col1, col2, column 3, col4)") >>> c.execute("INSERT INTO example VALUES ('1', 2.0, 'A String of information', True)") - col1 col2 column 3 col4 + col1 col2 column 3 col4 0 1 2.0 A String of information True >>> c.execute("INSERT INTO example VALUES ('6', 17.0, 'A String of different information', False)") - col1 col2 column 3 col4 + col1 col2 column 3 col4 0 1 2.0 A String of information True 1 6 17.0 A String of different information False diff --git a/docs/index.rst b/docs/index.rst index 25e410ce301..0b7ea0fccb4 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -48,11 +48,11 @@ SQL on Ray >>> c = conn.cursor() >>> c.execute("CREATE TABLE example (col1, col2, column 3, col4)") >>> c.execute("INSERT INTO example VALUES ('1', 2.0, 'A String of information', True)") - col1 col2 column 3 col4 + col1 col2 column 3 col4 0 1 2.0 A String of information True >>> c.execute("INSERT INTO example VALUES ('6', 17.0, 'A String of different information', False)") - col1 col2 column 3 col4 + col1 col2 column 3 col4 0 1 2.0 A String of information True 1 6 17.0 A String of different information False From 384f40624587aa0f75ddf94a500d71ac1f9e04bb Mon Sep 17 00:00:00 2001 From: Devin Petersohn Date: Thu, 5 Jul 2018 16:01:33 -0700 Subject: [PATCH 7/9] Addressing comments and cleanup --- modin/sql/connection.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/modin/sql/connection.py b/modin/sql/connection.py index 3c00aeec82a..54732ac720d 100644 --- a/modin/sql/connection.py +++ b/modin/sql/connection.py @@ -29,13 +29,13 @@ def __init__(self): def execute(self, query): split_query = query.split(" ") - if split_query[:2] == ["CREATE", "TABLE"]: + if " ".join(split_query[:2]) == "CREATE TABLE": column_names = " ".join(split_query[3:])\ .replace("(", "").replace(")", "").split(", ") columns = Series(column_names) self._tables[split_query[2]] = DataFrame(columns=columns) - elif split_query[:2] == ["INSERT", "INTO"]: + elif " ".join(split_query[:2]) == "INSERT INTO": table = self._tables[split_query[2]] values = " ".join(split_query[4:])\ .replace("(", "").replace(")", "").split(", ") @@ -44,7 +44,8 @@ def execute(self, query): table.append(to_append, ignore_index=True) print(self._tables[split_query[2]]) else: - print("ERROR") + raise NotImplementedError("This API is for demonstration purposes " + "only. Coming Soon!") def connect(name): From fe9d2edc2d1f2fee8743fd2ac748d4458061f2a2 Mon Sep 17 00:00:00 2001 From: Devin Petersohn Date: Thu, 5 Jul 2018 16:17:58 -0700 Subject: [PATCH 8/9] Address comments --- modin/sql/connection.py | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/modin/sql/connection.py b/modin/sql/connection.py index 54732ac720d..bf16edb267f 100644 --- a/modin/sql/connection.py +++ b/modin/sql/connection.py @@ -30,23 +30,29 @@ def __init__(self): def execute(self, query): split_query = query.split(" ") if " ".join(split_query[:2]) == "CREATE TABLE": - column_names = " ".join(split_query[3:])\ - .replace("(", "").replace(")", "").split(", ") - columns = Series(column_names) - self._tables[split_query[2]] = DataFrame(columns=columns) + self._create_table(split_query) elif " ".join(split_query[:2]) == "INSERT INTO": - table = self._tables[split_query[2]] - values = " ".join(split_query[4:])\ - .replace("(", "").replace(")", "").split(", ") - to_append = Series([eval(i) for i in values], index=table.columns) - self._tables[split_query[2]] =\ - table.append(to_append, ignore_index=True) - print(self._tables[split_query[2]]) + self._insert_into(split_query) else: raise NotImplementedError("This API is for demonstration purposes " "only. Coming Soon!") + def _create_table(self, split_query): + column_names = " ".join(split_query[3:]) \ + .replace("(", "").replace(")", "").split(", ") + columns = Series(column_names) + self._tables[split_query[2]] = DataFrame(columns=columns) + + def _insert_into(self, split_query): + table = self._tables[split_query[2]] + values = " ".join(split_query[4:]) \ + .replace("(", "").replace(")", "").split(", ") + to_append = Series([eval(i) for i in values], index=table.columns) + self._tables[split_query[2]] = \ + table.append(to_append, ignore_index=True) + print(self._tables[split_query[2]]) + def connect(name): return Connection(name) From 774572e9e7804f94c24b6f9627da05f88a35219d Mon Sep 17 00:00:00 2001 From: Devin Petersohn Date: Thu, 5 Jul 2018 19:49:02 -0700 Subject: [PATCH 9/9] Fix lint --- modin/sql/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/modin/sql/__init__.py b/modin/sql/__init__.py index 9b929c40f81..1de78687bd1 100644 --- a/modin/sql/__init__.py +++ b/modin/sql/__init__.py @@ -3,3 +3,5 @@ from __future__ import print_function from .connection import connect + +__all__ = ["connect"]