From e55fd880b5ec2d8f66b1b2949193652e9994277b Mon Sep 17 00:00:00 2001 From: Rhenan Bartels Date: Tue, 27 Oct 2015 13:17:04 -0200 Subject: [PATCH 1/5] First commit: import_from_pandas initialized --- rows/__init__.py | 1 + rows/plugins/pandas.py | 47 ++++++++++++++++++++++++++++++++++++ setup.py | 3 ++- tests/tests_plugin_pandas.py | 33 +++++++++++++++++++++++++ 4 files changed, 83 insertions(+), 1 deletion(-) create mode 100644 rows/plugins/pandas.py create mode 100644 tests/tests_plugin_pandas.py diff --git a/rows/__init__.py b/rows/__init__.py index 897df4e8..08fdb3f2 100644 --- a/rows/__init__.py +++ b/rows/__init__.py @@ -29,6 +29,7 @@ from rows.plugins._json import import_from_json, export_to_json from rows.plugins.csv import import_from_csv, export_to_csv from rows.plugins.txt import import_from_txt, export_to_txt +from rows.plugins.pandas import import_from_pandas # Have dependencies diff --git a/rows/plugins/pandas.py b/rows/plugins/pandas.py new file mode 100644 index 00000000..55970925 --- /dev/null +++ b/rows/plugins/pandas.py @@ -0,0 +1,47 @@ +# coding: utf-8 + +# Copyright 2014-2015 Álvaro Justen +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +import datetime + +from rows.plugins.utils import create_table + + +def import_from_pandas(data_frame, *args, **kwargs): + header = list(data_frame) + + table_rows = [] + for _, row in data_frame.iterrows(): + row = correct_row_values(row) + table_rows.append(list(row)) + + meta = {'imported_from': 'pandas', 'filename': 'DataFrame', } + return create_table([header] + table_rows, meta=meta, *args, **kwargs) + + +def correct_row_values(row): + date_format = "%Y-%m-%d %H:%M:%S" + for element_index, element in enumerate(row): + #Problem importing pandas.tslib.Timestamp or pandas.Timestamp + if hasattr(element, 'is_month_end'): + date_string = element.strftime(date_format) + if date_string.endswith("00:00:00"): + row.values[element_index] = \ + datetime.datetime.strptime(date_string, + date_format).date() + else: + row.values[element_index] = element.to_datetime() + return row diff --git a/setup.py b/setup.py index b5a57fa8..f8d304f6 100644 --- a/setup.py +++ b/setup.py @@ -24,7 +24,8 @@ 'html': ['lxml'], # apt: libxslt-dev libxml2-dev 'ods': ['lxml'], 'xls': ['xlrd', 'xlwt'], - 'xlsx': ['openpyxl'], } + 'xlsx': ['openpyxl'], + 'pandas': ['pandas'], } EXTRA_REQUIREMENTS['all'] = sum(EXTRA_REQUIREMENTS.values(), []) INSTALL_REQUIREMENTS = EXTRA_REQUIREMENTS['csv'] LONG_DESCRIPTION = ''' diff --git a/tests/tests_plugin_pandas.py b/tests/tests_plugin_pandas.py new file mode 100644 index 00000000..08b32302 --- /dev/null +++ b/tests/tests_plugin_pandas.py @@ -0,0 +1,33 @@ +# coding: utf-8 + +# Copyright 2014-2015 Álvaro Justen +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +import unittest + +import rows.plugins.pandas +import utils + +class PluginPandasTestCase(utils.RowsTestMixIn, unittest.TestCase): + + plugin_name = 'xls' + filename = 'tests/data/all-field-types.csv' + + def test_imports(self): + self.assertIs(rows.import_from_pandas, + rows.plugins.pandas.import_from_pandas) + + def test_import_from_pandas(self): + pass + From 0a4a18b930abae5778ea512c279a95ff9ece09e6 Mon Sep 17 00:00:00 2001 From: Rhenan Bartels Date: Tue, 27 Oct 2015 16:53:19 -0200 Subject: [PATCH 2/5] Working on export_to_pandas --- rows/__init__.py | 2 +- rows/plugins/{pandas.py => _pandas.py} | 35 ++++++++++++++++++-------- setup.py | 2 +- tests/tests_plugin_pandas.py | 21 +++++++++++++--- 4 files changed, 43 insertions(+), 17 deletions(-) rename rows/plugins/{pandas.py => _pandas.py} (67%) diff --git a/rows/__init__.py b/rows/__init__.py index 08fdb3f2..788fe707 100644 --- a/rows/__init__.py +++ b/rows/__init__.py @@ -29,7 +29,7 @@ from rows.plugins._json import import_from_json, export_to_json from rows.plugins.csv import import_from_csv, export_to_csv from rows.plugins.txt import import_from_txt, export_to_txt -from rows.plugins.pandas import import_from_pandas +from rows.plugins._pandas import import_from_pandas # Have dependencies diff --git a/rows/plugins/pandas.py b/rows/plugins/_pandas.py similarity index 67% rename from rows/plugins/pandas.py rename to rows/plugins/_pandas.py index 55970925..e3f9b9c5 100644 --- a/rows/plugins/pandas.py +++ b/rows/plugins/_pandas.py @@ -17,31 +17,44 @@ import datetime +import pandas + from rows.plugins.utils import create_table def import_from_pandas(data_frame, *args, **kwargs): - header = list(data_frame) + meta = {'imported_from': 'pandas', 'filename': 'DataFrame', } + return create_table(_dataframe_generator(data_frame), meta=meta, *args, + **kwargs) + +def _dataframe_generator(data_frame): + yield list(data_frame) - table_rows = [] for _, row in data_frame.iterrows(): row = correct_row_values(row) - table_rows.append(list(row)) - - meta = {'imported_from': 'pandas', 'filename': 'DataFrame', } - return create_table([header] + table_rows, meta=meta, *args, **kwargs) - + yield list(row) def correct_row_values(row): date_format = "%Y-%m-%d %H:%M:%S" for element_index, element in enumerate(row): #Problem importing pandas.tslib.Timestamp or pandas.Timestamp - if hasattr(element, 'is_month_end'): + if isinstance(element, pandas.tslib.Timestamp): date_string = element.strftime(date_format) if date_string.endswith("00:00:00"): - row.values[element_index] = \ - datetime.datetime.strptime(date_string, - date_format).date() + row.values[element_index] = datetime.datetime.\ + strptime(date_string, + date_format).date() else: row.values[element_index] = element.to_datetime() return row + + +def export_to_pandas(table_obj): + data_frame = pandas.DataFrame(_generator_table(table_obj), + columns=table_obj.field_names) + + return data_frame + +def _generator_table(table_obj): + for row in table_obj: + yield list(row) diff --git a/setup.py b/setup.py index f8d304f6..055588db 100644 --- a/setup.py +++ b/setup.py @@ -25,7 +25,7 @@ 'ods': ['lxml'], 'xls': ['xlrd', 'xlwt'], 'xlsx': ['openpyxl'], - 'pandas': ['pandas'], } + '_pandas': ['pandas'], } EXTRA_REQUIREMENTS['all'] = sum(EXTRA_REQUIREMENTS.values(), []) INSTALL_REQUIREMENTS = EXTRA_REQUIREMENTS['csv'] LONG_DESCRIPTION = ''' diff --git a/tests/tests_plugin_pandas.py b/tests/tests_plugin_pandas.py index 08b32302..4deef8a9 100644 --- a/tests/tests_plugin_pandas.py +++ b/tests/tests_plugin_pandas.py @@ -15,19 +15,32 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . import unittest +import mock -import rows.plugins.pandas +import rows.plugins._pandas +import rows.plugins.csv import utils class PluginPandasTestCase(utils.RowsTestMixIn, unittest.TestCase): plugin_name = 'xls' filename = 'tests/data/all-field-types.csv' + data_frame = rows.plugins.csv.import_from_csv(filename) def test_imports(self): self.assertIs(rows.import_from_pandas, - rows.plugins.pandas.import_from_pandas) + rows.plugins._pandas.import_from_pandas) - def test_import_from_pandas(self): - pass + @mock.patch('rows.plugins._pandas.create_table') + def test_import_from_pandas_uses_create_table(self, mocked_create_table): + mocked_create_table.return_value = 101 + kwargs = {'encoding': 'test', 'some_key': 123, 'other': 456, } + result = rows.import_from_pandas(self.data_frame, **kwargs) + self.assertTrue(mocked_create_table.called) + self.assertEqual(mocked_create_table.call_count, 1) + self.assertEqual(result, 101) + + call = mocked_create_table.call_args + kwargs['meta'] = {'imported_from': 'pandas', 'filename': 'DataFrame'} + self.assertEqual(call[1], kwargs) From 4f978d44cfbd912b3785698a0b734c29f4c712d4 Mon Sep 17 00:00:00 2001 From: Rhenan Bartels Date: Wed, 28 Oct 2015 14:39:22 -0200 Subject: [PATCH 3/5] Little changes in testing --- tests/tests_plugin_pandas.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/tests/tests_plugin_pandas.py b/tests/tests_plugin_pandas.py index 4deef8a9..5e82d4bd 100644 --- a/tests/tests_plugin_pandas.py +++ b/tests/tests_plugin_pandas.py @@ -17,6 +17,8 @@ import unittest import mock +import pandas + import rows.plugins._pandas import rows.plugins.csv import utils @@ -25,7 +27,8 @@ class PluginPandasTestCase(utils.RowsTestMixIn, unittest.TestCase): plugin_name = 'xls' filename = 'tests/data/all-field-types.csv' - data_frame = rows.plugins.csv.import_from_csv(filename) + data_frame = pandas.read_csv(filename) + table = rows.plugins.csv.import_from_csv(filename) def test_imports(self): self.assertIs(rows.import_from_pandas, @@ -44,3 +47,11 @@ def test_import_from_pandas_uses_create_table(self, mocked_create_table): kwargs['meta'] = {'imported_from': 'pandas', 'filename': 'DataFrame'} self.assertEqual(call[1], kwargs) + @mock.patch('rows.plugins._pandas.pandas.DataFrame') + def test_export_to_pandas_uses_data_frame(self, mocked_data_frame): + mocked_data_frame.return_value = 101 + result = rows.plugins._pandas.export_to_pandas(self.table) + self.assertTrue(mocked_data_frame.called) + self.assertTrue(mocked_data_frame.call_count, 1) + self.assertEqual(result, 101) + From 8a2365fe6716892fbe625415fdbe9310c0c54ef9 Mon Sep 17 00:00:00 2001 From: Rhenan Bartels Date: Thu, 19 Nov 2015 20:14:33 -0200 Subject: [PATCH 4/5] PR modifications --- rows/__init__.py | 5 ++++- rows/plugins/_pandas.py | 20 ++++++++++---------- setup.py | 2 +- tests/tests_plugin_pandas.py | 9 ++++----- 4 files changed, 19 insertions(+), 17 deletions(-) diff --git a/rows/__init__.py b/rows/__init__.py index 788fe707..a5752f43 100644 --- a/rows/__init__.py +++ b/rows/__init__.py @@ -29,7 +29,6 @@ from rows.plugins._json import import_from_json, export_to_json from rows.plugins.csv import import_from_csv, export_to_csv from rows.plugins.txt import import_from_txt, export_to_txt -from rows.plugins._pandas import import_from_pandas # Have dependencies @@ -59,5 +58,9 @@ except ImportError: pass +try: + from rows.plugins._pandas import import_from_pandas, export_to_pandas +except ImportError: + pass __version__ = '0.2.0-dev' diff --git a/rows/plugins/_pandas.py b/rows/plugins/_pandas.py index e3f9b9c5..fe3aff05 100644 --- a/rows/plugins/_pandas.py +++ b/rows/plugins/_pandas.py @@ -21,6 +21,7 @@ from rows.plugins.utils import create_table +DATE_FORMAT = "%Y-%m-%d %H:%M:%S" def import_from_pandas(data_frame, *args, **kwargs): meta = {'imported_from': 'pandas', 'filename': 'DataFrame', } @@ -31,22 +32,21 @@ def _dataframe_generator(data_frame): yield list(data_frame) for _, row in data_frame.iterrows(): - row = correct_row_values(row) - yield list(row) + yield dataframe_row_to_list(row) -def correct_row_values(row): - date_format = "%Y-%m-%d %H:%M:%S" +def dataframe_row_to_list(row): + result = [] for element_index, element in enumerate(row): #Problem importing pandas.tslib.Timestamp or pandas.Timestamp if isinstance(element, pandas.tslib.Timestamp): - date_string = element.strftime(date_format) + date_string = element.strftime(DATE_FORMAT) if date_string.endswith("00:00:00"): - row.values[element_index] = datetime.datetime.\ - strptime(date_string, - date_format).date() + value = datetime.datetime.strptime(date_string, + DATE_FORMAT).date() else: - row.values[element_index] = element.to_datetime() - return row + value = element.to_datetime() + result.append(value) + return result def export_to_pandas(table_obj): diff --git a/setup.py b/setup.py index 055588db..f8d304f6 100644 --- a/setup.py +++ b/setup.py @@ -25,7 +25,7 @@ 'ods': ['lxml'], 'xls': ['xlrd', 'xlwt'], 'xlsx': ['openpyxl'], - '_pandas': ['pandas'], } + 'pandas': ['pandas'], } EXTRA_REQUIREMENTS['all'] = sum(EXTRA_REQUIREMENTS.values(), []) INSTALL_REQUIREMENTS = EXTRA_REQUIREMENTS['csv'] LONG_DESCRIPTION = ''' diff --git a/tests/tests_plugin_pandas.py b/tests/tests_plugin_pandas.py index 5e82d4bd..9d140b73 100644 --- a/tests/tests_plugin_pandas.py +++ b/tests/tests_plugin_pandas.py @@ -25,10 +25,9 @@ class PluginPandasTestCase(utils.RowsTestMixIn, unittest.TestCase): - plugin_name = 'xls' - filename = 'tests/data/all-field-types.csv' - data_frame = pandas.read_csv(filename) - table = rows.plugins.csv.import_from_csv(filename) + plugin_name = 'pandas' + data_frame = pandas.DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]], + columns=['A', 'B', 'C', 'D']) def test_imports(self): self.assertIs(rows.import_from_pandas, @@ -50,7 +49,7 @@ def test_import_from_pandas_uses_create_table(self, mocked_create_table): @mock.patch('rows.plugins._pandas.pandas.DataFrame') def test_export_to_pandas_uses_data_frame(self, mocked_data_frame): mocked_data_frame.return_value = 101 - result = rows.plugins._pandas.export_to_pandas(self.table) + result = rows.plugins._pandas.export_to_pandas(utils.table) self.assertTrue(mocked_data_frame.called) self.assertTrue(mocked_data_frame.call_count, 1) self.assertEqual(result, 101) From d1be1d704f31f7d53f7c283c9f667067339560cc Mon Sep 17 00:00:00 2001 From: Julio Trevisan Date: Tue, 18 Oct 2016 14:59:31 -0200 Subject: [PATCH 5/5] PANDAS plugin (to-from rows) tested --- tests/tests_plugin_pandas.py | 48 +++++++++++++++++++++++++++++++++--- 1 file changed, 44 insertions(+), 4 deletions(-) diff --git a/tests/tests_plugin_pandas.py b/tests/tests_plugin_pandas.py index 9d140b73..723fd992 100644 --- a/tests/tests_plugin_pandas.py +++ b/tests/tests_plugin_pandas.py @@ -16,12 +16,11 @@ # along with this program. If not, see . import unittest import mock - import pandas - import rows.plugins._pandas -import rows.plugins.csv import utils +from datetime import datetime as dt +from collections import OrderedDict class PluginPandasTestCase(utils.RowsTestMixIn, unittest.TestCase): @@ -43,7 +42,11 @@ def test_import_from_pandas_uses_create_table(self, mocked_create_table): self.assertEqual(result, 101) call = mocked_create_table.call_args - kwargs['meta'] = {'imported_from': 'pandas', 'filename': 'DataFrame'} + # turicas on Nov 4, 2015 Owner + # + # Since it was not imported from a file, filename key should be None. + # kwargs['meta'] = {'imported_from': 'pandas', 'filename': 'DataFrame'} + kwargs['meta'] = {'imported_from': 'PANDAS', 'filename': None} self.assertEqual(call[1], kwargs) @mock.patch('rows.plugins._pandas.pandas.DataFrame') @@ -54,3 +57,40 @@ def test_export_to_pandas_uses_data_frame(self, mocked_data_frame): self.assertTrue(mocked_data_frame.call_count, 1) self.assertEqual(result, 101) + # Data table with several different types + data_dict = OrderedDict(( + (u"id", [1, 2, 3, 4, 5, 6]), + (u"name", [u"John", u"Terry", u"Eric", u"Graham", u"Terry", u"Michael"]), + (u"birth", [dt(1977, 1, 1, 15, 15), dt(1944, 9, 1, 15, 30), + dt(1969, 1, 5, 15, 44), dt(1937, 1, 13, 15, 13), + dt(1953, 10, 1, 5, 3), dt(1981, 5, 1, 15, 3)]), + (u"height", [3.3, 1.67, 1.24, 5.12, 1.88, 1.89]), + (u"is_vegan", [True, False, True, False, True, False]))) + + + def _compare_tables(self, df, rows_table): + """Compares a rows.Table against a pandas.DataFrame for the sake of this test""" + for h0, h1 in zip(rows_table.field_names, list(df)): + # Header names match? + self.assertEqual(h0, h1, '%s != %s' % (h0, h1)) + for row0, row1 in zip(rows_table, df.values): + for cell0, cell1 in zip(row0, row1): + # Cell values match? + + # print cell0, cell1 + self.assertEqual(cell0, cell1, '%s != %s' % (cell0, cell1)) + + + def test_import_from_pandas(self): + df = pandas.DataFrame(self.data_dict) + rows_table = rows.import_from_pandas(df) + self._compare_tables(df, rows_table) + + + def test_export_to_pandas(self): + # This test creates a rows.Table from a PANDAS data frame, + # then converts it back to a PANDAS data frame. + df0 = pandas.DataFrame(self.data_dict) + rows_table = rows.import_from_pandas(df0) + df = rows.export_to_pandas(rows_table) + self._compare_tables(df, rows_table)