Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PANDAS plugin tested #204

Open
wants to merge 6 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions rows/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,5 +67,8 @@
# Optional plugins: ``rows.plugins`` sets a plugin name to ``None`` when its
# import fails, so each export below is only defined when the plugin loaded.
if plugins.parquet:
    import_from_parquet = plugins.parquet.import_from_parquet

# Access the module through the same ``plugins.pandas`` alias that the guard
# tests, mirroring the parquet lines above.
if plugins.pandas:
    import_from_pandas = plugins.pandas.import_from_pandas
    export_to_pandas = plugins.pandas.export_to_pandas

__version__ = '0.3.1dev0'
5 changes: 5 additions & 0 deletions rows/plugins/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,3 +54,8 @@
from . import plugin_parquet as parquet
except ImportError:
parquet = None

# The pandas plugin is optional: expose it as ``pandas`` when its module can
# be imported, otherwise leave ``None`` so callers can test for availability.
try:
    from . import _pandas
except ImportError:
    pandas = None
else:
    pandas = _pandas
61 changes: 61 additions & 0 deletions rows/plugins/_pandas.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
# coding: utf-8

# Copyright 2014-2015 Álvaro Justen <https://github.com/turicas/rows/>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

import datetime

import pandas

from rows.plugins.utils import create_table

# strftime/strptime pattern: year-month-day followed by a 24-hour time with
# one-second resolution.
DATE_FORMAT = "%Y-%m-%d %H:%M:%S"

def import_from_pandas(data_frame, *args, **kwargs):
    """Create a table from ``data_frame`` by delegating to ``create_table``.

    The header and rows come from ``_dataframe_generator``.  Any extra
    positional and keyword arguments are forwarded to ``create_table``
    untouched, together with a ``meta`` dict recording the data's origin.

    Returns whatever ``create_table`` returns.
    """
    table_rows = _dataframe_generator(data_frame)
    metadata = {'imported_from': 'pandas', 'filename': 'DataFrame'}
    return create_table(table_rows, *args, meta=metadata, **kwargs)

def _dataframe_generator(data_frame):
    """Yield the header of ``data_frame`` followed by its rows.

    The first item is the list obtained by iterating the frame itself; every
    following item is one row passed through ``dataframe_row_to_list``.
    """
    header = list(data_frame)
    yield header

    # iterrows() yields (index, row) pairs; only the row part is needed.
    for index_and_row in data_frame.iterrows():
        yield dataframe_row_to_list(index_and_row[1])

def dataframe_row_to_list(row):
    """Convert one DataFrame row into a list of plain Python values.

    ``pandas.Timestamp`` cells are replaced by standard-library objects: a
    ``datetime.date`` when the time of day is exactly midnight, otherwise a
    ``datetime.datetime``.  Every other cell is passed through unchanged.

    :param row: iterable of cell values, e.g. a row Series taken from
        ``DataFrame.iterrows()``.
    :returns: a new list with one entry per cell, in the original order.
    """
    result = []
    for element in row:
        # ``pandas.Timestamp`` is the public name of the timestamp class;
        # the ``pandas.tslib`` module is gone from current pandas releases.
        if isinstance(element, pandas.Timestamp):
            if element.time() == datetime.time.min:
                # No time-of-day information: treat the cell as a pure date.
                value = element.date()
            else:
                value = element.to_pydatetime()
        else:
            # Non-timestamp cells must still be emitted; without this branch
            # ``value`` would be unbound or stale from the previous cell.
            value = element
        result.append(value)
    return result


def export_to_pandas(table_obj):
    """Return a ``pandas.DataFrame`` holding the contents of ``table_obj``.

    Rows are supplied by ``_generator_table`` and the column labels are taken
    from ``table_obj.field_names``.
    """
    return pandas.DataFrame(_generator_table(table_obj),
                            columns=table_obj.field_names)

def _generator_table(table_obj):
for row in table_obj:
yield list(row)
4 changes: 3 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,9 @@
'xls': ['xlrd', 'xlwt'],
'xlsx': ['openpyxl'],
'xpath': ['lxml'],
'detect': ['file-magic'], }
'detect': ['file-magic'],
'pandas': ['pandas'],
}
# The 'all' extra is the concatenation of every other extra's requirement
# list (sum with a [] start value joins the lists together).
EXTRA_REQUIREMENTS['all'] = sum(EXTRA_REQUIREMENTS.values(), [])
# A plain install needs 'six' plus whatever the 'csv' extra requires.
INSTALL_REQUIREMENTS = ['six'] + EXTRA_REQUIREMENTS['csv']
LONG_DESCRIPTION = '''
Expand Down
96 changes: 96 additions & 0 deletions tests/tests_plugin_pandas.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
# coding: utf-8

# Copyright 2014-2015 Álvaro Justen <https://github.com/turicas/rows/>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import unittest
import mock
import pandas
import rows.plugins._pandas
import utils
from datetime import datetime as dt
from collections import OrderedDict

class PluginPandasTestCase(utils.RowsTestMixIn, unittest.TestCase):
    """Tests for the pandas plugin (``import_from_pandas``/``export_to_pandas``)."""

    plugin_name = 'pandas'

    # Small all-integer frame used by the mock-based tests.
    data_frame = pandas.DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]],
                                  columns=['A', 'B', 'C', 'D'])

    # Data table with several different types, used by the round-trip tests.
    data_dict = OrderedDict((
        (u"id", [1, 2, 3, 4, 5, 6]),
        (u"name", [u"John", u"Terry", u"Eric", u"Graham", u"Terry",
                   u"Michael"]),
        (u"birth", [dt(1977, 1, 1, 15, 15), dt(1944, 9, 1, 15, 30),
                    dt(1969, 1, 5, 15, 44), dt(1937, 1, 13, 15, 13),
                    dt(1953, 10, 1, 5, 3), dt(1981, 5, 1, 15, 3)]),
        (u"height", [3.3, 1.67, 1.24, 5.12, 1.88, 1.89]),
        (u"is_vegan", [True, False, True, False, True, False])))

    def test_imports(self):
        """Both plugin functions are re-exported at package level."""
        self.assertIs(rows.import_from_pandas,
                      rows.plugins._pandas.import_from_pandas)
        self.assertIs(rows.export_to_pandas,
                      rows.plugins._pandas.export_to_pandas)

    @mock.patch('rows.plugins._pandas.create_table')
    def test_import_from_pandas_uses_create_table(self, mocked_create_table):
        """``import_from_pandas`` forwards kwargs and meta to ``create_table``."""
        mocked_create_table.return_value = 101
        kwargs = {'encoding': 'test', 'some_key': 123, 'other': 456, }
        result = rows.import_from_pandas(self.data_frame, **kwargs)
        self.assertTrue(mocked_create_table.called)
        self.assertEqual(mocked_create_table.call_count, 1)
        self.assertEqual(result, 101)

        call = mocked_create_table.call_args
        # The expectation mirrors the meta dict import_from_pandas builds.
        # NOTE(review): the project owner asked (Nov 4, 2015) for 'filename'
        # to be None, since the data does not come from a file.  When
        # import_from_pandas is changed accordingly, update this value too.
        expected_kwargs = dict(kwargs)
        expected_kwargs['meta'] = {'imported_from': 'pandas',
                                   'filename': 'DataFrame'}
        self.assertEqual(call[1], expected_kwargs)

    @mock.patch('rows.plugins._pandas.pandas.DataFrame')
    def test_export_to_pandas_uses_data_frame(self, mocked_data_frame):
        """``export_to_pandas`` builds its result with ``pandas.DataFrame``."""
        mocked_data_frame.return_value = 101
        result = rows.plugins._pandas.export_to_pandas(utils.table)
        self.assertTrue(mocked_data_frame.called)
        # assertEqual, not assertTrue: assertTrue(count, 1) treats the 1 as
        # the failure message and passes for any non-zero count.
        self.assertEqual(mocked_data_frame.call_count, 1)
        self.assertEqual(result, 101)

    def _compare_tables(self, df, rows_table):
        """Compare a rows table against a pandas.DataFrame, cell by cell."""
        table_headers = list(rows_table.field_names)
        frame_headers = list(df)
        # zip() stops at the shorter input, so check the sizes explicitly.
        self.assertEqual(len(table_headers), len(frame_headers))
        for h0, h1 in zip(table_headers, frame_headers):
            # Header names match?
            self.assertEqual(h0, h1, '%s != %s' % (h0, h1))

        table_rows = list(rows_table)
        frame_rows = df.values
        self.assertEqual(len(table_rows), len(frame_rows))
        for row0, row1 in zip(table_rows, frame_rows):
            for cell0, cell1 in zip(row0, row1):
                # Cell values match?
                self.assertEqual(cell0, cell1, '%s != %s' % (cell0, cell1))

    def test_import_from_pandas(self):
        """A mixed-type data frame imports with matching headers and cells."""
        df = pandas.DataFrame(self.data_dict)
        rows_table = rows.import_from_pandas(df)
        self._compare_tables(df, rows_table)

    def test_export_to_pandas(self):
        """A table built from a data frame exports back to a matching frame."""
        # This test creates a rows.Table from a PANDAS data frame,
        # then converts it back to a PANDAS data frame.
        df0 = pandas.DataFrame(self.data_dict)
        rows_table = rows.import_from_pandas(df0)
        df = rows.export_to_pandas(rows_table)
        self._compare_tables(df, rows_table)