From 382592fbf373e1ccce055d1a47adb060bd36e502 Mon Sep 17 00:00:00 2001
From: Jeff Reback
Date: Tue, 5 Sep 2017 08:09:48 -0400
Subject: [PATCH] deprecate timestamps_to_ms in .from_pandas()

---
 python/pyarrow/array.pxi                     |  2 +
 python/pyarrow/table.pxi                     |  3 +
 python/pyarrow/tests/test_convert_pandas.py  | 62 +++++----------------
 3 files changed, 19 insertions(+), 48 deletions(-)

diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi
index 20e778d068ff8..a693f45e967a5 100644
--- a/python/pyarrow/array.pxi
+++ b/python/pyarrow/array.pxi
@@ -121,6 +121,8 @@ cdef class Array:
             compatibility with other functionality like Parquet I/O which
             only supports milliseconds.
 
+            .. deprecated:: 0.7.0
+
         memory_pool: MemoryPool, optional
             Specific memory pool to use to allocate the resulting Arrow array.
 
diff --git a/python/pyarrow/table.pxi b/python/pyarrow/table.pxi
index 245371ffaa124..fc6099fe8430d 100644
--- a/python/pyarrow/table.pxi
+++ b/python/pyarrow/table.pxi
@@ -723,6 +723,9 @@ cdef class Table:
             Convert datetime columns to ms resolution. This is needed for
             compability with other functionality like Parquet I/O which
             only supports milliseconds.
+
+            .. deprecated:: 0.7.0
+
         schema : pyarrow.Schema, optional
             The expected schema of the Arrow Table. This can be used to
             indicate the type of columns if we cannot infer it automatically.
diff --git a/python/pyarrow/tests/test_convert_pandas.py b/python/pyarrow/tests/test_convert_pandas.py
index 52290d6d85533..64424341067bb 100644
--- a/python/pyarrow/tests/test_convert_pandas.py
+++ b/python/pyarrow/tests/test_convert_pandas.py
@@ -69,10 +69,10 @@ def tearDown(self):
         pass
 
     def _check_pandas_roundtrip(self, df, expected=None, nthreads=1,
-                                timestamps_to_ms=False, expected_schema=None,
+                                expected_schema=None,
                                 check_dtype=True, schema=None,
                                 check_index=False):
-        table = pa.Table.from_pandas(df, timestamps_to_ms=timestamps_to_ms,
+        table = pa.Table.from_pandas(df,
                                      schema=schema, preserve_index=check_index)
         result = table.to_pandas(nthreads=nthreads)
         if expected_schema:
@@ -92,9 +92,8 @@ def _check_series_roundtrip(self, s, type_=None):
         tm.assert_series_equal(s, result)
 
     def _check_array_roundtrip(self, values, expected=None, mask=None,
-                               timestamps_to_ms=False, type=None):
-        arr = pa.Array.from_pandas(values, timestamps_to_ms=timestamps_to_ms,
-                                   mask=mask, type=type)
+                               type=None):
+        arr = pa.Array.from_pandas(values, mask=mask, type=type)
         result = arr.to_pandas()
 
         values_nulls = pd.isnull(values)
@@ -332,21 +331,6 @@ def test_fixed_size_bytes_does_not_accept_varying_lengths(self):
             pa.Table.from_pandas(df, schema=schema)
 
     def test_timestamps_notimezone_no_nulls(self):
-        df = pd.DataFrame({
-            'datetime64': np.array([
-                '2007-07-13T01:23:34.123',
-                '2006-01-13T12:34:56.432',
-                '2010-08-13T05:46:57.437'],
-                dtype='datetime64[ms]')
-        })
-        field = pa.field('datetime64', pa.timestamp('ms'))
-        schema = pa.schema([field])
-        self._check_pandas_roundtrip(
-            df,
-            timestamps_to_ms=True,
-            expected_schema=schema,
-        )
-
         df = pd.DataFrame({
             'datetime64': np.array([
                 '2007-07-13T01:23:34.123456789',
@@ -357,7 +341,8 @@ def test_timestamps_notimezone_no_nulls(self):
         field = pa.field('datetime64', pa.timestamp('ns'))
         schema = pa.schema([field])
         self._check_pandas_roundtrip(
-            df, expected_schema=schema,
+            df,
+            expected_schema=schema,
         )
 
     def test_timestamps_to_ms_explicit_schema(self):
@@ -365,28 +350,15 @@ def test_timestamps_to_ms_explicit_schema(self):
         df = pd.DataFrame({'datetime': [datetime(2017, 1, 1)]})
         pa_type = pa.from_numpy_dtype(df['datetime'].dtype)
 
-        arr = pa.Array.from_pandas(df['datetime'], type=pa_type,
-                                   timestamps_to_ms=True)
+        with tm.assert_produces_warning(FutureWarning,
+                                        check_stacklevel=False):
+            arr = pa.Array.from_pandas(df['datetime'], type=pa_type,
+                                       timestamps_to_ms=True)
 
         tm.assert_almost_equal(df['datetime'].values.astype('M8[ms]'),
                                arr.to_pandas())
 
     def test_timestamps_notimezone_nulls(self):
-        df = pd.DataFrame({
-            'datetime64': np.array([
-                '2007-07-13T01:23:34.123',
-                None,
-                '2010-08-13T05:46:57.437'],
-                dtype='datetime64[ms]')
-        })
-        field = pa.field('datetime64', pa.timestamp('ms'))
-        schema = pa.schema([field])
-        self._check_pandas_roundtrip(
-            df,
-            timestamps_to_ms=True,
-            expected_schema=schema,
-        )
-
         df = pd.DataFrame({
             'datetime64': np.array([
                 '2007-07-13T01:23:34.123456789',
@@ -397,7 +369,8 @@ def test_timestamps_notimezone_nulls(self):
         field = pa.field('datetime64', pa.timestamp('ns'))
         schema = pa.schema([field])
         self._check_pandas_roundtrip(
-            df, expected_schema=schema,
+            df,
+            expected_schema=schema,
         )
 
     def test_timestamps_with_timezone(self):
@@ -410,7 +383,7 @@ def test_timestamps_with_timezone(self):
         })
         df['datetime64'] = (df['datetime64'].dt.tz_localize('US/Eastern')
                             .to_frame())
-        self._check_pandas_roundtrip(df, timestamps_to_ms=True)
+        self._check_pandas_roundtrip(df)
 
         self._check_series_roundtrip(df['datetime64'])
 
@@ -425,15 +398,8 @@ def test_timestamps_with_timezone(self):
         })
         df['datetime64'] = (df['datetime64'].dt.tz_localize('US/Eastern')
                             .to_frame())
-        self._check_pandas_roundtrip(df)
-
-    def test_timestamp_with_tz_to_pandas_type(self):
-        from pyarrow.compat import DatetimeTZDtype
-        tz = 'America/Los_Angeles'
-        t = pa.timestamp('ns', tz=tz)
-
-        assert t.to_pandas_dtype() == DatetimeTZDtype('ns', tz=tz)
+        self._check_pandas_roundtrip(df)
 
     def test_date_infer(self):
         df = pd.DataFrame({
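
A minimal usage sketch of what this deprecation means for callers, assuming pyarrow 0.7.0 with the patch applied (the sketch is not part of the diff). The FutureWarning is the one the updated test_timestamps_to_ms_explicit_schema asserts via tm.assert_produces_warning; the explicit `type=` call at the end is an assumed migration path inferred from that test's use of an explicit type, not an API this patch adds.

```python
# Sketch only; assumes pyarrow 0.7.0 with this patch applied and pandas installed.
import warnings
from datetime import datetime

import pandas as pd
import pyarrow as pa

df = pd.DataFrame({'datetime': [datetime(2017, 1, 1)]})

# The deprecated keyword still works, but it now emits a FutureWarning,
# which is what tm.assert_produces_warning(FutureWarning, ...) checks in
# the updated test.
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter('always')
    arr = pa.Array.from_pandas(df['datetime'],
                               type=pa.timestamp('ns'),
                               timestamps_to_ms=True)
print(any(issubclass(w.category, FutureWarning) for w in caught))  # expected: True

# Assumed migration path: request the resolution explicitly via `type=`
# (inferred from the test's use of an explicit type; not verified against
# every pyarrow version).
arr_ms = pa.Array.from_pandas(df['datetime'], type=pa.timestamp('ms'))
print(arr_ms.type)
```

Deprecating the keyword instead of removing it keeps existing timestamps_to_ms callers working for this release while steering them toward stating the target timestamp type explicitly.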