diff --git a/ci/requirements-3.5.sh b/ci/requirements-3.5.sh
index 33db9c28c78a9..d694ad3679ac1 100644
--- a/ci/requirements-3.5.sh
+++ b/ci/requirements-3.5.sh
@@ -8,4 +8,4 @@ echo "install 35"
 conda remove -n pandas python-dateutil --force
 pip install python-dateutil
 
-conda install -n pandas -c conda-forge feather-format pyarrow=0.4.1
+conda install -n pandas -c conda-forge feather-format pyarrow=0.5.0
diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt
index 1f3bf00c87767..b24a6f067cee4 100644
--- a/doc/source/whatsnew/v0.21.0.txt
+++ b/doc/source/whatsnew/v0.21.0.txt
@@ -125,7 +125,7 @@ Other Enhancements
 - :func:`DataFrame.select_dtypes` now accepts scalar values for include/exclude as well as list-like. (:issue:`16855`)
 - :func:`date_range` now accepts 'YS' in addition to 'AS' as an alias for start of year (:issue:`9313`)
 - :func:`date_range` now accepts 'Y' in addition to 'A' as an alias for end of year (:issue:`9313`)
-- Integration with `Apache Parquet <https://parquet.apache.org/>`__, including a new top-level :func:`read_parquet` and :func:`DataFrame.to_parquet` method, see :ref:`here <io.parquet>`.
+- Integration with `Apache Parquet <https://parquet.apache.org/>`__, including a new top-level :func:`read_parquet` and :func:`DataFrame.to_parquet` method, see :ref:`here <io.parquet>`. (:issue:`15838`, :issue:`17438`)
 - :func:`DataFrame.add_prefix` and :func:`DataFrame.add_suffix` now accept strings containing the '%' character. (:issue:`17151`)
 - `read_*` methods can now infer compression from non-string paths, such as ``pathlib.Path`` objects (:issue:`17206`).
 - :func:`pd.read_sas()` now recognizes much more of the most frequently used date (datetime) formats in SAS7BDAT files (:issue:`15871`).
diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py
index 09603fd6fdcce..4b507b7f5df6f 100644
--- a/pandas/io/parquet.py
+++ b/pandas/io/parquet.py
@@ -58,13 +58,23 @@ def __init__(self):
                               "\nor via pip\n"
                               "pip install -U pyarrow\n")
 
+        self._pyarrow_lt_050 = LooseVersion(pyarrow.__version__) < '0.5.0'
+        self._pyarrow_lt_060 = LooseVersion(pyarrow.__version__) < '0.6.0'
         self.api = pyarrow
 
-    def write(self, df, path, compression='snappy', **kwargs):
+    def write(self, df, path, compression='snappy',
+              coerce_timestamps='ms', **kwargs):
         path, _, _ = get_filepath_or_buffer(path)
-        table = self.api.Table.from_pandas(df, timestamps_to_ms=True)
-        self.api.parquet.write_table(
-            table, path, compression=compression, **kwargs)
+        if self._pyarrow_lt_060:
+            table = self.api.Table.from_pandas(df, timestamps_to_ms=True)
+            self.api.parquet.write_table(
+                table, path, compression=compression, **kwargs)
+
+        else:
+            table = self.api.Table.from_pandas(df)
+            self.api.parquet.write_table(
+                table, path, compression=compression,
+                coerce_timestamps=coerce_timestamps, **kwargs)
 
     def read(self, path):
         path, _, _ = get_filepath_or_buffer(path)
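
A minimal standalone sketch of the version-gated write path in the pandas/io/parquet.py hunk above, useful for checking both branches outside of PyArrowImpl. It only mirrors the logic of the patch; the DataFrame contents and the output filename (df, out_path) are placeholders, and it assumes pyarrow is installed.

# Sketch only: replicates the pyarrow version check from the patch above.
from distutils.version import LooseVersion

import pandas as pd
import pyarrow
import pyarrow.parquet

# Placeholder data and destination, not part of the patch.
df = pd.DataFrame({'ts': pd.date_range('2017-01-01', periods=3, freq='s'),
                   'value': [1, 2, 3]})
out_path = 'example.parquet'

if LooseVersion(pyarrow.__version__) < '0.6.0':
    # pyarrow < 0.6.0: downcast nanosecond timestamps during the
    # pandas -> Arrow conversion itself.
    table = pyarrow.Table.from_pandas(df, timestamps_to_ms=True)
    pyarrow.parquet.write_table(table, out_path, compression='snappy')
else:
    # pyarrow >= 0.6.0: convert as-is and let the Parquet writer coerce
    # timestamps to millisecond resolution instead.
    table = pyarrow.Table.from_pandas(df)
    pyarrow.parquet.write_table(table, out_path, compression='snappy',
                                coerce_timestamps='ms')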