From d5e5e5dbeb9fb23e895b9ac654fafb764a164c0b Mon Sep 17 00:00:00 2001
From: Peter Schafhalter
Date: Wed, 16 May 2018 17:00:11 -0700
Subject: [PATCH] Fix python2 integer serialization bug

---
 cpp/src/arrow/python/arrow_to_python.cc | 10 ++++++++--
 cpp/src/arrow/python/python_to_arrow.cc | 11 ++++++++++-
 2 files changed, 18 insertions(+), 3 deletions(-)

diff --git a/cpp/src/arrow/python/arrow_to_python.cc b/cpp/src/arrow/python/arrow_to_python.cc
index 57884e58c374c..518a560b89483 100644
--- a/cpp/src/arrow/python/arrow_to_python.cc
+++ b/cpp/src/arrow/python/arrow_to_python.cc
@@ -112,9 +112,15 @@ Status GetValue(PyObject* context, const UnionArray& parent, const Array& arr,
     case Type::BOOL:
       *result = PyBool_FromLong(checked_cast<const BooleanArray&>(arr).Value(index));
       return Status::OK();
-    case Type::INT64:
-      *result = PyLong_FromSsize_t(checked_cast<const Int64Array&>(arr).Value(index));
+    case Type::INT64: {
+      const std::string& child_name = parent.type()->child(type)->name();
+      if (child_name == "py2_int") {
+        *result = PyInt_FromSsize_t(checked_cast<const Int64Array&>(arr).Value(index));
+      } else {
+        *result = PyLong_FromSsize_t(checked_cast<const Int64Array&>(arr).Value(index));
+      }
       return Status::OK();
+    }
     case Type::BINARY: {
       int32_t nchars;
       const uint8_t* str = checked_cast<const BinaryArray&>(arr).GetValue(index, &nchars);
diff --git a/cpp/src/arrow/python/python_to_arrow.cc b/cpp/src/arrow/python/python_to_arrow.cc
index 64cf2b4c168c0..23445ee26d0e7 100644
--- a/cpp/src/arrow/python/python_to_arrow.cc
+++ b/cpp/src/arrow/python/python_to_arrow.cc
@@ -61,6 +61,7 @@ class SequenceBuilder {
         nones_(pool),
         bools_(::arrow::boolean(), pool),
         ints_(::arrow::int64(), pool),
+        py2_ints_(::arrow::int64(), pool),
         bytes_(::arrow::binary(), pool),
         strings_(pool),
         half_floats_(::arrow::float16(), pool),
@@ -103,6 +104,11 @@ class SequenceBuilder {
     return AppendPrimitive(data, &bool_tag_, &bools_);
   }
 
+  /// Appending a python 2 int64_t to the sequence
+  Status AppendPy2Int64(const int64_t data) {
+    return AppendPrimitive(data, &py2_int_tag_, &py2_ints_);
+  }
+
   /// Appending an int64_t to the sequence
   Status AppendInt64(const int64_t data) {
     return AppendPrimitive(data, &int_tag_, &ints_);
@@ -250,6 +256,7 @@ class SequenceBuilder {
 
     RETURN_NOT_OK(AddElement(bool_tag_, &bools_));
     RETURN_NOT_OK(AddElement(int_tag_, &ints_));
+    RETURN_NOT_OK(AddElement(py2_int_tag_, &py2_ints_, "py2_int"));
     RETURN_NOT_OK(AddElement(string_tag_, &strings_));
     RETURN_NOT_OK(AddElement(bytes_tag_, &bytes_));
     RETURN_NOT_OK(AddElement(half_float_tag_, &half_floats_));
@@ -280,6 +287,7 @@ class SequenceBuilder {
   NullBuilder nones_;
   BooleanBuilder bools_;
   Int64Builder ints_;
+  Int64Builder py2_ints_;
   BinaryBuilder bytes_;
   StringBuilder strings_;
   HalfFloatBuilder half_floats_;
@@ -302,6 +310,7 @@ class SequenceBuilder {
   // happens in the UPDATE macro in sequence.cc.
   int8_t bool_tag_ = -1;
   int8_t int_tag_ = -1;
+  int8_t py2_int_tag_ = -1;
   int8_t string_tag_ = -1;
   int8_t bytes_tag_ = -1;
   int8_t half_float_tag_ = -1;
@@ -481,7 +490,7 @@ Status Append(PyObject* context, PyObject* elem, SequenceBuilder* builder,
     }
 #if PY_MAJOR_VERSION < 3
   } else if (PyInt_Check(elem)) {
-    RETURN_NOT_OK(builder->AppendInt64(static_cast<int64_t>(PyInt_AS_LONG(elem))));
+    RETURN_NOT_OK(builder->AppendPy2Int64(static_cast<int64_t>(PyInt_AS_LONG(elem))));
 #endif
   } else if (PyBytes_Check(elem)) {
     auto data = reinterpret_cast<uint8_t*>(PyBytes_AS_STRING(elem));