Skip to content

Commit

Permalink
Fix python2 integer serialization bug
Browse files Browse the repository at this point in the history
  • Loading branch information
pschafhalter committed May 17, 2018
1 parent e237918 commit d5e5e5d
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 3 deletions.
10 changes: 8 additions & 2 deletions cpp/src/arrow/python/arrow_to_python.cc
Original file line number Diff line number Diff line change
Expand Up @@ -112,9 +112,15 @@ Status GetValue(PyObject* context, const UnionArray& parent, const Array& arr,
case Type::BOOL:
*result = PyBool_FromLong(checked_cast<const BooleanArray&>(arr).Value(index));
return Status::OK();
case Type::INT64:
*result = PyLong_FromSsize_t(checked_cast<const Int64Array&>(arr).Value(index));
case Type::INT64: {
const std::string& child_name = parent.type()->child(type)->name();
if (child_name == "py2_int") {
*result = PyInt_FromSsize_t(checked_cast<const Int64Array&>(arr).Value(index));
} else {
*result = PyLong_FromSsize_t(checked_cast<const Int64Array&>(arr).Value(index));
}
return Status::OK();
}
case Type::BINARY: {
int32_t nchars;
const uint8_t* str = checked_cast<const BinaryArray&>(arr).GetValue(index, &nchars);
Expand Down
11 changes: 10 additions & 1 deletion cpp/src/arrow/python/python_to_arrow.cc
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ class SequenceBuilder {
nones_(pool),
bools_(::arrow::boolean(), pool),
ints_(::arrow::int64(), pool),
py2_ints_(::arrow::int64(), pool),
bytes_(::arrow::binary(), pool),
strings_(pool),
half_floats_(::arrow::float16(), pool),
Expand Down Expand Up @@ -103,6 +104,11 @@ class SequenceBuilder {
return AppendPrimitive(data, &bool_tag_, &bools_);
}

/// Append a Python 2 `int` value to the sequence.
///
/// Forwards to AppendPrimitive using the dedicated py2_int_tag_ and
/// py2_ints_ builder instead of int_tag_ / ints_, so these values are kept
/// in a separate child from the ones appended through AppendInt64.
///
/// \param[in] data the integer value to append
/// \return the Status produced by AppendPrimitive
Status AppendPy2Int64(const int64_t data) {
return AppendPrimitive(data, &py2_int_tag_, &py2_ints_);
}

/// Appending an int64_t to the sequence
Status AppendInt64(const int64_t data) {
return AppendPrimitive(data, &int_tag_, &ints_);
Expand Down Expand Up @@ -250,6 +256,7 @@ class SequenceBuilder {

RETURN_NOT_OK(AddElement(bool_tag_, &bools_));
RETURN_NOT_OK(AddElement(int_tag_, &ints_));
RETURN_NOT_OK(AddElement(py2_int_tag_, &py2_ints_, "py2_int"));
RETURN_NOT_OK(AddElement(string_tag_, &strings_));
RETURN_NOT_OK(AddElement(bytes_tag_, &bytes_));
RETURN_NOT_OK(AddElement(half_float_tag_, &half_floats_));
Expand Down Expand Up @@ -280,6 +287,7 @@ class SequenceBuilder {
NullBuilder nones_;
BooleanBuilder bools_;
Int64Builder ints_;
Int64Builder py2_ints_;
BinaryBuilder bytes_;
StringBuilder strings_;
HalfFloatBuilder half_floats_;
Expand All @@ -302,6 +310,7 @@ class SequenceBuilder {
// happens in the UPDATE macro in sequence.cc.
int8_t bool_tag_ = -1;
int8_t int_tag_ = -1;
int8_t py2_int_tag_ = -1;
int8_t string_tag_ = -1;
int8_t bytes_tag_ = -1;
int8_t half_float_tag_ = -1;
Expand Down Expand Up @@ -481,7 +490,7 @@ Status Append(PyObject* context, PyObject* elem, SequenceBuilder* builder,
}
#if PY_MAJOR_VERSION < 3
} else if (PyInt_Check(elem)) {
RETURN_NOT_OK(builder->AppendInt64(static_cast<int64_t>(PyInt_AS_LONG(elem))));
RETURN_NOT_OK(builder->AppendPy2Int64(static_cast<int64_t>(PyInt_AS_LONG(elem))));
#endif
} else if (PyBytes_Check(elem)) {
auto data = reinterpret_cast<uint8_t*>(PyBytes_AS_STRING(elem));
Expand Down

0 comments on commit d5e5e5d

Please sign in to comment.