Skip to content

Commit

Permalink
ARROW-7273: [Python][C++][Parquet] Do not permit constructing a non-n…
Browse files Browse the repository at this point in the history
…ullable null field in Python, catch this case in Arrow->Parquet schema conversion

This was the simplest triage I could think of. Fixes a DCHECK failure in debug / segfault in release builds

Closes #7562 from wesm/ARROW-7273

Authored-by: Wes McKinney <[email protected]>
Signed-off-by: Wes McKinney <[email protected]>
  • Loading branch information
wesm committed Jun 28, 2020
1 parent 2b37fd4 commit d57d8a2
Show file tree
Hide file tree
Showing 4 changed files with 25 additions and 4 deletions.
13 changes: 11 additions & 2 deletions cpp/src/parquet/arrow/arrow_schema_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -813,8 +813,7 @@ TEST_F(TestConvertArrowSchema, ArrowFields) {
-1},
{"timestamp(nanosecond, CET)", ::arrow::timestamp(::arrow::TimeUnit::NANO, "CET"),
LogicalType::Timestamp(true, LogicalType::TimeUnit::MICROS), ParquetType::INT64,
-1},
{"null", ::arrow::null(), LogicalType::Null(), ParquetType::INT32, -1}};
-1}};

std::vector<std::shared_ptr<Field>> arrow_fields;
std::vector<NodePtr> parquet_fields;
Expand Down Expand Up @@ -1118,6 +1117,16 @@ TEST(InvalidSchema, ParquetNegativeDecimalScale) {
ToParquetSchema(arrow_schema.get(), *properties.get(), &result_schema));
}

TEST(InvalidSchema, NonNullableNullType) {
const auto& field = ::arrow::field("f0", ::arrow::null(), /*nullable=*/false);
const auto& arrow_schema = ::arrow::schema({field});
std::shared_ptr<::parquet::WriterProperties> properties =
::parquet::default_writer_properties();
std::shared_ptr<SchemaDescriptor> result_schema;
ASSERT_RAISES(Invalid,
ToParquetSchema(arrow_schema.get(), *properties.get(), &result_schema));
}

TEST(TestFromParquetSchema, CorruptMetadata) {
// PARQUET-1565: ensure that an IOError is returned when the parquet file contains
// corrupted metadata.
Expand Down
7 changes: 5 additions & 2 deletions cpp/src/parquet/arrow/schema.cc
Original file line number Diff line number Diff line change
Expand Up @@ -226,10 +226,13 @@ Status FieldToNode(const std::string& name, const std::shared_ptr<Field>& field,
int scale = -1;

switch (field->type()->id()) {
case ArrowTypeId::NA:
case ArrowTypeId::NA: {
type = ParquetType::INT32;
logical_type = LogicalType::Null();
break;
if (repetition != Repetition::OPTIONAL) {
return Status::Invalid("NullType Arrow field must be nullable");
}
} break;
case ArrowTypeId::BOOL:
type = ParquetType::BOOLEAN;
break;
Expand Down
6 changes: 6 additions & 0 deletions python/pyarrow/tests/test_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,12 @@ def test_is_null():
assert not types.is_null(pa.list_(pa.int32()))


def test_null_field_may_not_be_non_nullable():
# ARROW-7273
with pytest.raises(ValueError):
pa.field('f0', pa.null(), nullable=False)


def test_is_decimal():
assert types.is_decimal(pa.decimal128(19, 4))
assert not types.is_decimal(pa.int32())
Expand Down
3 changes: 3 additions & 0 deletions python/pyarrow/types.pxi
Original file line number Diff line number Diff line change
Expand Up @@ -1704,6 +1704,9 @@ def field(name, type, bint nullable=True, metadata=None):
metadata = ensure_metadata(metadata, allow_none=True)
c_meta = pyarrow_unwrap_metadata(metadata)

if _type.type.id() == _Type_NA and not nullable:
raise ValueError("A null type field may not be non-nullable")

result.sp_field.reset(
new CField(tobytes(name), _type.sp_type, nullable, c_meta)
)
Expand Down

0 comments on commit d57d8a2

Please sign in to comment.