diff --git a/cpp/src/parquet/arrow/arrow_schema_test.cc b/cpp/src/parquet/arrow/arrow_schema_test.cc index 2f1dcda005cc8..b61694bd56d88 100644 --- a/cpp/src/parquet/arrow/arrow_schema_test.cc +++ b/cpp/src/parquet/arrow/arrow_schema_test.cc @@ -813,8 +813,7 @@ TEST_F(TestConvertArrowSchema, ArrowFields) { -1}, {"timestamp(nanosecond, CET)", ::arrow::timestamp(::arrow::TimeUnit::NANO, "CET"), LogicalType::Timestamp(true, LogicalType::TimeUnit::MICROS), ParquetType::INT64, - -1}, - {"null", ::arrow::null(), LogicalType::Null(), ParquetType::INT32, -1}}; + -1}}; std::vector> arrow_fields; std::vector parquet_fields; @@ -1118,6 +1117,16 @@ TEST(InvalidSchema, ParquetNegativeDecimalScale) { ToParquetSchema(arrow_schema.get(), *properties.get(), &result_schema)); } +TEST(InvalidSchema, NonNullableNullType) { + const auto& field = ::arrow::field("f0", ::arrow::null(), /*nullable=*/false); + const auto& arrow_schema = ::arrow::schema({field}); + std::shared_ptr<::parquet::WriterProperties> properties = + ::parquet::default_writer_properties(); + std::shared_ptr result_schema; + ASSERT_RAISES(Invalid, + ToParquetSchema(arrow_schema.get(), *properties.get(), &result_schema)); +} + TEST(TestFromParquetSchema, CorruptMetadata) { // PARQUET-1565: ensure that an IOError is returned when the parquet file contains // corrupted metadata. diff --git a/cpp/src/parquet/arrow/schema.cc b/cpp/src/parquet/arrow/schema.cc index de3f52712174a..eca443c149d17 100644 --- a/cpp/src/parquet/arrow/schema.cc +++ b/cpp/src/parquet/arrow/schema.cc @@ -226,10 +226,13 @@ Status FieldToNode(const std::string& name, const std::shared_ptr& field, int scale = -1; switch (field->type()->id()) { - case ArrowTypeId::NA: + case ArrowTypeId::NA: { type = ParquetType::INT32; logical_type = LogicalType::Null(); - break; + if (repetition != Repetition::OPTIONAL) { + return Status::Invalid("NullType Arrow field must be nullable"); + } + } break; case ArrowTypeId::BOOL: type = ParquetType::BOOLEAN; break; diff --git a/python/pyarrow/tests/test_types.py b/python/pyarrow/tests/test_types.py index 80f881dfa5591..eb6dd52b3ba78 100644 --- a/python/pyarrow/tests/test_types.py +++ b/python/pyarrow/tests/test_types.py @@ -112,6 +112,12 @@ def test_is_null(): assert not types.is_null(pa.list_(pa.int32())) +def test_null_field_may_not_be_non_nullable(): + # ARROW-7273 + with pytest.raises(ValueError): + pa.field('f0', pa.null(), nullable=False) + + def test_is_decimal(): assert types.is_decimal(pa.decimal128(19, 4)) assert not types.is_decimal(pa.int32()) diff --git a/python/pyarrow/types.pxi b/python/pyarrow/types.pxi index 76f2abbfa0831..e541510df0bb0 100644 --- a/python/pyarrow/types.pxi +++ b/python/pyarrow/types.pxi @@ -1704,6 +1704,9 @@ def field(name, type, bint nullable=True, metadata=None): metadata = ensure_metadata(metadata, allow_none=True) c_meta = pyarrow_unwrap_metadata(metadata) + if _type.type.id() == _Type_NA and not nullable: + raise ValueError("A null type field may not be non-nullable") + result.sp_field.reset( new CField(tobytes(name), _type.sp_type, nullable, c_meta) )