diff --git a/csharp/src/Apache.Arrow/Arrays/DenseUnionArray.cs b/csharp/src/Apache.Arrow/Arrays/DenseUnionArray.cs
index 14cfd9b0591b9..79880c894b13d 100644
--- a/csharp/src/Apache.Arrow/Arrays/DenseUnionArray.cs
+++ b/csharp/src/Apache.Arrow/Arrays/DenseUnionArray.cs
@@ -52,5 +52,34 @@ protected override bool FieldIsValid(IArrowArray fieldArray, int index)
         {
             return fieldArray.IsValid(ValueOffsets[index]);
         }
+
+        /// <summary>
+        /// Counts the null slots in the [Offset, Offset + Length) window of a dense union.
+        /// A slot is null when the child indexed by its type id is null at the slot's value offset.
+        /// </summary>
+        /// <param name="data">Union array data: buffer 0 holds type ids, buffer 1 holds value offsets.</param>
+        /// <returns>The number of null slots in the window.</returns>
+        internal new static int ComputeNullCount(ArrayData data)
+        {
+            var offset = data.Offset;
+            var length = data.Length;
+            var typeIds = data.Buffers[0].Span.Slice(offset, length);
+            var valueOffsets = data.Buffers[1].Span.CastTo<int>().Slice(offset, length);
+            var childArrays = new IArrowArray[data.Children.Length];
+            for (var childIdx = 0; childIdx < data.Children.Length; ++childIdx)
+            {
+                childArrays[childIdx] = ArrowArrayFactory.BuildArray(data.Children[childIdx]);
+            }
+
+            var nullCount = 0;
+            for (var i = 0; i < length; ++i)
+            {
+                var typeId = typeIds[i];
+                var valueOffset = valueOffsets[i];
+                nullCount += childArrays[typeId].IsNull(valueOffset) ? 1 : 0;
+            }
+
+            return nullCount;
+        }
     }
 }
diff --git a/csharp/src/Apache.Arrow/Arrays/SparseUnionArray.cs b/csharp/src/Apache.Arrow/Arrays/SparseUnionArray.cs
index 1c1d1033d38a8..5b29489ebb1f0 100644
--- a/csharp/src/Apache.Arrow/Arrays/SparseUnionArray.cs
+++ b/csharp/src/Apache.Arrow/Arrays/SparseUnionArray.cs
@@ -46,5 +46,32 @@ protected override bool FieldIsValid(IArrowArray fieldArray, int index)
         {
             return fieldArray.IsValid(index);
         }
+
+        /// <summary>
+        /// Counts the null slots in the [Offset, Offset + Length) window of a sparse union.
+        /// A slot is null when the child indexed by its type id is null at position Offset + i of that child.
+        /// </summary>
+        /// <param name="data">Union array data: buffer 0 holds type ids; children are indexed by type id.</param>
+        /// <returns>The number of null slots in the window.</returns>
+        internal new static int ComputeNullCount(ArrayData data)
+        {
+            var offset = data.Offset;
+            var length = data.Length;
+            var typeIds = data.Buffers[0].Span.Slice(offset, length);
+            var childArrays = new IArrowArray[data.Children.Length];
+            for (var childIdx = 0; childIdx < data.Children.Length; ++childIdx)
+            {
+                childArrays[childIdx] = ArrowArrayFactory.BuildArray(data.Children[childIdx]);
+            }
+
+            var nullCount = 0;
+            for (var i = 0; i < length; ++i)
+            {
+                var typeId = typeIds[i];
+                nullCount += childArrays[typeId].IsNull(offset + i) ? 1 : 0;
+            }
+
+            return nullCount;
+        }
     }
 }
diff --git a/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs b/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs
index b11479c0d4460..1cdd97e4ac6c9 100644
--- a/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs
+++ b/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs
@@ -557,6 +557,36 @@ public ArrowStreamWriter(Stream baseStream, Schema schema, bool leaveOpen, IpcOp
             }
         }
 
+        /// <summary>
+        /// Creates a FieldNode (length and null count) for <paramref name="data"/>,
+        /// after first doing the same for the children of nested types, last child first.
+        /// </summary>
+        private void CreateSelfAndChildrenFieldNodes(ArrayData data)
+        {
+            if (data.DataType is NestedType)
+            {
+                // flatbuffer struct vectors have to be created in reverse order
+                for (int i = data.Children.Length - 1; i >= 0; i--)
+                {
+                    CreateSelfAndChildrenFieldNodes(data.Children[i]);
+                }
+            }
+            Flatbuf.FieldNode.CreateFieldNode(Builder, data.Length, data.GetNullCount());
+        }
+
+        /// <summary>
+        /// Returns the total accumulated by <c>CountSelfAndChildrenNodes</c> over every field's data type.
+        /// </summary>
+        private static int CountAllNodes(IReadOnlyList<Field> fields)
+        {
+            int count = 0;
+            foreach (Field field in fields)
+            {
+                CountSelfAndChildrenNodes(field.DataType, ref count);
+            }
+            return count;
+        }
+
         private Offset<Flatbuf.BodyCompression> GetBodyCompression()
         {
             if (_options.CompressionCodec == null)
diff --git a/csharp/test/Apache.Arrow.Tests/UnionArrayTests.cs b/csharp/test/Apache.Arrow.Tests/UnionArrayTests.cs
index 1fb5cf2415c68..45fed722a745c 100644
--- a/csharp/test/Apache.Arrow.Tests/UnionArrayTests.cs
+++ b/csharp/test/Apache.Arrow.Tests/UnionArrayTests.cs
@@ -25,6 +25,46 @@ public class UnionArrayTests
     [InlineData(UnionMode.Sparse)]
     [InlineData(UnionMode.Dense)]
     public void UnionArray_IsNull(UnionMode mode)
+    {
+        var (array, expectedNull) = BuildUnionArray(mode, 100);
+
+        for (var i = 0; i < array.Length; ++i)
+        {
+            Assert.Equal(expectedNull[i], array.IsNull(i));
+            Assert.Equal(!expectedNull[i], array.IsValid(i));
+        }
+    }
+
+    [Theory]
+    [InlineData(UnionMode.Sparse)]
+    [InlineData(UnionMode.Dense)]
+    public void UnionArray_Slice(UnionMode mode)
+    {
+        var (array, expectedNull) = BuildUnionArray(mode, 10);
+
+        for (var offset = 0; offset < array.Length; ++offset)
+        {
+            for (var length = 0; length < array.Length - offset; ++length)
+            {
+                var slicedArray = ArrowArrayFactory.Slice(array, offset, length);
+
+                var nullCount = 0;
+                for (var i = 0; i < slicedArray.Length; ++i)
+                {
+                    // TODO: Shouldn't need to add offset in IsNull/IsValid calls,
+                    // see https://github.com/apache/arrow/issues/41140
+                    Assert.Equal(expectedNull[offset + i], slicedArray.IsNull(offset + i));
+                    Assert.Equal(!expectedNull[offset + i], slicedArray.IsValid(offset + i));
+                    nullCount += expectedNull[offset + i] ? 1 : 0;
+                }
+
+                Assert.True(nullCount == slicedArray.NullCount, $"offset = {offset}, length = {length}");
+                Assert.Equal(nullCount, slicedArray.NullCount);
+            }
+        }
+    }
+
+    private static (UnionArray array, bool[] isNull) BuildUnionArray(UnionMode mode, int length)
     {
         var fields = new Field[]
         {
@@ -34,7 +74,6 @@ public void UnionArray_IsNull(UnionMode mode)
         var typeIds = fields.Select(f => (int) f.DataType.TypeId).ToArray();
         var type = new UnionType(fields, typeIds, mode);
 
-        const int length = 100;
         var nullCount = 0;
         var field0Builder = new Int32Array.Builder();
         var field1Builder = new FloatArray.Builder();
@@ -44,7 +83,7 @@ public void UnionArray_IsNull(UnionMode mode)
 
         for (var i = 0; i < length; ++i)
         {
-            var isNull = i % 5 == 0;
+            var isNull = i % 3 == 0;
             expectedNull[i] = isNull;
             nullCount += isNull ? 1 : 0;
 
@@ -104,10 +143,6 @@ public void UnionArray_IsNull(UnionMode mode)
             ? new DenseUnionArray(type, length, children, typeIdsBuffer, valuesOffsetBuffer, nullCount)
             : new SparseUnionArray(type, length, children, typeIdsBuffer, nullCount);
 
-        for (var i = 0; i < length; ++i)
-        {
-            Assert.Equal(expectedNull[i], array.IsNull(i));
-            Assert.Equal(!expectedNull[i], array.IsValid(i));
-        }
+        return (array, expectedNull);
     }
 }