Skip to content

Commit

Permalink
WIP
Browse files Browse the repository at this point in the history
  • Loading branch information
pitrou committed Sep 26, 2023
1 parent 5978729 commit a12ee81
Show file tree
Hide file tree
Showing 12 changed files with 365 additions and 200 deletions.
21 changes: 12 additions & 9 deletions ci/scripts/integration_arrow.sh
Original file line number Diff line number Diff line change
Expand Up @@ -23,20 +23,23 @@ arrow_dir=${1}
gold_dir=$arrow_dir/testing/data/arrow-ipc-stream/integration

pip install -e $arrow_dir/dev/archery[integration]
pip install pythonnet

# --run-flight \

# Rust can be enabled by exporting ARCHERY_INTEGRATION_WITH_RUST=1
time archery integration \
--run-c-data \
--run-ipc \
--run-flight \
--with-cpp=1 \
--with-csharp=1 \
--with-java=1 \
--with-js=1 \
--with-java=0 \
--with-js=0 \
--with-go=1 \
--gold-dirs=$gold_dir/0.14.1 \
--gold-dirs=$gold_dir/0.17.1 \
--gold-dirs=$gold_dir/1.0.0-bigendian \
--gold-dirs=$gold_dir/1.0.0-littleendian \
--gold-dirs=$gold_dir/2.0.0-compression \
--gold-dirs=$gold_dir/4.0.0-shareddict \

# --gold-dirs=$gold_dir/0.14.1 \
# --gold-dirs=$gold_dir/0.17.1 \
# --gold-dirs=$gold_dir/1.0.0-bigendian \
# --gold-dirs=$gold_dir/1.0.0-littleendian \
# --gold-dirs=$gold_dir/2.0.0-compression \
# --gold-dirs=$gold_dir/4.0.0-shareddict \
9 changes: 6 additions & 3 deletions csharp/src/Apache.Arrow/C/CArrowSchemaImporter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ public static class CArrowSchemaImporter
/// Typically, you will allocate an uninitialized CArrowSchema pointer,
/// pass that to an external function, and then use this method to import
/// the result.
///
///
/// <code>
/// CArrowSchema* importedPtr = CArrowSchema.Create();
/// foreign_export_function(importedPtr);
Expand All @@ -62,7 +62,7 @@ public static unsafe ArrowType ImportType(CArrowSchema* ptr)
/// Typically, you will allocate an uninitialized CArrowSchema pointer,
/// pass that to an external function, and then use this method to import
/// the result.
///
///
/// <code>
/// CArrowSchema* importedPtr = CArrowSchema.Create();
/// foreign_export_function(importedPtr);
Expand All @@ -87,7 +87,7 @@ public static unsafe Field ImportField(CArrowSchema* ptr)
/// Typically, you will allocate an uninitialized CArrowSchema pointer,
/// pass that to an external function, and then use this method to import
/// the result.
///
///
/// <code>
/// CArrowSchema* importedPtr = CArrowSchema.Create();
/// foreign_export_function(importedPtr);
Expand Down Expand Up @@ -241,6 +241,9 @@ public ArrowType GetAsType()
};

string timezone = format.Substring(format.IndexOf(':') + 1);
if (timezone.Length == 0) {
timezone = null;
}
return new TimestampType(timeUnit, timezone);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

<PropertyGroup>
<OutputType>Exe</OutputType>
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
<TargetFrameworks>net7.0</TargetFrameworks>
</PropertyGroup>

Expand All @@ -13,4 +14,4 @@
<ProjectReference Include="..\Apache.Arrow.Tests\Apache.Arrow.Tests.csproj" />
</ItemGroup>

</Project>
</Project>
59 changes: 59 additions & 0 deletions csharp/test/Apache.Arrow.IntegrationTest/CDataInterface.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
// Licensed to the Apache Software Foundation (ASF) under one or more
// contributor license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright ownership.
// The ASF licenses this file to You under the Apache License, Version 2.0
// (the "License"); you may not use this file except in compliance with
// the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

using System;
using System.Collections.Generic;
using System.IO;
using Apache.Arrow.C;
using Apache.Arrow.Types;

namespace Apache.Arrow.IntegrationTest
{
/// <summary>
/// Bridge for C Data Interface integration testing.
/// These methods are called from the Python integration testing
/// harness provided by Archery.
/// </summary>
public static class CDataInterface
{
    // Archery uses the `pythonnet` library (*) to invoke .NET DLLs.
    // Unfortunately, `pythonnet` is only able to marshal simple types,
    // which is why we provide trivial wrappers around other APIs.
    //
    // (*) https://pythonnet.github.io/

    /// <summary>
    /// Imports a schema from the C Data Interface structure at the given address.
    /// </summary>
    /// <param name="ptr">Address of a <c>CArrowSchema</c> structure, passed as a plain integer.</param>
    /// <returns>The result of <c>CArrowSchemaImporter.ImportSchema</c> for that pointer.</returns>
    public static unsafe Schema ImportSchema(long ptr)
    {
        return CArrowSchemaImporter.ImportSchema((CArrowSchema*) ptr);
    }

    /// <summary>
    /// Exports a schema into the C Data Interface structure at the given address.
    /// </summary>
    /// <param name="schema">The schema to export.</param>
    /// <param name="ptr">Address of a <c>CArrowSchema</c> structure, passed as a plain integer.</param>
    public static unsafe void ExportSchema(Schema schema, long ptr)
    {
        CArrowSchemaExporter.ExportSchema(schema, (CArrowSchema*) ptr);
    }

    /// <summary>
    /// Parses the integration JSON file at the given path.
    /// </summary>
    /// <param name="jsonPath">Path of the JSON file to parse.</param>
    /// <returns>The parsed <c>JsonFile</c>.</returns>
    public static JsonFile ParseJsonFile(string jsonPath)
    {
        // Blocks on the asynchronous parse; presumably so that the Python
        // caller receives a finished JsonFile rather than an awaitable.
        return JsonFile.Parse(new FileInfo(jsonPath)).GetAwaiter().GetResult();
    }

    /// <summary>
    /// Returns the number of bytes the GC reports as allocated after
    /// forcing collection and finalization.
    /// </summary>
    /// <returns>The value of <c>GC.GetTotalMemory</c> after a forced full collection.</returns>
    public static long GetTotalGCMemory()
    {
        // XXX this doesn't seem to give stable and reliable measurements
        //
        // An object with a finalizer is only reclaimed by a collection that
        // runs after its finalizer has executed. Collect once, drain the
        // finalizer queue, then let GetTotalMemory perform the final full
        // collection so that finalizable garbage is not counted.
        GC.Collect();
        GC.WaitForPendingFinalizers();
        return GC.GetTotalMemory(forceFullCollection: true);
    }
}
}
169 changes: 3 additions & 166 deletions csharp/test/Apache.Arrow.IntegrationTest/IntegrationCommand.cs
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ private async Task<int> Validate()
return -1;
}

Schema jsonFileSchema = CreateSchema(jsonFile.Schema);
Schema jsonFileSchema = jsonFile.Schema.ToArrow();
Schema arrowFileSchema = reader.Schema;

SchemaComparer.Compare(jsonFileSchema, arrowFileSchema);
Expand All @@ -98,7 +98,7 @@ private async Task<int> Validate()
private async Task<int> JsonToArrow()
{
JsonFile jsonFile = await ParseJsonFile();
Schema schema = CreateSchema(jsonFile.Schema);
Schema schema = jsonFile.Schema.ToArrow();

using (FileStream fs = ArrowFileInfo.Create())
{
Expand Down Expand Up @@ -137,162 +137,6 @@ private RecordBatch CreateRecordBatch(Schema schema, JsonRecordBatch jsonRecordB
return new RecordBatch(schema, arrays, jsonRecordBatch.Count);
}

/// <summary>
/// Builds an Arrow <c>Schema</c> from a <c>JsonSchema</c>, adding one field
/// per entry of <c>jsonSchema.Fields</c>, in order.
/// </summary>
/// <param name="jsonSchema">The JSON schema description to convert.</param>
/// <returns>The schema produced by the builder.</returns>
private static Schema CreateSchema(JsonSchema jsonSchema)
{
    Schema.Builder builder = new Schema.Builder();
    for (int i = 0; i < jsonSchema.Fields.Count; i++)
    {
        // Capture the element rather than the loop counter: a lambda that
        // closes over `i` would see its final value if the builder ever
        // invoked the callback after the loop has advanced.
        JsonField jsonField = jsonSchema.Fields[i];
        builder.Field(f => CreateField(f, jsonField));
    }
    return builder.Build();
}

/// <summary>
/// Populates a field builder (name, data type, nullability and, when
/// present, metadata) from a JSON field description. Child fields are
/// converted recursively before the data type is resolved.
/// </summary>
/// <param name="builder">The builder to populate.</param>
/// <param name="jsonField">The JSON field description to read from.</param>
private static void CreateField(Field.Builder builder, JsonField jsonField)
{
    var jsonChildren = jsonField.Children;
    Field[] children = null;

    // A missing or empty child list leaves `children` as null.
    if (jsonChildren != null && jsonChildren.Count > 0)
    {
        children = new Field[jsonChildren.Count];
        for (int index = 0; index < children.Length; index++)
        {
            Field.Builder childBuilder = new Field.Builder();
            CreateField(childBuilder, jsonChildren[index]);
            children[index] = childBuilder.Build();
        }
    }

    builder
        .Name(jsonField.Name)
        .DataType(ToArrowType(jsonField.Type, children))
        .Nullable(jsonField.Nullable);

    var metadata = jsonField.Metadata;
    if (metadata != null)
    {
        builder.Metadata(metadata);
    }
}

/// <summary>
/// Maps a JSON type description to an Arrow type, dispatching on
/// <c>type.Name</c>.
/// </summary>
/// <param name="type">The JSON type description.</param>
/// <param name="children">Already-converted child fields, passed on to the nested-type helpers.</param>
/// <returns>The corresponding Arrow type.</returns>
/// <exception cref="NotSupportedException">The type name is not one of the handled names.</exception>
private static IArrowType ToArrowType(JsonArrowType type, Field[] children)
{
    switch (type.Name)
    {
        case "bool":
            return BooleanType.Default;
        case "int":
            return ToIntArrowType(type);
        case "floatingpoint":
            return ToFloatingPointArrowType(type);
        case "decimal":
            return ToDecimalArrowType(type);
        case "binary":
            return BinaryType.Default;
        case "utf8":
            return StringType.Default;
        case "fixedsizebinary":
            return new FixedSizeBinaryType(type.ByteWidth);
        case "date":
            return ToDateArrowType(type);
        case "time":
            return ToTimeArrowType(type);
        case "timestamp":
            return ToTimestampArrowType(type);
        case "list":
            return ToListArrowType(type, children);
        case "fixedsizelist":
            return ToFixedSizeListArrowType(type, children);
        case "struct":
            return ToStructArrowType(type, children);
        case "union":
            return ToUnionArrowType(type, children);
        case "null":
            return NullType.Default;
        default:
            throw new NotSupportedException($"JsonArrowType not supported: {type.Name}");
    }
}

/// <summary>
/// Selects the integer Arrow type matching the bit width and signedness
/// of a JSON "int" type description.
/// </summary>
/// <param name="type">The JSON type description.</param>
/// <returns>The default instance of the matching integer type.</returns>
/// <exception cref="NotSupportedException">The width/signedness pair is not handled.</exception>
private static IArrowType ToIntArrowType(JsonArrowType type)
{
    switch (type.BitWidth, type.IsSigned)
    {
        case (8, true):
            return Int8Type.Default;
        case (8, false):
            return UInt8Type.Default;
        case (16, true):
            return Int16Type.Default;
        case (16, false):
            return UInt16Type.Default;
        case (32, true):
            return Int32Type.Default;
        case (32, false):
            return UInt32Type.Default;
        case (64, true):
            return Int64Type.Default;
        case (64, false):
            return UInt64Type.Default;
        default:
            throw new NotSupportedException($"Int type not supported: {type.BitWidth}, {type.IsSigned}");
    }
}

/// <summary>
/// Selects the floating-point Arrow type named by
/// <c>type.FloatingPointPrecision</c>.
/// </summary>
/// <param name="type">The JSON type description.</param>
/// <returns><c>FloatType.Default</c> for "SINGLE", <c>DoubleType.Default</c> for "DOUBLE".</returns>
/// <exception cref="NotSupportedException">The precision is neither "SINGLE" nor "DOUBLE".</exception>
private static IArrowType ToFloatingPointArrowType(JsonArrowType type)
{
    var precision = type.FloatingPointPrecision;
    if (precision is "SINGLE")
    {
        return FloatType.Default;
    }
    if (precision is "DOUBLE")
    {
        return DoubleType.Default;
    }

    throw new NotSupportedException($"FloatingPoint type not supported: {type.FloatingPointPrecision}");
}

/// <summary>
/// Creates a decimal Arrow type from a JSON "decimal" type description.
/// A bit width of 256 yields <c>Decimal256Type</c>; every other bit width
/// yields <c>Decimal128Type</c>.
/// </summary>
/// <param name="type">The JSON type description supplying precision and scale.</param>
/// <returns>The decimal type with the described precision and scale.</returns>
private static IArrowType ToDecimalArrowType(JsonArrowType type)
{
    if (type.BitWidth is 256)
    {
        return new Decimal256Type(type.DecimalPrecision, type.Scale);
    }

    // Any width other than 256 is treated as a 128-bit decimal.
    return new Decimal128Type(type.DecimalPrecision, type.Scale);
}

/// <summary>
/// Selects the date Arrow type named by <c>type.Unit</c>.
/// </summary>
/// <param name="type">The JSON type description.</param>
/// <returns><c>Date32Type.Default</c> for "DAY", <c>Date64Type.Default</c> for "MILLISECOND".</returns>
/// <exception cref="NotSupportedException">The unit is neither "DAY" nor "MILLISECOND".</exception>
private static IArrowType ToDateArrowType(JsonArrowType type)
{
    switch (type.Unit)
    {
        case "DAY":
            return Date32Type.Default;
        case "MILLISECOND":
            return Date64Type.Default;
        default:
            throw new NotSupportedException($"Date type not supported: {type.Unit}");
    }
}

/// <summary>
/// Creates a time Arrow type from a JSON "time" type description: the
/// unit string picks the <c>TimeUnit</c>, and the bit width (32 or 64)
/// picks between <c>Time32Type</c> and <c>Time64Type</c>.
/// </summary>
/// <param name="type">The JSON type description.</param>
/// <returns>The time type for the described unit and bit width.</returns>
/// <exception cref="NotSupportedException">The unit or the bit width is not handled.</exception>
private static IArrowType ToTimeArrowType(JsonArrowType type)
{
    // First resolve the unit name; an unknown name leaves this null.
    TimeUnit? unit = type.Unit switch
    {
        "SECOND" => TimeUnit.Second,
        "MILLISECOND" => TimeUnit.Millisecond,
        "MICROSECOND" => TimeUnit.Microsecond,
        "NANOSECOND" => TimeUnit.Nanosecond,
        _ => (TimeUnit?)null,
    };

    if (unit.HasValue)
    {
        switch (type.BitWidth)
        {
            case 32:
                return new Time32Type(unit.Value);
            case 64:
                return new Time64Type(unit.Value);
        }
    }

    throw new NotSupportedException($"Time type not supported: {type.Unit}, {type.BitWidth}");
}

/// <summary>
/// Creates a timestamp Arrow type from a JSON "timestamp" type
/// description, using <c>type.Unit</c> for the resolution and passing
/// <c>type.Timezone</c> through unchanged.
/// </summary>
/// <param name="type">The JSON type description.</param>
/// <returns>A <c>TimestampType</c> with the described unit and timezone.</returns>
/// <exception cref="NotSupportedException">The unit is not one of the four handled names.</exception>
private static IArrowType ToTimestampArrowType(JsonArrowType type)
{
    TimeUnit unit = type.Unit switch
    {
        "SECOND" => TimeUnit.Second,
        "MILLISECOND" => TimeUnit.Millisecond,
        "MICROSECOND" => TimeUnit.Microsecond,
        "NANOSECOND" => TimeUnit.Nanosecond,
        // Report this as a timestamp failure; the bit width plays no part
        // in the decision, so it is not included in the message.
        _ => throw new NotSupportedException($"Timestamp type not supported: {type.Unit}")
    };

    return new TimestampType(unit, type.Timezone);
}

/// <summary>
/// Creates a list Arrow type whose value field is the first child field.
/// </summary>
/// <param name="type">The JSON type description (not consulted).</param>
/// <param name="children">Converted child fields; only element 0 is used.</param>
/// <returns>A <c>ListType</c> over <c>children[0]</c>.</returns>
private static IArrowType ToListArrowType(JsonArrowType type, Field[] children)
{
    Field valueField = children[0];
    return new ListType(valueField);
}

/// <summary>
/// Creates a fixed-size list Arrow type whose value field is the first
/// child field and whose size is <c>type.ListSize</c>.
/// </summary>
/// <param name="type">The JSON type description supplying the list size.</param>
/// <param name="children">Converted child fields; only element 0 is used.</param>
/// <returns>A <c>FixedSizeListType</c> over <c>children[0]</c>.</returns>
private static IArrowType ToFixedSizeListArrowType(JsonArrowType type, Field[] children)
{
    Field valueField = children[0];
    return new FixedSizeListType(valueField, type.ListSize);
}

/// <summary>
/// Creates a struct Arrow type from the given child fields.
/// </summary>
/// <param name="type">The JSON type description (not consulted).</param>
/// <param name="children">Converted child fields, passed to <c>StructType</c> as-is.</param>
/// <returns>A <c>StructType</c> over <paramref name="children"/>.</returns>
private static IArrowType ToStructArrowType(JsonArrowType type, Field[] children)
    => new StructType(children);

/// <summary>
/// Creates a union Arrow type from the child fields, the type ids in
/// <c>type.TypeIds</c>, and the mode named by <c>type.Mode</c>.
/// </summary>
/// <param name="type">The JSON type description supplying mode and type ids.</param>
/// <param name="children">Converted child fields of the union.</param>
/// <returns>A <c>UnionType</c> in sparse or dense mode.</returns>
/// <exception cref="NotSupportedException">The mode is neither "SPARSE" nor "DENSE".</exception>
private static IArrowType ToUnionArrowType(JsonArrowType type, Field[] children)
{
    UnionMode mode;
    switch (type.Mode)
    {
        case "SPARSE":
            mode = UnionMode.Sparse;
            break;
        case "DENSE":
            mode = UnionMode.Dense;
            break;
        default:
            throw new NotSupportedException($"Union mode not supported: {type.Mode}");
    }

    return new UnionType(children, type.TypeIds, mode);
}

private class ArrayCreator :
IArrowTypeVisitor<BooleanType>,
IArrowTypeVisitor<Int8Type>,
Expand Down Expand Up @@ -752,14 +596,7 @@ private async Task<int> FileToStream()

private async ValueTask<JsonFile> ParseJsonFile()
{
using var fileStream = JsonFileInfo.OpenRead();
JsonSerializerOptions options = new JsonSerializerOptions()
{
PropertyNamingPolicy = JsonFileNamingPolicy.Instance,
};
options.Converters.Add(new ValidityConverter());

return await JsonSerializer.DeserializeAsync<JsonFile>(fileStream, options);
return await JsonFile.Parse(JsonFileInfo);
}
}
}
Loading

0 comments on commit a12ee81

Please sign in to comment.