diff --git a/README.md b/README.md index 71d6f04..4cd3c29 100644 --- a/README.md +++ b/README.md @@ -58,11 +58,7 @@ Notes: ### Build arrays -The default `Builder` can map Zig types with reasonable defaults except for Dictionary types. You can import it like this: -```zig -const Builder = @import("arrow").array.Builder; -``` - +The default `Builder` can map Zig types with reasonable defaults except for Dictionary types. You can use it like this: ```zig var b = try Builder(?i16).init(allocator); try b.append(null); @@ -83,10 +79,6 @@ error: expected type 'i16', found '@TypeOf(null)' ``` Dictionary types must use an explicit builder. -```zig -const DictBuilder = @import("arrow").array.dict.Builder; -``` - ```zig var b = try DictBuilder(?[]const u8).init(allocator); try b.appendNull(); @@ -95,7 +87,7 @@ try b.append("there"); try b.append("friend"); ``` -You can customize exactly how the type maps to Arrow with each type's `BuilderAdvanced`. For example to build a sparse union of structs: +You can customize exactly how to build Arrow types with each type's `BuilderAdvanced`. For example to build a sparse union of nullable structs: ```zig var b = try UnionBuilder( struct { @@ -111,19 +103,24 @@ try b.append(.{ .f = 3 }); try b.append(.{ .i = 5 }); ``` -You can view [sample.zig](./src/sample.zig) which has an example for all supported types. +You can view [sample.zig](./src/sample.zig) which has examples for all supported types. ### FFI -Arrow has a [C ABI](https://arrow.apache.org/docs/format/CDataInterface.html) that allows in-memory array importing and exporting that only copies metadata. +Arrow has a [C ABI](https://arrow.apache.org/docs/format/CDataInterface.html) that allows importing and exporting arrays over an FFI boundary by only copying metadata. #### Export If you have a normal `Array` you can export it to a `abi.Schema` and `abi.Array` to share the memory with other code (i.e. scripting languages). 
When you do so, that code is responsible for calling `abi.Schema.release(&schema)` and `abi.Array.release(&array)` to free memory. ```zig -var abi_schema = try abi.Schema.init(array); +const array = try arrow.sample.all(allocator); +errdefer array.deinit(); + +// Note: these are stack allocated. var abi_arr = try abi.Array.init(array); +var abi_schema = try abi.Schema.init(array); + externFn(&abi_schema, &abi_arr); ``` @@ -132,10 +129,11 @@ externFn(&abi_schema, &abi_arr); If you have a `abi.Schema` and `abi.Array` you can transform them to an `ImportedArray` that contains a normal `Array`. Be a good steward and free the memory with `imported.deinit()`. ```zig -const array = sample.all(); +const array = try arrow.sample.all(allocator); + var abi_schema = try abi.Schema.init(array); var abi_arr = try abi.Array.init(array); -var imported = try ImportedArray.init(allocator, abi_arr, abi_schema); +var imported = try arrow.ffi.ImportedArray.init(allocator, abi_arr, abi_schema); defer imported.deinit(); ``` @@ -146,16 +144,17 @@ Array has an [IPC format](https://arrow.apache.org/docs/format/Columnar.html#ser I cannot in good faith recommend using this format for the following reasons: 1. [Array types](#Usage) are complicated and difficult to generically map to other type systems. -2. Despite claiming to be zero-copy, if an array's buffer uses compression it must be copied. This implementation will also copy is its alignment is not 64 (C++ implementation uses 8). +2. Despite claiming to be zero-copy, if an array's buffer uses compression it must be copied. This implementation will also copy if its alignment is not 64 (the C++ implementation and most files use 8). 3. Post-compression size savings compared to CSV are marginal. 4. Poor backwards compatability. There have been 5 versions of the format, most undocumented, with multiple breaking changes. I also have the following gripes from implementing it: -1. Poor existing tooling. 
Tools cannot inspect individual messages and have poor error messages. Despite the message format being designed for streaming existing tools work on the entire file at once. -2. Poor documentation. The upstream [`File.fbs`](https://github.com/apache/arrow/blob/main/format/File.fbs) has numerous **incorrect** comments. -3. The message custom metadata that would make the format more useful than just shared `ffi` memory is inaccessible in most implementations (including this one) since they are justifiably focused on record batches. -4. Existing implementations do not support reading/writing record batches with different schemas. +1. Poor existing tooling. Tools cannot inspect individual messages and have poor error messages. +2. Despite the message format being designed for streaming existing tools work on the entire file at once. +3. Poor documentation. The upstream [`File.fbs`](https://github.com/apache/arrow/blob/main/format/File.fbs) has numerous **incorrect** comments. +4. The message custom metadata that would make the format more useful than just shared `ffi` memory is inaccessible in most implementations (including this one) since they are justifiably focused on record batches. +5. Existing implementations do not support reading/writing record batches with different schemas. This implementation is only provided as a way to dump normal `Array`s to disk for later inspection. 
@@ -181,7 +180,7 @@ If feeling daring, you can use the streaming API of `ipc.reader.Reader(ReaderTyp You can write record batches of a normal `Arrow` array `ipc.writer.fileWriter`: ```zig -const batch = try sample.all(std.testing.allocator); +const batch = try arrow.sample.all(std.testing.allocator); try batch.toRecordBatch("record batch"); defer batch.deinit(); diff --git a/build.zig b/build.zig index 0f0e833..ebd3a5a 100644 --- a/build.zig +++ b/build.zig @@ -4,33 +4,36 @@ pub const name = "arrow"; const path = "src/lib.zig"; pub fn build(b: *std.Build) !void { - // Expose to zig dependents - _ = b.addModule(name, .{ .source_file = .{ .path = path } }); - const target = b.standardTargetOptions(.{}); const optimize = b.standardOptimizeOption(.{}); - const lib = b.addSharedLibrary(.{ - .name = "arrow-zig", // Avoid naming conflict with libarrow - .root_source_file = .{ .path = path }, - .target = target, - .optimize = optimize, - }); - b.installArtifact(lib); - const flatbuffers_dep = b.dependency("flatbuffers-zig", .{ .target = target, .optimize = optimize, }); const flatbuffers_mod = flatbuffers_dep.module("flatbuffers"); - lib.addModule("flatbuffers", flatbuffers_mod); // For generated files to use lib const lz4 = b.dependency("lz4", .{ .target = target, .optimize = optimize, }); const lz4_mod = lz4.module("lz4"); - lib.addModule("lz4", lz4_mod); + // Expose to zig dependents + const module = b.addModule(name, .{ + .source_file = .{ .path = path }, + .dependencies = &.{ + .{ .name = "flatbuffers", .module = flatbuffers_mod }, + .{ .name = "lz4", .module = lz4_mod }, + }, + }); + + const lib = b.addSharedLibrary(.{ + .name = "arrow-zig", // Avoid naming conflict with libarrow + .root_source_file = .{ .path = path }, + .target = target, + .optimize = optimize, + }); + b.installArtifact(lib); const test_step = b.step("test", "Run library tests"); const main_tests = b.addTest(.{ @@ -38,8 +41,8 @@ pub fn build(b: *std.Build) !void { .target = target, .optimize = 
optimize, }); - main_tests.addModule("flatbuffers", flatbuffers_mod); // For generated files to use lib main_tests.addModule("lz4", lz4_mod); + main_tests.addModule("flatbuffers", flatbuffers_mod); const run_main_tests = b.addRunArtifact(main_tests); test_step.dependOn(&run_main_tests.step); @@ -54,4 +57,14 @@ pub fn build(b: *std.Build) !void { ipc_test.step.dependOn(&run_main_tests.step); integration_test_step.dependOn(&ipc_test.step); integration_test_step.dependOn(&ffi_test.step); + + const example_test_step = b.step("test-examples", "Run example tests"); + const example_tests = b.addTest(.{ + .root_source_file = .{ .path = "./examples/all.zig" }, + .target = target, + .optimize = optimize, + }); + example_tests.addModule("arrow", module); + const run_example_tests = b.addRunArtifact(example_tests); + example_test_step.dependOn(&run_example_tests.step); } diff --git a/examples/all.zig b/examples/all.zig new file mode 100644 index 0000000..e0f17fe --- /dev/null +++ b/examples/all.zig @@ -0,0 +1,5 @@ +test { + _ = @import("./build_arrays.zig"); + _ = @import("./ffi.zig"); + _ = @import("./ipc.zig"); +} diff --git a/examples/build_arrays.zig b/examples/build_arrays.zig new file mode 100644 index 0000000..14e1fbd --- /dev/null +++ b/examples/build_arrays.zig @@ -0,0 +1,33 @@ +const std = @import("std"); +const arrow = @import("arrow"); + +const abi = arrow.abi; +const Builder = arrow.array.Builder; +const DictBuilder = arrow.array.dict.Builder; +const allocator = std.testing.allocator; + +test "build arrays" { + var b = try Builder(?i16).init(allocator); + errdefer b.deinit(); + + try b.append(null); + try b.append(32); + try b.append(33); + try b.append(34); + + const array = try b.finish(); + defer array.deinit(); +} + +test "build dictionary array" { + var b = try DictBuilder(?[]const u8).init(allocator); + errdefer b.deinit(); + + try b.appendNull(); + try b.append("hello"); + try b.append("there"); + try b.append("friend"); + + const array = try b.finish(); + 
test "write file" {
    const batch = try arrow.sample.all(std.testing.allocator);
    // Register cleanup before the next fallible call so the batch is released
    // even when `toRecordBatch` returns an error.
    defer batch.deinit();
    try batch.toRecordBatch("record batch");

    const fname = "./sample.arrow";
    var ipc_writer = try ipc.writer.fileWriter(std.testing.allocator, fname);
    defer ipc_writer.deinit();
    try ipc_writer.write(batch);
    try ipc_writer.finish();

    // The file is only an artifact of this example; remove it once written.
    try std.fs.cwd().deleteFile(fname);
}
a/src/array/flat.zig +++ b/src/array/flat.zig @@ -63,16 +63,19 @@ pub fn BuilderAdvanced(comptime T: type, comptime opts: tags.BinaryOptions) type .Bool, .Int, .Float, .ComptimeInt, .ComptimeFloat => try self.values.append(value), .Pointer => |p| switch (p.size) { .Slice => { - std.debug.assert(layout == .VariableBinary); try self.values.appendSlice(value); try self.offsets.append(@intCast(self.values.items.len)); }, else => |t| @compileError("unsupported pointer type " ++ @tagName(t)), }, .Array => |a| { - std.debug.assert(is_fixed); if (a.len != fixed_len) - @compileError(std.fmt.comptimePrint("expected array of len {d} but got array of len {d}", .{ fixed_len, a.len })); + @compileError( + std.fmt.comptimePrint( + "expected array of len {d} but got array of len {d}", + .{ fixed_len, a.len }, + ), + ); try self.values.appendSlice(&value); }, .Null => { diff --git a/src/array/list.zig b/src/array/list.zig index 79684d7..ae5ed8a 100644 --- a/src/array/list.zig +++ b/src/array/list.zig @@ -93,7 +93,13 @@ pub fn BuilderAdvanced( else => |t| @compileError("unsupported pointer type " ++ @tagName(t)), }, .Array => |a| { - std.debug.assert(a.len == fixed_len); + if (a.len != fixed_len) + @compileError( + std.fmt.comptimePrint( + "expected array of len {d} but got array of len {d}", + .{ fixed_len, a.len }, + ), + ); for (value) |v| try self.child.append(v); }, else => |t| @compileError("unsupported append type " ++ @tagName(t)), diff --git a/src/ffi/abi.zig b/src/ffi/abi.zig index 4851312..df5bc04 100644 --- a/src/ffi/abi.zig +++ b/src/ffi/abi.zig @@ -7,6 +7,12 @@ const export_ = @import("export.zig"); pub const Schema = extern struct { const Self = @This(); + pub const PrivateData = struct { + allocator: Allocator, + name_len: usize, + abi_format_on_heap: bool, + }; + format: [*:0]const u8, // Managed name: ?[*:0]const u8 = null, // Managed metadata: ?[*:0]const u8 = null, // Managed @@ -26,25 +32,9 @@ pub const Schema = extern struct { std.debug.assert(@sizeOf(@This()) 
== 72); } - // Creates a new abi.Schema from a abi.Array. Caller owns abi.Schema and must call `.release`. + /// Creates a new abi.Schema from a abi.Array. Caller owns abi.Schema and must call `.release`. pub fn init(array: *array_mod.Array) !Self { - const layout = array.tag.abiLayout(); - const n_children = if (layout == .Dictionary) 0 else array.children.len; - const Exporter = export_.schema; - - return .{ - .format = try array.tag.abiFormat(array.allocator, n_children), - .name = if (array.name.len == 0) null else try array.allocator.dupeZ(u8, array.name), - .metadata = null, - .flags = .{ - .nullable = array.tag.nullable(), - }, - .n_children = @bitCast(n_children), - .children = try Exporter.children(array, n_children), - .dictionary = try Exporter.dictionary(array, layout), - .release = Exporter.release, - .private_data = @ptrCast(array), - }; + return export_.schema.init(array); } pub fn deinit(self: *Self) void { @@ -136,25 +126,9 @@ pub const Array = extern struct { std.debug.assert(@sizeOf(@This()) == 80); } - // Moves array.Array into a new abi.Array. Caller owns abi.Array and must call `.release`. + /// Moves array.Array into a new abi.Array. Caller owns abi.Array and must call `.release`. 
/// Builds the C-ABI buffer pointer table for `array`.
///
/// Returns null when `n_buffers` is 0. Otherwise allocates `n_buffers` entries
/// with `array.allocator`; a zero-length buffer is exported as a null entry and
/// every other entry points at the array's existing buffer memory.
fn buffers(array: *Array, n_buffers: usize) Allocator.Error!Buffers {
    if (n_buffers == 0) return null;

    const res = try array.allocator.alloc(Buffer, n_buffers);
    for (array.buffers[0..n_buffers], 0..) |b, i| {
        res[i] = if (b.len > 0) @ptrCast(b.ptr) else null;
    }

    return @ptrCast(res);
}

/// Exports the first `n_children` children of `array` as heap-allocated `abi.Array`s.
///
/// Returns null when `n_children` is 0 (this matches the schema exporter and
/// avoids handing out a pointer to a zero-length allocation). On failure every
/// child exported so far is deinitialized and destroyed and the pointer table
/// itself is freed, so nothing leaks.
fn children(array: *Array, n_children: usize) Allocator.Error!?[*]*abi.Array {
    if (n_children == 0) return null;

    const allocator = array.allocator;
    const res = try allocator.alloc(*abi.Array, n_children);
    errdefer allocator.free(res);

    // Number of fully initialized entries in `res`; read by the errdefer below
    // at the time the error is returned.
    var n_done: usize = 0;
    errdefer {
        for (res[0..n_done]) |c| {
            c.deinit();
            allocator.destroy(c);
        }
    }

    for (array.children[0..n_children]) |child| {
        const exported = try allocator.create(abi.Array);
        // Covers the window where the node exists but its export failed.
        errdefer allocator.destroy(exported);
        exported.* = try abi.Array.init(child);
        res[n_done] = exported;
        n_done += 1;
    }

    return @ptrCast(res);
}

/// Exports the dictionary values (`array.children[0]`) for a dictionary layout.
///
/// Returns null for every other layout. The node is destroyed again if
/// exporting the child fails.
fn dictionary(array: *Array, layout: abi.Array.Layout) Allocator.Error!?*abi.Array {
    if (layout != .Dictionary) return null;

    const allocator = array.allocator;
    const res = try allocator.create(abi.Array);
    errdefer allocator.destroy(res);
    res.* = try abi.Array.init(array.children[0]);

    return res;
}

/// Moves `array` into a new `abi.Array`. Caller owns the result and must call
/// its `release` callback.
///
/// All bookkeeping is allocated with `array.allocator`. If any step fails,
/// everything allocated by the earlier steps is freed exactly once.
pub fn init(array: *Array) Allocator.Error!abi.Array {
    const layout = array.tag.abiLayout();
    const n_buffers = layout.nBuffers();
    // A dictionary's single child is exported through `.dictionary`, not `.children`.
    const n_children = if (layout == .Dictionary) 0 else array.children.len;
    const allocator = array.allocator;

    const buffers_ = try buffers(array, n_buffers);
    errdefer {
        if (buffers_) |bs| allocator.free(bs[0..n_buffers]);
    }

    const children_ = try children(array, n_children);
    // Previously this errdefer freed `buffers_` a second time (double free)
    // and never cleaned up the children.
    errdefer {
        if (children_) |cs| {
            const exported = cs[0..n_children];
            for (exported) |c| {
                c.deinit();
                allocator.destroy(c);
            }
            allocator.free(exported);
        }
    }

    // Last fallible step: nothing after it can fail, so it needs no errdefer.
    const dictionary_ = try dictionary(array, layout);

    return .{
        .length = @bitCast(array.length),
        .null_count = @bitCast(array.null_count),
        .offset = 0,
        .n_buffers = @bitCast(n_buffers),
        .n_children = @bitCast(n_children),
        .buffers = buffers_,
        .children = children_,
        .dictionary = dictionary_,
        .release = release,
        .private_data = @ptrCast(array),
    };
}
/// Exports the first `n_children` children of `arr` as heap-allocated `abi.Schema`s.
///
/// Returns null when `n_children` is 0. On failure every child exported so far
/// is deinitialized and destroyed and the pointer table itself is freed.
fn children(arr: *Array, n_children: usize) Allocator.Error!?[*]*abi.Schema {
    if (n_children == 0) return null;

    const allocator = arr.allocator;
    const res = try allocator.alloc(*abi.Schema, n_children);
    errdefer allocator.free(res);

    // Number of fully initialized entries in `res`; read by the errdefer below
    // at the time the error is returned.
    var n_done: usize = 0;
    errdefer {
        for (res[0..n_done]) |c| {
            c.deinit();
            allocator.destroy(c);
        }
    }

    for (arr.children[0..n_children]) |child| {
        const exported = try allocator.create(abi.Schema);
        // Covers the window where the node exists but its export failed.
        errdefer allocator.destroy(exported);
        exported.* = try abi.Schema.init(child);
        res[n_done] = exported;
        n_done += 1;
    }

    return @ptrCast(res);
}

/// Exports the dictionary values schema (`arr.children[0]`) for a dictionary layout.
///
/// Returns null for every other layout. The node is destroyed again if
/// exporting the child fails.
fn dictionary(arr: *Array, layout: abi.Array.Layout) Allocator.Error!?*abi.Schema {
    if (layout != .Dictionary) return null;

    const allocator = arr.allocator;
    const res = try allocator.create(abi.Schema);
    errdefer allocator.destroy(res);
    res.* = try abi.Schema.init(arr.children[0]);

    return res;
}

/// Allocates the bookkeeping `release` needs to free an exported schema.
///
/// Always allocates. `release` takes its allocator from this struct and returns
/// early when it is missing, so skipping it for unnamed arrays with a static
/// format string would leave their children and dictionary unreleased. The
/// return type stays optional for compatibility with existing callers.
fn privateData(arr: *Array, format_on_heap: bool) Allocator.Error!?*abi.Schema.PrivateData {
    const allocator = arr.allocator;
    const res = try allocator.create(abi.Schema.PrivateData);
    res.* = .{
        .allocator = allocator,
        .name_len = arr.name.len,
        .abi_format_on_heap = format_on_heap,
    };
    return res;
}

/// Creates a new `abi.Schema` describing `array`. Caller owns the result and
/// must call its `release` callback.
///
/// All bookkeeping is allocated with `array.allocator`. If any step fails,
/// everything allocated by the earlier steps is freed with the same sizes
/// `release` would use.
pub fn init(array: *Array) Allocator.Error!abi.Schema {
    const allocator = array.allocator;
    const layout = array.tag.abiLayout();
    // A dictionary's single child is exported through `.dictionary`, not `.children`.
    const n_children = if (layout == .Dictionary) 0 else array.children.len;
    const format_on_heap = array.tag.isAbiFormatOnHeap();

    const format = try array.tag.abiFormat(allocator, n_children);
    errdefer if (format_on_heap) allocator.free(std.mem.span(format));

    const name: ?[*:0]const u8 = if (array.name.len == 0)
        null
    else
        try allocator.dupeZ(u8, array.name);
    // `dupeZ` allocates the sentinel byte too, so the free must cover len + 1
    // (the same length `release` uses).
    errdefer if (name) |n| allocator.free(n[0 .. array.name.len + 1]);

    const children_ = try children(array, n_children);
    errdefer {
        if (children_) |cs| {
            const exported = cs[0..n_children];
            for (exported) |c| {
                c.deinit();
                allocator.destroy(c);
            }
            // The pointer table is a separate allocation from the nodes.
            allocator.free(exported);
        }
    }

    const dictionary_ = try dictionary(array, layout);
    errdefer {
        if (dictionary_) |d| {
            if (d.release) |r| r(d);
            allocator.destroy(d);
        }
    }

    // Last fallible step: nothing after it can fail, so it needs no errdefer.
    const private_data = try privateData(array, format_on_heap);

    return .{
        .format = format,
        .name = name,
        .metadata = null,
        .flags = .{ .nullable = array.tag.nullable() },
        .n_children = @bitCast(n_children),
        .children = children_,
        .dictionary = dictionary_,
        .release = release,
        .private_data = @ptrCast(private_data),
    };
}
format_string.len + 1], + ); } fn testImport(array: *Array) !void { diff --git a/src/ipc/writer.zig b/src/ipc/writer.zig index 01c6f01..e713675 100644 --- a/src/ipc/writer.zig +++ b/src/ipc/writer.zig @@ -224,15 +224,13 @@ pub fn Writer(comptime WriterType: type) type { defer self.allocator.free(bytes); const offset = self.dest.bytes_written; - std.debug.assert(@mod(offset, message_alignment) == 0); - const n_padding = getPadding(message_alignment, bytes.len); const len: shared.MessageLen = @intCast(bytes.len + n_padding); + try self.dest.writer().writeIntLittle(shared.MessageLen, shared.continuation); try self.dest.writer().writeIntLittle(shared.MessageLen, len); try self.dest.writer().writeAll(bytes); for (0..n_padding) |_| try self.dest.writer().writeByte(0); - std.debug.assert(@mod(len, message_alignment) == 0); return .{ .offset = @bitCast(offset), @@ -260,14 +258,10 @@ pub fn Writer(comptime WriterType: type) type { var nodes = try std.ArrayList(FieldNode).initCapacity(self.allocator, n_fields); errdefer nodes.deinit(); for (array.children) |c| try getFieldNodes(&nodes, c); - // for (nodes.items) |n| log.debug("write {any}", .{n}); - std.debug.assert(nodes.items.len == n_fields); var buffers = try std.ArrayList(Buffer).initCapacity(self.allocator, n_buffers); errdefer buffers.deinit(); for (array.children) |c| _ = try writeBuffers(c, void, &buffers); - // for (buffers.items) |n| log.debug("write {any}", .{n}); - std.debug.assert(buffers.items.len == n_buffers); return .{ .length = @bitCast(array.length), @@ -287,9 +281,6 @@ pub fn Writer(comptime WriterType: type) type { var res = try self.writeMessage(message); res.body_length = @bitCast(try writeBuffers(array, self.dest.writer(), null)); - std.debug.assert(res.body_length == message.body_length); - std.debug.assert(@mod(res.body_length, 8) == 0); - return res; } @@ -316,10 +307,7 @@ pub fn Writer(comptime WriterType: type) type { var res = try self.writeMessage(message); res.body_length = @bitCast(try 
writeBuffers(dict, self.dest.writer(), null)); - std.debug.assert(res.body_length == message.body_length); - std.debug.assert(@mod(res.body_length, 8) == 0); self.dict_id += 1; - return res; } }; diff --git a/src/lib.zig b/src/lib.zig index 4e24342..920a9ee 100644 --- a/src/lib.zig +++ b/src/lib.zig @@ -1,6 +1,7 @@ const std = @import("std"); -pub const abi = @import("ffi/abi.zig"); -pub const sample = @import("sample.zig"); +pub const ffi = @import("./ffi/lib.zig"); +pub const abi = ffi.abi; +pub const sample = @import("./sample.zig"); pub const ipc = @import("./ipc/lib.zig"); pub const Array = @import("./array/array.zig").Array; pub const array = @import("./array/lib.zig"); diff --git a/src/tags.zig b/src/tags.zig index 5387795..123027c 100644 --- a/src/tags.zig +++ b/src/tags.zig @@ -198,6 +198,7 @@ pub const Tag = union(enum) { try std.testing.expectEqual(Tag.Int, Tag.fromPrimitive(?i32, .{ .nullable = true })); try std.testing.expectEqual(Tag.Binary, Tag.fromPrimitive([]u8, .{ .nullable = false })); try std.testing.expectEqual(Tag.Binary, Tag.fromPrimitive([]?u8, .{ .nullable = true })); + try std.testing.expectEqual(Tag.FixedBinary, Tag.fromPrimitive([3]u8, .{ .nullable = false })); } pub fn Primitive(comptime self: Self) type { diff --git a/test_ffi.py b/test_ffi.py index e68b900..2fd77a0 100755 --- a/test_ffi.py +++ b/test_ffi.py @@ -1,6 +1,6 @@ #!/bin/env python # Requires pyarrow. -# `zig build` -> run this + from os import path import sys from ctypes import * diff --git a/test_ipc.py b/test_ipc.py old mode 100644 new mode 100755 index 23cd8bc..86dddcf --- a/test_ipc.py +++ b/test_ipc.py @@ -1,6 +1,6 @@ #!/bin/env python # Requires pyarrow. -# `zig build test` -> run this + import sys import pyarrow as pa diff --git a/testdata/sample_written.arrow b/testdata/sample_written.arrow index dc28c4d..5e04021 100644 Binary files a/testdata/sample_written.arrow and b/testdata/sample_written.arrow differ