Skip to content

Commit

Permalink
- commit the generated data into the repo, because this facilitates r…
Browse files Browse the repository at this point in the history
…unning the tests in CI

  (since generating the data requires equipping the test runners with pyspark, which at least MotherDuck prefers not to do)

- the data is minimized to 5MB by making the scale factor 10x smaller and by keeping only last/ from expected_results/;
  this motivates rewriting some testcases that used expected_results/q08 to use expected_results/last instead (which is the same)

- removed one testcase, which tests if we get an error when parquet is not loaded
  (in MotherDuck, when we run iceberg in so-called "remote_only" mode, this will actually not raise an error)
  it was kind of a trivial test, so I hope this is OK..
  • Loading branch information
peter committed Jun 28, 2024
1 parent be7538e commit 12d1758
Show file tree
Hide file tree
Showing 24 changed files with 5,722 additions and 21 deletions.
6 changes: 3 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,11 @@ include extension-ci-tools/makefiles/duckdb_extension.Makefile

# Custom makefile targets
data: data_clean
python3 scripts/test_data_generator/generate_iceberg.py 0.01 data/iceberg/generated_spec1_0_01 1
python3 scripts/test_data_generator/generate_iceberg.py 0.01 data/iceberg/generated_spec2_0_01 2
python3 scripts/test_data_generator/generate_iceberg.py 0.001 data/iceberg/generated_spec1_0_01 1
python3 scripts/test_data_generator/generate_iceberg.py 0.001 data/iceberg/generated_spec2_0_01 2

data_large: data data_clean
python3 scripts/test_data_generator/generate_iceberg.py 1 data/iceberg/generated_spec2_1 2

data_clean:
rm -rf data/iceberg/generated_*
rm -rf data/iceberg/generated_*
Original file line number Diff line number Diff line change
@@ -0,0 +1,217 @@
{
"format-version" : 1,
"table-uuid" : "f1ef3dc3-e9ef-4cd4-a881-c0fe9bbe4a5b",
"location" : "data/iceberg/generated_spec1_0_01/pyspark_iceberg_table",
"last-updated-ms" : 1719577635792,
"last-column-id" : 15,
"schema" : {
"type" : "struct",
"schema-id" : 0,
"fields" : [ {
"id" : 1,
"name" : "l_orderkey_bool",
"required" : false,
"type" : "boolean"
}, {
"id" : 2,
"name" : "l_partkey_int",
"required" : false,
"type" : "int"
}, {
"id" : 3,
"name" : "l_suppkey_long",
"required" : false,
"type" : "long"
}, {
"id" : 4,
"name" : "l_extendedprice_float",
"required" : false,
"type" : "float"
}, {
"id" : 5,
"name" : "l_extendedprice_double",
"required" : false,
"type" : "double"
}, {
"id" : 6,
"name" : "l_extendedprice_dec9_2",
"required" : false,
"type" : "decimal(9, 2)"
}, {
"id" : 7,
"name" : "l_extendedprice_dec18_6",
"required" : false,
"type" : "decimal(18, 6)"
}, {
"id" : 8,
"name" : "l_extendedprice_dec38_10",
"required" : false,
"type" : "decimal(38, 10)"
}, {
"id" : 9,
"name" : "l_shipdate_date",
"required" : false,
"type" : "date"
}, {
"id" : 10,
"name" : "l_partkey_time",
"required" : false,
"type" : "int"
}, {
"id" : 11,
"name" : "l_commitdate_timestamp",
"required" : false,
"type" : "timestamp"
}, {
"id" : 12,
"name" : "l_commitdate_timestamp_tz",
"required" : false,
"type" : "timestamptz"
}, {
"id" : 13,
"name" : "l_comment_string",
"required" : false,
"type" : "string"
}, {
"id" : 14,
"name" : "uuid",
"required" : false,
"type" : "string"
}, {
"id" : 15,
"name" : "l_comment_blob",
"required" : false,
"type" : "binary"
} ]
},
"current-schema-id" : 0,
"schemas" : [ {
"type" : "struct",
"schema-id" : 0,
"fields" : [ {
"id" : 1,
"name" : "l_orderkey_bool",
"required" : false,
"type" : "boolean"
}, {
"id" : 2,
"name" : "l_partkey_int",
"required" : false,
"type" : "int"
}, {
"id" : 3,
"name" : "l_suppkey_long",
"required" : false,
"type" : "long"
}, {
"id" : 4,
"name" : "l_extendedprice_float",
"required" : false,
"type" : "float"
}, {
"id" : 5,
"name" : "l_extendedprice_double",
"required" : false,
"type" : "double"
}, {
"id" : 6,
"name" : "l_extendedprice_dec9_2",
"required" : false,
"type" : "decimal(9, 2)"
}, {
"id" : 7,
"name" : "l_extendedprice_dec18_6",
"required" : false,
"type" : "decimal(18, 6)"
}, {
"id" : 8,
"name" : "l_extendedprice_dec38_10",
"required" : false,
"type" : "decimal(38, 10)"
}, {
"id" : 9,
"name" : "l_shipdate_date",
"required" : false,
"type" : "date"
}, {
"id" : 10,
"name" : "l_partkey_time",
"required" : false,
"type" : "int"
}, {
"id" : 11,
"name" : "l_commitdate_timestamp",
"required" : false,
"type" : "timestamp"
}, {
"id" : 12,
"name" : "l_commitdate_timestamp_tz",
"required" : false,
"type" : "timestamptz"
}, {
"id" : 13,
"name" : "l_comment_string",
"required" : false,
"type" : "string"
}, {
"id" : 14,
"name" : "uuid",
"required" : false,
"type" : "string"
}, {
"id" : 15,
"name" : "l_comment_blob",
"required" : false,
"type" : "binary"
} ]
} ],
"partition-spec" : [ ],
"default-spec-id" : 0,
"partition-specs" : [ {
"spec-id" : 0,
"fields" : [ ]
} ],
"last-partition-id" : 999,
"default-sort-order-id" : 0,
"sort-orders" : [ {
"order-id" : 0,
"fields" : [ ]
} ],
"properties" : {
"owner" : "peter",
"write.parquet.compression-codec" : "zstd"
},
"current-snapshot-id" : 955407744644249037,
"refs" : {
"main" : {
"snapshot-id" : 955407744644249037,
"type" : "branch"
}
},
"snapshots" : [ {
"snapshot-id" : 955407744644249037,
"timestamp-ms" : 1719577635792,
"summary" : {
"operation" : "append",
"spark.app.id" : "local-1719577633178",
"added-data-files" : "1",
"added-records" : "6005",
"added-files-size" : "440910",
"changed-partition-count" : "1",
"total-records" : "6005",
"total-files-size" : "440910",
"total-data-files" : "1",
"total-delete-files" : "0",
"total-position-deletes" : "0",
"total-equality-deletes" : "0"
},
"manifest-list" : "data/iceberg/generated_spec1_0_01/pyspark_iceberg_table/metadata/snap-955407744644249037-1-435eebe7-026b-4d5c-a2bc-a92c5fc40db1.avro",
"schema-id" : 0
} ],
"statistics" : [ ],
"snapshot-log" : [ {
"timestamp-ms" : 1719577635792,
"snapshot-id" : 955407744644249037
} ],
"metadata-log" : [ ]
}
Loading

0 comments on commit 12d1758

Please sign in to comment.