Xfail Databricks cases because its default rebase mode is legacy
Chong Gao committed Nov 20, 2023
1 parent 3f8bc40 commit c2b5ffb
Showing 3 changed files with 15 additions and 0 deletions.
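
Every addition in this commit is the same one-line change: a conditional xfail marker on cases whose results depend on the session time zone, because Databricks defaults the datetime rebase mode to LEGACY (tracked in NVIDIA/spark-rapids#9792). A minimal sketch of the pattern, with a simplified stand-in for the suite's is_not_utc() helper (the real one lives in the integration-test utilities):

import os
import pytest

def is_not_utc():
    # Simplified stand-in: the real helper inspects the time zone the
    # integration tests are parameterized with.
    return os.environ.get('TZ', 'UTC') != 'UTC'

@pytest.mark.xfail(condition=is_not_utc(),
                   reason='DB rebase mode is legacy: '
                          'https://github.com/NVIDIA/spark-rapids/issues/9792')
def test_example_round_trip():
    # When the condition holds, pytest still runs the test but records a
    # failure as expected (xfail) rather than as a hard failure; when it
    # does not hold, the marker is inert and the test must pass as usual.
    assert 1 + 1 == 2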
2 changes: 2 additions & 0 deletions integration_tests/src/main/python/hive_write_test.py
@@ -153,9 +153,11 @@ def test_optimized_hive_bucketed_fallback(gens, storage, planned_write, spark_tm
"ExecutedCommandExec",
{"spark.sql.optimizer.plannedWrite.enabled": planned_write})

+@pytest.mark.xfail(condition = is_not_utc(), reason = 'DB rebase mode is legacy: https://github.com/NVIDIA/spark-rapids/issues/9792')
def test_hive_copy_ints_to_long(spark_tmp_table_factory):
do_hive_copy(spark_tmp_table_factory, int_gen, "INT", "BIGINT")

+@pytest.mark.xfail(condition = is_not_utc(), reason = 'DB rebase mode is legacy: https://github.com/NVIDIA/spark-rapids/issues/9792')
def test_hive_copy_longs_to_float(spark_tmp_table_factory):
do_hive_copy(spark_tmp_table_factory, long_gen, "BIGINT", "FLOAT")

2 changes: 2 additions & 0 deletions integration_tests/src/main/python/parquet_test.py
@@ -957,6 +957,7 @@ def test_parquet_reading_from_unaligned_pages_basic_filters_with_nulls(spark_tmp
}

@pytest.mark.skipif(is_before_spark_330(), reason='Aggregate push down on Parquet is a new feature of Spark 330')
+@pytest.mark.xfail(condition = is_not_utc(), reason = 'DB rebase mode is legacy: https://github.com/NVIDIA/spark-rapids/issues/9792')
def test_parquet_scan_without_aggregation_pushdown_not_fallback(spark_tmp_path):
"""
No aggregation will be pushed down in this test, so we should not fallback to CPU
@@ -1232,6 +1233,7 @@ def test_parquet_read_daytime_interval_cpu_file(spark_tmp_path):
lambda spark: spark.read.parquet(data_path))

@pytest.mark.skipif(is_before_spark_330(), reason='DayTimeInterval is not supported before Pyspark 3.3.0')
+@pytest.mark.xfail(condition = is_not_utc(), reason = 'DB rebase mode is legacy: https://github.com/NVIDIA/spark-rapids/issues/9792')
def test_parquet_read_daytime_interval_gpu_file(spark_tmp_path):
data_path = spark_tmp_path + '/PARQUET_DATA'
gen_list = [('_c1', DayTimeIntervalGen())]
11 changes: 11 additions & 0 deletions integration_tests/src/main/python/parquet_write_test.py
@@ -123,6 +123,7 @@ def test_write_round_trip(spark_tmp_path, parquet_gens):
all_empty_map_gen]

@pytest.mark.parametrize('par_gen', par_write_odd_empty_strings_gens_sample, ids=idfn)
+@pytest.mark.xfail(condition = is_not_utc(), reason = 'DB rebase mode is legacy: https://github.com/NVIDIA/spark-rapids/issues/9792')
def test_write_round_trip_corner(spark_tmp_path, par_gen):
gen_list = [('_c0', par_gen)]
data_path = spark_tmp_path + '/PAR_DATA'
@@ -239,6 +240,7 @@ def start(self, rand):
parquet_write_compress_options.append('zstd')

@pytest.mark.parametrize('compress', parquet_write_compress_options)
+@pytest.mark.xfail(condition = is_not_utc(), reason = 'DB rebase mode is legacy: https://github.com/NVIDIA/spark-rapids/issues/9792')
def test_compress_write_round_trip(spark_tmp_path, compress):
data_path = spark_tmp_path + '/PARQUET_DATA'
all_confs = {'spark.sql.parquet.compression.codec': compress}
@@ -313,6 +315,7 @@ def writeParquetNoOverwriteCatchException(spark, df, data_path, table_name):
df.coalesce(1).write.format("parquet").option("path", data_path).saveAsTable(table_name)
assert e_info.match(r".*already exists.*")

+@pytest.mark.xfail(condition = is_not_utc(), reason = 'DB rebase mode is legacy: https://github.com/NVIDIA/spark-rapids/issues/9792')
def test_ts_write_twice_fails_exception(spark_tmp_path, spark_tmp_table_factory):
gen = IntegerGen()
data_path = spark_tmp_path + '/PARQUET_DATA'
@@ -451,6 +454,7 @@ def sql_write(spark, path):
# This test is testing how the parquet_writer will behave if column has a validity mask without having any nulls.
# There is no straight forward to do it besides creating a vector with nulls and then dropping nulls
# cudf will create a vector with a null_mask even though we have just filtered them
+@pytest.mark.xfail(condition = is_not_utc(), reason = 'DB rebase mode is legacy: https://github.com/NVIDIA/spark-rapids/issues/9792')
def test_write_map_nullable(spark_tmp_path):
data_path = spark_tmp_path + '/PARQUET_DATA'

@@ -509,6 +513,7 @@ def test_parquet_write_roundtrip_datetime_with_legacy_rebase(spark_tmp_path, dat

@pytest.mark.allow_non_gpu(*test_non_empty_ctas_non_gpu_execs)
@pytest.mark.parametrize('allow_non_empty', [True, False])
+@pytest.mark.xfail(condition = is_not_utc(), reason = 'DB rebase mode is legacy: https://github.com/NVIDIA/spark-rapids/issues/9792')
def test_non_empty_ctas(spark_tmp_path, spark_tmp_table_factory, allow_non_empty):
data_path = spark_tmp_path + "/CTAS"
conf = {
@@ -556,6 +561,7 @@ def get_nested_parquet_meta_data_for_field_id():


@pytest.mark.skipif(is_before_spark_330(), reason='Field ID is not supported before Spark 330')
+@pytest.mark.xfail(condition = is_not_utc(), reason = 'DB rebase mode is legacy: https://github.com/NVIDIA/spark-rapids/issues/9792')
def test_parquet_write_field_id(spark_tmp_path):
data_path = spark_tmp_path + '/PARQUET_DATA'
schema, data = get_nested_parquet_meta_data_for_field_id()
@@ -573,6 +579,7 @@ def test_parquet_write_field_id(spark_tmp_path):
conf=enable_parquet_field_id_read)

@pytest.mark.skipif(is_before_spark_330(), reason='Field ID is not supported before Spark 330')
+@pytest.mark.xfail(condition = is_not_utc(), reason = 'DB rebase mode is legacy: https://github.com/NVIDIA/spark-rapids/issues/9792')
def test_parquet_write_field_id_disabled(spark_tmp_path):
data_path = spark_tmp_path + '/PARQUET_DATA'
schema, data = get_nested_parquet_meta_data_for_field_id()
@@ -602,6 +609,7 @@ def test_write_daytime_interval(spark_tmp_path):

@ignore_order
@pytest.mark.skipif(is_before_spark_320(), reason="is only supported in Spark 320+")
+@pytest.mark.xfail(condition = is_not_utc(), reason = 'DB rebase mode is legacy: https://github.com/NVIDIA/spark-rapids/issues/9792')
def test_concurrent_writer(spark_tmp_path):
data_path = spark_tmp_path + '/PARQUET_DATA'
assert_gpu_and_cpu_writes_are_equal_collect(
@@ -722,6 +730,7 @@ def write_partitions(spark, table_name):


@ignore_order(local=True)
+@pytest.mark.xfail(condition = is_not_utc(), reason = 'DB rebase mode is legacy: https://github.com/NVIDIA/spark-rapids/issues/9792')
def test_dynamic_partitioned_parquet_write(spark_tmp_table_factory, spark_tmp_path):

def create_input_table(spark):
@@ -807,6 +816,7 @@ def test_write_with_planned_write_enabled(spark_tmp_path, planned_write_enabled,
# Issue to test a known bug https://github.com/NVIDIA/spark-rapids/issues/8694 to avoid regression
@ignore_order
@allow_non_gpu("SortExec", "ShuffleExchangeExec")
+@pytest.mark.xfail(condition = is_not_utc(), reason = 'DB rebase mode is legacy: https://github.com/NVIDIA/spark-rapids/issues/9792')
def test_write_list_struct_single_element(spark_tmp_path):
data_path = spark_tmp_path + '/PARQUET_DATA'
data_gen = ArrayGen(StructGen([('element', long_gen)], nullable=False), max_length=10, nullable=False)
@@ -818,6 +828,7 @@ def test_write_list_struct_single_element(spark_tmp_path):
assert_gpu_and_cpu_are_equal_collect(lambda spark: spark.read.parquet(cpu_path), conf)

@ignore_order
+@pytest.mark.xfail(condition = is_not_utc(), reason = 'DB rebase mode is legacy: https://github.com/NVIDIA/spark-rapids/issues/9792')
def test_parquet_write_column_name_with_dots(spark_tmp_path):
data_path = spark_tmp_path + "/PARQUET_DATA"
gens = [
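
For context on the xfail reason: Parquet date/timestamp values before the Gregorian switchover are rebased between the hybrid Julian and proleptic Gregorian calendars, controlled by standard Spark SQL confs. Databricks defaults this to LEGACY (per the reason strings above), while open-source Spark 3.x typically defaults to EXCEPTION. A hedged sketch of pinning the mode explicitly so writes are comparable across platforms (the output path is hypothetical):

from pyspark.sql import SparkSession

# Pin the rebase mode instead of relying on the platform default
# (LEGACY on Databricks per this commit's xfail reason; typically
# EXCEPTION on open-source Spark 3.x).
spark = (SparkSession.builder
         .appName("rebase-demo")
         .config("spark.sql.parquet.datetimeRebaseModeInWrite", "CORRECTED")
         .config("spark.sql.parquet.datetimeRebaseModeInRead", "CORRECTED")
         .getOrCreate())

# 1500-01-01 predates the Gregorian switchover, so LEGACY (hybrid Julian)
# and CORRECTED (proleptic Gregorian) disagree on its physical encoding;
# pinning the mode keeps CPU and GPU writes directly comparable.
df = spark.sql("SELECT DATE'1500-01-01' AS d")
df.write.mode("overwrite").parquet("/tmp/rebase_demo")  # hypothetical path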
