NVIDIA · NVnavkumar · Jan 10, 2024 · Jan 9, 2024 · Jan 9, 2024 · Jan 9, 2024
diff --git a/integration_tests/src/main/python/dpp_test.py b/integration_tests/src/main/python/dpp_test.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2021-2023, NVIDIA CORPORATION.
+# Copyright (c) 2021-2024, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -20,14 +20,17 @@
 from marks import ignore_order, allow_non_gpu
 from spark_session import is_before_spark_320, with_cpu_session, is_before_spark_312, is_databricks_runtime, is_databricks113_or_later
 
+# Non-positive values here can produce a degenerate join, so we want the filter value to be
+# associated only with positive values. See https://github.com/NVIDIA/spark-rapids/issues/10147
+value_gen = RepeatSeqGen(int_gen, length=100) 
 
 def create_dim_table(table_name, table_format, length=500):
     def fn(spark):
         df = gen_df(spark, [
             ('key', IntegerGen(nullable=False, min_val=0, max_val=9, special_cases=[])),
             ('skey', IntegerGen(nullable=False, min_val=0, max_val=4, special_cases=[])),
             ('ex_key', IntegerGen(nullable=False, min_val=0, max_val=3, special_cases=[])),
-            ('value', int_gen),
+            ('value', value_gen),
             # specify nullable=False for `filter` to avoid generating invalid SQL with
             # expression `filter = None` (https://github.com/NVIDIA/spark-rapids/issues/9817)
             ('filter', RepeatSeqGen(
@@ -49,7 +52,7 @@ def fn(spark):
             ('skey', IntegerGen(nullable=False, min_val=0, max_val=4, special_cases=[])),
             # ex_key is not a partition column
             ('ex_key', IntegerGen(nullable=False, min_val=0, max_val=3, special_cases=[])),
-            ('value', int_gen)], length)
+            ('value', value_gen)], length)
         df.write.format(table_format) \
             .mode("overwrite") \
             .partitionBy('key', 'skey') \