From 1afead7c641b67baa747870bc11f509105ae5f46 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Tue, 21 Nov 2023 12:27:29 -0700 Subject: [PATCH 01/25] Specify nullable=False when generating filter values in dpp tests (#9818) * Specify nullable=False when generating filter values in dpp tests * signoff Signed-off-by: Andy Grove * add comment --------- Signed-off-by: Andy Grove --- integration_tests/src/main/python/dpp_test.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/integration_tests/src/main/python/dpp_test.py b/integration_tests/src/main/python/dpp_test.py index d3a33401c63..f56bb603ac4 100644 --- a/integration_tests/src/main/python/dpp_test.py +++ b/integration_tests/src/main/python/dpp_test.py @@ -28,8 +28,10 @@ def fn(spark): ('skey', IntegerGen(nullable=False, min_val=0, max_val=4, special_cases=[])), ('ex_key', IntegerGen(nullable=False, min_val=0, max_val=3, special_cases=[])), ('value', int_gen), + # specify nullable=False for `filter` to avoid generating invalid SQL with + # expression `filter = None` (https://github.com/NVIDIA/spark-rapids/issues/9817) ('filter', RepeatSeqGen( - IntegerGen(min_val=0, max_val=length, special_cases=[]), length=length // 20)) + IntegerGen(min_val=0, max_val=length, special_cases=[], nullable=False), length=length // 20)) ], length) df.cache() df.write.format(table_format) \ From f354ddfd89c3bd6ee2a9edb1db53dced247af9c0 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Tue, 21 Nov 2023 15:59:07 -0700 Subject: [PATCH 02/25] Support timestamp in `from_json` [databricks] (#9720) * Support timestamp in from_json * fix shims * fix shims * signoff Signed-off-by: Andy Grove * improve tests * fix 321db shim * update compatibility guide --------- Signed-off-by: Andy Grove --- docs/compatibility.md | 10 ++- docs/supported_ops.md | 4 +- .../src/main/python/json_test.py | 84 ++++++++++++++++++- .../com/nvidia/spark/rapids/GpuCast.scala | 4 +- .../nvidia/spark/rapids/GpuOverrides.scala | 3 +- .../catalyst/json/rapids/GpuJsonScan.scala | 10 +++ .../spark/sql/rapids/GpuJsonToStructs.scala | 16 +++- .../rapids/shims/GpuJsonToStructsShim.scala | 9 ++ .../sql/catalyst/json/GpuJsonUtils.scala | 5 ++ .../sql/catalyst/json/GpuJsonUtils.scala | 6 ++ .../sql/catalyst/json/GpuJsonUtils.scala | 6 ++ .../rapids/shims/GpuJsonToStructsShim.scala | 19 +++++ .../sql/catalyst/json/GpuJsonUtils.scala | 19 +++-- 13 files changed, 181 insertions(+), 14 deletions(-) diff --git a/docs/compatibility.md b/docs/compatibility.md index 370d61e5b0c..53b39ec251e 100644 --- a/docs/compatibility.md +++ b/docs/compatibility.md @@ -330,7 +330,15 @@ Dates are partially supported but there are some known issues: parsed as null ([#9664](https://github.com/NVIDIA/spark-rapids/issues/9664)) whereas Spark versions prior to 3.4 will parse these numbers as number of days since the epoch, and in Spark 3.4 and later, an exception will be thrown. -Timestamps are not supported ([#9590](https://github.com/NVIDIA/spark-rapids/issues/9590)). +Timestamps are partially supported but there are some known issues: + +- Only the default `timestampFormat` of `yyyy-MM-dd'T'HH:mm:ss[.SSS][XXX]` is supported. 
The query will fall back to CPU if any other format
+ is specified ([#9723](https://github.com/NVIDIA/spark-rapids/issues/9723))
+- Strings containing integers with more than four digits will be
+ parsed as null ([#9664](https://github.com/NVIDIA/spark-rapids/issues/9664)) whereas Spark versions prior to 3.4
+ will parse these numbers as number of days since the epoch, and in Spark 3.4 and later, an exception will be thrown.
+- Strings containing special date constant values such as `now` and `today` will parse as null ([#9724](https://github.com/NVIDIA/spark-rapids/issues/9724)),
+ which differs from the behavior in Spark 3.1.x

When reading numeric values, the GPU implementation always supports leading zeros regardless of the setting for the JSON option `allowNumericLeadingZeros` ([#9588](https://github.com/NVIDIA/spark-rapids/issues/9588)).

diff --git a/docs/supported_ops.md b/docs/supported_ops.md
index 490ec771ab0..1566a291f36 100644
--- a/docs/supported_ops.md
+++ b/docs/supported_ops.md
@@ -8141,8 +8141,8 @@ are limited.
NS
-PS<br/>MAP only supports keys and values that are of STRING type;<br/>unsupported child types TIMESTAMP, NULL, BINARY, CALENDAR, MAP, UDT
-PS<br/>unsupported child types TIMESTAMP, NULL, BINARY, CALENDAR, MAP, UDT
+PS<br/>MAP only supports keys and values that are of STRING type;<br/>UTC is only supported TZ for child TIMESTAMP;<br/>unsupported child types NULL, BINARY, CALENDAR, MAP, UDT
+PS<br/>UTC is only supported TZ for child TIMESTAMP;<br/>unsupported child types NULL, BINARY, CALENDAR, MAP, UDT
diff --git a/integration_tests/src/main/python/json_test.py b/integration_tests/src/main/python/json_test.py index e3f50727619..bb99a01425f 100644 --- a/integration_tests/src/main/python/json_test.py +++ b/integration_tests/src/main/python/json_test.py @@ -21,7 +21,7 @@ from datetime import timezone from conftest import is_databricks_runtime from marks import approximate_float, allow_non_gpu, ignore_order -from spark_session import with_cpu_session, with_gpu_session, is_before_spark_330, is_before_spark_340, \ +from spark_session import with_cpu_session, with_gpu_session, is_before_spark_320, is_before_spark_330, is_before_spark_340, \ is_before_spark_341 json_supported_gens = [ @@ -600,6 +600,88 @@ def test_from_json_struct_date_fallback_non_default_format(date_gen, date_format conf={"spark.rapids.sql.expression.JsonToStructs": True, 'spark.sql.legacy.timeParserPolicy': 'CORRECTED'}) +@pytest.mark.parametrize('timestamp_gen', [ + # "yyyy-MM-dd'T'HH:mm:ss[.SSS][XXX]" + "\"[ \t\xA0\u1680\u180e\u2000-\u200a\u202f\u205f\u3000]?[1-8]{1}[0-9]{3}-[0-3]{1,2}-[0-3]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}(\\.[0-9]{1,6})?Z?[ \t\xA0\u1680\u180e\u2000-\u200a\u202f\u205f\u3000]}?\"", + # "yyyy-MM-dd" + "\"[ \t\xA0\u1680\u180e\u2000-\u200a\u202f\u205f\u3000]?[1-8]{1}[0-9]{3}-[0-3]{1,2}-[0-3]{1,2}[ \t\xA0\u1680\u180e\u2000-\u200a\u202f\u205f\u3000]?\"", + # "yyyy-MM" + "\"[ \t\xA0\u1680\u180e\u2000-\u200a\u202f\u205f\u3000]?[1-8]{1}[0-9]{3}-[0-3]{1,2}[ \t\xA0\u1680\u180e\u2000-\u200a\u202f\u205f\u3000]?\"", + # "yyyy" + "\"[ \t\xA0\u1680\u180e\u2000-\u200a\u202f\u205f\u3000]?[0-9]{4}[ \t\xA0\u1680\u180e\u2000-\u200a\u202f\u205f\u3000]?\"", + # "dd/MM/yyyy" + "\"[0-9]{2}/[0-9]{2}/[1-8]{1}[0-9]{3}\"", + # special constant values + pytest.param("\"(now|today|tomorrow|epoch)\"", marks=pytest.mark.xfail(condition=is_before_spark_320(), reason="https://github.com/NVIDIA/spark-rapids/issues/9724")), + # "nnnnn" (number of days since epoch prior to Spark 3.4, throws exception from 3.4) + pytest.param("\"[0-9]{5}\"", marks=pytest.mark.skip(reason="https://github.com/NVIDIA/spark-rapids/issues/9664")), + # integral + pytest.param("[0-9]{1,5}", marks=pytest.mark.skip(reason="https://github.com/NVIDIA/spark-rapids/issues/9588")), + "[1-9]{1,8}", + # floating-point + "[0-9]{0,2}\.[0-9]{1,2}" + # boolean + "(true|false)" +]) +@pytest.mark.parametrize('timestamp_format', [ + "", + "yyyy-MM-dd'T'HH:mm:ss[.SSS][XXX]", + # https://github.com/NVIDIA/spark-rapids/issues/9723 + pytest.param("yyyy-MM-dd'T'HH:mm:ss.SSSXXX", marks=pytest.mark.allow_non_gpu('ProjectExec')), + pytest.param("dd/MM/yyyy'T'HH:mm:ss[.SSS][XXX]", marks=pytest.mark.allow_non_gpu('ProjectExec')), +]) +@pytest.mark.parametrize('time_parser_policy', [ + pytest.param("LEGACY", marks=pytest.mark.allow_non_gpu('ProjectExec')), + "CORRECTED" +]) +@pytest.mark.parametrize('ansi_enabled', [ True, False ]) +def test_from_json_struct_timestamp(timestamp_gen, timestamp_format, time_parser_policy, ansi_enabled): + json_string_gen = StringGen(r'{ "a": ' + timestamp_gen + ' }') \ + .with_special_case('{ "a": null }') \ + .with_special_case('null') + options = { 'timestampFormat': timestamp_format } if len(timestamp_format) > 0 else { } + assert_gpu_and_cpu_are_equal_collect( + lambda spark : unary_op_df(spark, json_string_gen) \ + .select(f.col('a'), f.from_json('a', 'struct', options)), + conf={"spark.rapids.sql.expression.JsonToStructs": True, + 'spark.sql.legacy.timeParserPolicy': time_parser_policy, + 'spark.sql.ansi.enabled': ansi_enabled }) + 
+@allow_non_gpu('ProjectExec') +@pytest.mark.parametrize('timestamp_gen', ["\"[1-8]{1}[0-9]{3}-[0-3]{1,2}-[0-3]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}(\\.[0-9]{1,6})?Z?\""]) +@pytest.mark.parametrize('timestamp_format', [ + "", + "yyyy-MM-dd'T'HH:mm:ss[.SSS][XXX]", +]) +def test_from_json_struct_timestamp_fallback_legacy(timestamp_gen, timestamp_format): + json_string_gen = StringGen(r'{ "a": ' + timestamp_gen + ' }') \ + .with_special_case('{ "a": null }') \ + .with_special_case('null') + options = { 'timestampFormat': timestamp_format } if len(timestamp_format) > 0 else { } + assert_gpu_fallback_collect( + lambda spark : unary_op_df(spark, json_string_gen) \ + .select(f.col('a'), f.from_json('a', 'struct', options)), + 'ProjectExec', + conf={"spark.rapids.sql.expression.JsonToStructs": True, + 'spark.sql.legacy.timeParserPolicy': 'LEGACY'}) + +@allow_non_gpu('ProjectExec') +@pytest.mark.parametrize('timestamp_gen', ["\"[1-8]{1}[0-9]{3}-[0-3]{1,2}-[0-3]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}(\\.[0-9]{1,6})?Z?\""]) +@pytest.mark.parametrize('timestamp_format', [ + "yyyy-MM-dd'T'HH:mm:ss.SSSXXX", + "dd/MM/yyyy'T'HH:mm:ss[.SSS][XXX]", +]) +def test_from_json_struct_timestamp_fallback_non_default_format(timestamp_gen, timestamp_format): + json_string_gen = StringGen(r'{ "a": ' + timestamp_gen + ' }') \ + .with_special_case('{ "a": null }') \ + .with_special_case('null') + options = { 'timestampFormat': timestamp_format } if len(timestamp_format) > 0 else { } + assert_gpu_fallback_collect( + lambda spark : unary_op_df(spark, json_string_gen) \ + .select(f.col('a'), f.from_json('a', 'struct', options)), + 'ProjectExec', + conf={"spark.rapids.sql.expression.JsonToStructs": True, + 'spark.sql.legacy.timeParserPolicy': 'CORRECTED'}) @pytest.mark.parametrize('schema', ['struct', 'struct>', diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuCast.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuCast.scala index 2d1ba0d3c3b..2f59cfba072 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuCast.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuCast.scala @@ -1379,7 +1379,7 @@ object GpuCast { } /** This method does not close the `input` ColumnVector. 
*/ - private def convertTimestampOrNull( + def convertTimestampOrNull( input: ColumnVector, regex: String, cudfFormat: String): ColumnVector = { @@ -1463,7 +1463,7 @@ object GpuCast { } } - private def castStringToTimestamp(input: ColumnVector, ansiMode: Boolean): ColumnVector = { + def castStringToTimestamp(input: ColumnVector, ansiMode: Boolean): ColumnVector = { // special timestamps val today = DateUtils.currentDate() diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala index bdeae65a975..8119e78d988 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala @@ -3570,7 +3570,8 @@ object GpuOverrides extends Logging { "Returns a struct value with the given `jsonStr` and `schema`", ExprChecks.projectOnly( TypeSig.STRUCT.nested(TypeSig.STRUCT + TypeSig.ARRAY + TypeSig.STRING + TypeSig.integral + - TypeSig.fp + TypeSig.DECIMAL_64 + TypeSig.DECIMAL_128 + TypeSig.BOOLEAN + TypeSig.DATE) + + TypeSig.fp + TypeSig.DECIMAL_64 + TypeSig.DECIMAL_128 + TypeSig.BOOLEAN + TypeSig.DATE + + TypeSig.TIMESTAMP) + TypeSig.MAP.nested(TypeSig.STRING).withPsNote(TypeEnum.MAP, "MAP only supports keys and values that are of STRING type"), (TypeSig.STRUCT + TypeSig.MAP + TypeSig.ARRAY).nested(TypeSig.all), diff --git a/sql-plugin/src/main/scala/org/apache/spark/sql/catalyst/json/rapids/GpuJsonScan.scala b/sql-plugin/src/main/scala/org/apache/spark/sql/catalyst/json/rapids/GpuJsonScan.scala index 5c730bc23bf..c4840839616 100644 --- a/sql-plugin/src/main/scala/org/apache/spark/sql/catalyst/json/rapids/GpuJsonScan.scala +++ b/sql-plugin/src/main/scala/org/apache/spark/sql/catalyst/json/rapids/GpuJsonScan.scala @@ -121,6 +121,16 @@ object GpuJsonScan { } } + val hasTimestamps = TrampolineUtil.dataTypeExistsRecursively(dt, _.isInstanceOf[TimestampType]) + if (hasTimestamps) { + GpuJsonUtils.optionalTimestampFormatInRead(parsedOptions) match { + case None | Some("yyyy-MM-dd'T'HH:mm:ss[.SSS][XXX]") => + // this is fine + case timestampFormat => + meta.willNotWorkOnGpu(s"GpuJsonToStructs unsupported timestampFormat $timestampFormat") + } + } + if (LegacyBehaviorPolicyShim.isLegacyTimeParserPolicy) { meta.willNotWorkOnGpu("LEGACY timeParserPolicy is not supported in GpuJsonToStructs") } diff --git a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuJsonToStructs.scala b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuJsonToStructs.scala index f2de53483b0..3447c91d861 100644 --- a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuJsonToStructs.scala +++ b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuJsonToStructs.scala @@ -20,7 +20,7 @@ import ai.rapids.cudf import ai.rapids.cudf.{ColumnVector, ColumnView, DType, Scalar} import com.nvidia.spark.rapids.{GpuColumnVector, GpuScalar, GpuUnaryExpression} import com.nvidia.spark.rapids.Arm.{closeOnExcept, withResource} -import com.nvidia.spark.rapids.GpuCast.doCast +import com.nvidia.spark.rapids.GpuCast import com.nvidia.spark.rapids.RapidsPluginImplicits.AutoCloseableProducingSeq import com.nvidia.spark.rapids.jni.MapUtils import com.nvidia.spark.rapids.shims.GpuJsonToStructsShim @@ -215,7 +215,13 @@ case class GpuJsonToStructs( GpuJsonToStructsShim.castJsonStringToDate(col, options) case (_, DataTypes.DateType) => castToNullDate(input.getBase) - case _ => doCast(col, sparkType, dtype) + case (DataTypes.StringType, DataTypes.TimestampType) => + 
GpuJsonToStructsShim.castJsonStringToTimestamp(col, options) + case (DataTypes.LongType, DataTypes.TimestampType) => + GpuCast.castLongToTimestamp(col, DataTypes.TimestampType) + case (_, DataTypes.TimestampType) => + castToNullTimestamp(input.getBase) + case _ => GpuCast.doCast(col, sparkType, dtype) } } @@ -267,6 +273,12 @@ case class GpuJsonToStructs( } } + private def castToNullTimestamp(input: ColumnVector): ColumnVector = { + withResource(Scalar.fromNull(DType.TIMESTAMP_MICROSECONDS)) { nullScalar => + ColumnVector.fromScalar(nullScalar, input.getRowCount.toInt) + } + } + override def withTimeZone(timeZoneId: String): TimeZoneAwareExpression = copy(timeZoneId = Option(timeZoneId)) diff --git a/sql-plugin/src/main/spark311/scala/com/nvidia/spark/rapids/shims/GpuJsonToStructsShim.scala b/sql-plugin/src/main/spark311/scala/com/nvidia/spark/rapids/shims/GpuJsonToStructsShim.scala index 7e6709388f3..1fae5e7c5dc 100644 --- a/sql-plugin/src/main/spark311/scala/com/nvidia/spark/rapids/shims/GpuJsonToStructsShim.scala +++ b/sql-plugin/src/main/spark311/scala/com/nvidia/spark/rapids/shims/GpuJsonToStructsShim.scala @@ -57,4 +57,13 @@ object GpuJsonToStructsShim { } } + def castJsonStringToTimestamp(input: ColumnVector, + options: Map[String, String]): ColumnVector = { + withResource(Scalar.fromString(" ")) { space => + withResource(input.strip(space)) { trimmed => + // from_json doesn't respect ansi mode + GpuCast.castStringToTimestamp(trimmed, ansiMode = false) + } + } + } } \ No newline at end of file diff --git a/sql-plugin/src/main/spark311/scala/org/apache/spark/sql/catalyst/json/GpuJsonUtils.scala b/sql-plugin/src/main/spark311/scala/org/apache/spark/sql/catalyst/json/GpuJsonUtils.scala index 6a3c63ca2e9..7b7b680db24 100644 --- a/sql-plugin/src/main/spark311/scala/org/apache/spark/sql/catalyst/json/GpuJsonUtils.scala +++ b/sql-plugin/src/main/spark311/scala/org/apache/spark/sql/catalyst/json/GpuJsonUtils.scala @@ -42,6 +42,11 @@ object GpuJsonUtils { def dateFormatInRead(options: Map[String, String]): String = dateFormatInRead(parseJSONReadOptions(options)) + def optionalTimestampFormatInRead(options: JSONOptions): Option[String] = + Some(options.timestampFormat) + def optionalTimestampFormatInRead(options: Map[String, String]): Option[String] = + optionalTimestampFormatInRead(parseJSONReadOptions(options)) + def timestampFormatInRead(options: JSONOptions): String = options.timestampFormat def enableDateTimeParsingFallback(options: JSONOptions): Boolean = false diff --git a/sql-plugin/src/main/spark321db/scala/org/apache/spark/sql/catalyst/json/GpuJsonUtils.scala b/sql-plugin/src/main/spark321db/scala/org/apache/spark/sql/catalyst/json/GpuJsonUtils.scala index ab673be12f5..5f1d8929887 100644 --- a/sql-plugin/src/main/spark321db/scala/org/apache/spark/sql/catalyst/json/GpuJsonUtils.scala +++ b/sql-plugin/src/main/spark321db/scala/org/apache/spark/sql/catalyst/json/GpuJsonUtils.scala @@ -36,6 +36,12 @@ object GpuJsonUtils { def dateFormatInRead(options: Map[String, String]): String = dateFormatInRead(parseJSONReadOptions(options)) + def optionalTimestampFormatInRead(options: JSONOptions): Option[String] = + options.timestampFormatInRead + + def optionalTimestampFormatInRead(options: Map[String, String]): Option[String] = + optionalTimestampFormatInRead(parseJSONReadOptions(options)) + def timestampFormatInRead(options: JSONOptions): String = options.timestampFormatInRead.getOrElse( if (SQLConf.get.legacyTimeParserPolicy == SQLConf.LegacyBehaviorPolicy.LEGACY) { 
s"${DateFormatter.defaultPattern}'T'HH:mm:ss.SSSXXX" diff --git a/sql-plugin/src/main/spark330/scala/org/apache/spark/sql/catalyst/json/GpuJsonUtils.scala b/sql-plugin/src/main/spark330/scala/org/apache/spark/sql/catalyst/json/GpuJsonUtils.scala index 9a1e34bfe12..33989821009 100644 --- a/sql-plugin/src/main/spark330/scala/org/apache/spark/sql/catalyst/json/GpuJsonUtils.scala +++ b/sql-plugin/src/main/spark330/scala/org/apache/spark/sql/catalyst/json/GpuJsonUtils.scala @@ -43,6 +43,12 @@ object GpuJsonUtils { def dateFormatInRead(options: Map[String, String]): String = dateFormatInRead(parseJSONReadOptions(options)) + def optionalTimestampFormatInRead(options: JSONOptions): Option[String] = + options.timestampFormatInRead + + def optionalTimestampFormatInRead(options: Map[String, String]): Option[String] = + optionalTimestampFormatInRead(parseJSONReadOptions(options)) + def timestampFormatInRead(options: JSONOptions): String = options.timestampFormatInRead.getOrElse( if (SQLConf.get.legacyTimeParserPolicy == SQLConf.LegacyBehaviorPolicy.LEGACY) { s"${DateFormatter.defaultPattern}'T'HH:mm:ss.SSSXXX" diff --git a/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/GpuJsonToStructsShim.scala b/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/GpuJsonToStructsShim.scala index 88560143f2e..0ae9bb3d839 100644 --- a/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/GpuJsonToStructsShim.scala +++ b/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/GpuJsonToStructsShim.scala @@ -46,4 +46,23 @@ object GpuJsonToStructsShim { } } + def castJsonStringToTimestamp(input: ColumnVector, + options: Map[String, String]): ColumnVector = { + options.get("timestampFormat") match { + case None => + // legacy behavior + withResource(Scalar.fromString(" ")) { space => + withResource(input.strip(space)) { trimmed => + // from_json doesn't respect ansi mode + GpuCast.castStringToTimestamp(trimmed, ansiMode = false) + } + } + case Some("yyyy-MM-dd'T'HH:mm:ss[.SSS][XXX]") => + GpuCast.convertTimestampOrNull(input, + "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}(\\.[0-9]{1,6})?Z?$", "%Y-%m-%d") + case other => + // should be unreachable due to GpuOverrides checks + throw new IllegalStateException(s"Unsupported timestampFormat $other") + } + } } \ No newline at end of file diff --git a/sql-plugin/src/main/spark340/scala/org/apache/spark/sql/catalyst/json/GpuJsonUtils.scala b/sql-plugin/src/main/spark340/scala/org/apache/spark/sql/catalyst/json/GpuJsonUtils.scala index 92c1c17bba5..4685cc0d289 100644 --- a/sql-plugin/src/main/spark340/scala/org/apache/spark/sql/catalyst/json/GpuJsonUtils.scala +++ b/sql-plugin/src/main/spark340/scala/org/apache/spark/sql/catalyst/json/GpuJsonUtils.scala @@ -33,11 +33,7 @@ object GpuJsonUtils { options.dateFormatInRead def optionalDateFormatInRead(options: Map[String, String]): Option[String] = { - val parsedOptions = new JSONOptionsInRead( - options, - SQLConf.get.sessionLocalTimeZone, - SQLConf.get.columnNameOfCorruptRecord) - optionalDateFormatInRead(parsedOptions) + optionalDateFormatInRead(parseJSONReadOptions(options)) } /** @@ -51,6 +47,12 @@ object GpuJsonUtils { def dateFormatInRead(options: JSONOptions): String = options.dateFormatInRead.getOrElse(DateFormatter.defaultPattern) + def optionalTimestampFormatInRead(options: JSONOptions): Option[String] = + options.timestampFormatInRead + + def optionalTimestampFormatInRead(options: Map[String, String]): Option[String] = + 
optionalTimestampFormatInRead(parseJSONReadOptions(options)) + def timestampFormatInRead(options: JSONOptions): String = options.timestampFormatInRead.getOrElse( if (LegacyBehaviorPolicyShim.isLegacyTimeParserPolicy()) { s"${DateFormatter.defaultPattern}'T'HH:mm:ss.SSSXXX" @@ -60,4 +62,11 @@ object GpuJsonUtils { def enableDateTimeParsingFallback(options: JSONOptions): Boolean = options.enableDateTimeParsingFallback.getOrElse(false) + + def parseJSONReadOptions(options: Map[String, String]) = { + new JSONOptionsInRead( + options, + SQLConf.get.sessionLocalTimeZone, + SQLConf.get.columnNameOfCorruptRecord) + } } From 7963612a50a1f753a2e6d5553229cdf461d7d1ed Mon Sep 17 00:00:00 2001 From: Jason Lowe Date: Tue, 21 Nov 2023 20:47:53 -0600 Subject: [PATCH 03/25] Set seed to 0 for test_hash_reduction_sum (#9826) Signed-off-by: Jason Lowe --- integration_tests/src/main/python/hash_aggregate_test.py | 1 + 1 file changed, 1 insertion(+) diff --git a/integration_tests/src/main/python/hash_aggregate_test.py b/integration_tests/src/main/python/hash_aggregate_test.py index a9300a51c79..4ecf42a9b42 100644 --- a/integration_tests/src/main/python/hash_aggregate_test.py +++ b/integration_tests/src/main/python/hash_aggregate_test.py @@ -381,6 +381,7 @@ def test_hash_grpby_sum_full_decimal(data_gen, conf): conf = conf) @approximate_float +@datagen_overrides(seed=0, reason="https://github.com/NVIDIA/spark-rapids/issues/9822") @ignore_order @incompat @pytest.mark.parametrize('data_gen', numeric_gens + decimal_gens + [DecimalGen(precision=36, scale=5)], ids=idfn) From bdc45cb9264ac19d4058c93072bdeaa4e738fc8a Mon Sep 17 00:00:00 2001 From: Jason Lowe Date: Tue, 21 Nov 2023 20:52:26 -0600 Subject: [PATCH 04/25] Update timestamp gens to avoid "year 0 is out of range" errors (#9821) Signed-off-by: Jason Lowe --- integration_tests/src/main/python/delta_lake_write_test.py | 2 +- .../src/main/python/fastparquet_compatibility_test.py | 2 +- integration_tests/src/main/python/parquet_write_test.py | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/integration_tests/src/main/python/delta_lake_write_test.py b/integration_tests/src/main/python/delta_lake_write_test.py index 5e4163a9ecc..b62a6992ff0 100644 --- a/integration_tests/src/main/python/delta_lake_write_test.py +++ b/integration_tests/src/main/python/delta_lake_write_test.py @@ -412,7 +412,7 @@ def setup_tables(spark): @pytest.mark.parametrize("ts_write", ["INT96", "TIMESTAMP_MICROS", "TIMESTAMP_MILLIS"], ids=idfn) @pytest.mark.skipif(is_before_spark_320(), reason="Delta Lake writes are not supported before Spark 3.2.x") def test_delta_write_legacy_timestamp(spark_tmp_path, ts_write): - gen = TimestampGen(start=datetime(1, 1, 1, tzinfo=timezone.utc), + gen = TimestampGen(start=datetime(1, 2, 1, tzinfo=timezone.utc), end=datetime(2000, 1, 1, tzinfo=timezone.utc)).with_special_case( datetime(1000, 1, 1, tzinfo=timezone.utc), weight=10.0) data_path = spark_tmp_path + "/DELTA_DATA" diff --git a/integration_tests/src/main/python/fastparquet_compatibility_test.py b/integration_tests/src/main/python/fastparquet_compatibility_test.py index d2636d58d01..6ec5ec88fd3 100644 --- a/integration_tests/src/main/python/fastparquet_compatibility_test.py +++ b/integration_tests/src/main/python/fastparquet_compatibility_test.py @@ -192,7 +192,7 @@ def test_reading_file_written_by_spark_cpu(data_gen, spark_tmp_path): start=pandas_min_datetime, end=pandas_max_datetime), # Vanilla case. 
pytest.param(TimestampGen(nullable=False, - start=datetime(1, 1, 1, tzinfo=timezone.utc), + start=datetime(1, 2, 1, tzinfo=timezone.utc), end=pandas_min_datetime), marks=pytest.mark.xfail(reason="fastparquet reads timestamps preceding 1900 incorrectly.")), ], ids=idfn) diff --git a/integration_tests/src/main/python/parquet_write_test.py b/integration_tests/src/main/python/parquet_write_test.py index bd330b569bb..8a74973b0be 100644 --- a/integration_tests/src/main/python/parquet_write_test.py +++ b/integration_tests/src/main/python/parquet_write_test.py @@ -72,7 +72,7 @@ parquet_datetime_gen_simple = [DateGen(start=date(1, 1, 1), end=date(2000, 1, 1)) .with_special_case(date(1000, 1, 1), weight=10.0), - TimestampGen(start=datetime(1, 1, 1, tzinfo=timezone.utc), + TimestampGen(start=datetime(1, 2, 1, tzinfo=timezone.utc), end=datetime(2000, 1, 1, tzinfo=timezone.utc)) .with_special_case(datetime(1000, 1, 1, tzinfo=timezone.utc), weight=10.0)] parquet_datetime_in_struct_gen = [ @@ -280,8 +280,8 @@ def writeParquetUpgradeCatchException(spark, df, data_path, spark_tmp_table_fact @pytest.mark.parametrize('ts_write_data_gen', [('INT96', TimestampGen()), - ('TIMESTAMP_MICROS', TimestampGen(start=datetime(1, 1, 1, tzinfo=timezone.utc), end=datetime(1899, 12, 31, tzinfo=timezone.utc))), - ('TIMESTAMP_MILLIS', TimestampGen(start=datetime(1, 1, 1, tzinfo=timezone.utc), end=datetime(1899, 12, 31, tzinfo=timezone.utc)))]) + ('TIMESTAMP_MICROS', TimestampGen(start=datetime(1, 2, 1, tzinfo=timezone.utc), end=datetime(1899, 12, 31, tzinfo=timezone.utc))), + ('TIMESTAMP_MILLIS', TimestampGen(start=datetime(1, 2, 1, tzinfo=timezone.utc), end=datetime(1899, 12, 31, tzinfo=timezone.utc)))]) @pytest.mark.parametrize('rebase', ["CORRECTED","EXCEPTION"]) def test_ts_write_fails_datetime_exception(spark_tmp_path, ts_write_data_gen, spark_tmp_table_factory, rebase): ts_write, gen = ts_write_data_gen From 908e9869465918369a4e1a9a4ee95c30376cac7f Mon Sep 17 00:00:00 2001 From: Jason Lowe Date: Wed, 22 Nov 2023 08:21:12 -0600 Subject: [PATCH 05/25] Add GpuCheckOverflowInTableInsert to Databricks 11.3+ (#9800) Signed-off-by: Jason Lowe --- .../src/main/python/parquet_write_test.py | 21 +++++ .../rapids/shims/Spark330PlusDBShims.scala | 87 +++++++++++++++++++ .../spark/rapids/shims/SparkShims.scala | 50 +---------- .../GpuCheckOverflowInTableInsert.scala | 3 + ...ims.scala => Spark331PlusNonDBShims.scala} | 2 +- .../spark/rapids/shims/SparkShims.scala | 2 +- .../spark/rapids/shims/SparkShims.scala | 2 +- .../rapids/shims/Spark332PlusDBShims.scala | 46 ++-------- ...ims.scala => Spark340PlusNonDBShims.scala} | 2 +- .../spark/rapids/shims/SparkShims.scala | 2 +- .../spark/rapids/shims/SparkShims.scala | 2 +- 11 files changed, 126 insertions(+), 93 deletions(-) create mode 100644 sql-plugin/src/main/spark330db/scala/com/nvidia/spark/rapids/shims/Spark330PlusDBShims.scala rename sql-plugin/src/main/{spark331 => spark330db}/scala/org/apache/spark/sql/rapids/GpuCheckOverflowInTableInsert.scala (97%) rename sql-plugin/src/main/spark331/scala/com/nvidia/spark/rapids/shims/{Spark331PlusShims.scala => Spark331PlusNonDBShims.scala} (97%) rename sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/{Spark340PlusShims.scala => Spark340PlusNonDBShims.scala} (99%) diff --git a/integration_tests/src/main/python/parquet_write_test.py b/integration_tests/src/main/python/parquet_write_test.py index 8a74973b0be..c83939f4774 100644 --- a/integration_tests/src/main/python/parquet_write_test.py +++ 
b/integration_tests/src/main/python/parquet_write_test.py @@ -818,3 +818,24 @@ def test_parquet_write_column_name_with_dots(spark_tmp_path): lambda spark, path: gen_df(spark, gens).coalesce(1).write.parquet(path), lambda spark, path: spark.read.parquet(path), data_path) + +@ignore_order +def test_parquet_append_with_downcast(spark_tmp_table_factory, spark_tmp_path): + data_path = spark_tmp_path + "/PARQUET_DATA" + cpu_table = spark_tmp_table_factory.get() + gpu_table = spark_tmp_table_factory.get() + def setup_tables(spark): + df = unary_op_df(spark, int_gen, length=10) + df.write.format("parquet").option("path", data_path + "/CPU").saveAsTable(cpu_table) + df.write.format("parquet").option("path", data_path + "/GPU").saveAsTable(gpu_table) + with_cpu_session(setup_tables) + def do_append(spark, path): + table = cpu_table + if path.endswith("/GPU"): + table = gpu_table + unary_op_df(spark, LongGen(min_val=0, max_val=128, special_cases=[]), length=10)\ + .write.mode("append").saveAsTable(table) + assert_gpu_and_cpu_writes_are_equal_collect( + do_append, + lambda spark, path: spark.read.parquet(path), + data_path) diff --git a/sql-plugin/src/main/spark330db/scala/com/nvidia/spark/rapids/shims/Spark330PlusDBShims.scala b/sql-plugin/src/main/spark330db/scala/com/nvidia/spark/rapids/shims/Spark330PlusDBShims.scala new file mode 100644 index 00000000000..cb45d0fa440 --- /dev/null +++ b/sql-plugin/src/main/spark330db/scala/com/nvidia/spark/rapids/shims/Spark330PlusDBShims.scala @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/*** spark-rapids-shim-json-lines +{"spark": "330db"} +{"spark": "332db"} +{"spark": "341db"} +spark-rapids-shim-json-lines ***/ +package com.nvidia.spark.rapids.shims + +import com.nvidia.spark.rapids._ + +import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.plans.physical.SinglePartition +import org.apache.spark.sql.execution.{ColumnarToRowTransition, SparkPlan} +import org.apache.spark.sql.execution.adaptive.ShuffleQueryStageExec +import org.apache.spark.sql.execution.exchange.{EXECUTOR_BROADCAST, ShuffleExchangeExec, ShuffleExchangeLike} +import org.apache.spark.sql.rapids.{GpuCheckOverflowInTableInsert, GpuElementAtMeta} +import org.apache.spark.sql.rapids.execution.{GpuBroadcastHashJoinExec, GpuBroadcastNestedLoopJoinExec} + +trait Spark330PlusDBShims extends Spark321PlusDBShims { + override def getExprs: Map[Class[_ <: Expression], ExprRule[_ <: Expression]] = { + val shimExprs: Map[Class[_ <: Expression], ExprRule[_ <: Expression]] = Seq( + GpuOverrides.expr[CheckOverflowInTableInsert]( + "Casting a numeric value as another numeric type in store assignment", + ExprChecks.unaryProjectInputMatchesOutput( + TypeSig.all, + TypeSig.all), + (t, conf, p, r) => new UnaryExprMeta[CheckOverflowInTableInsert](t, conf, p, r) { + override def convertToGpu(child: Expression): GpuExpression = { + child match { + case c: GpuCast => GpuCheckOverflowInTableInsert(c, t.columnName) + case _ => + throw new IllegalStateException("Expression child is not of Type GpuCast") + } + } + }), + GpuElementAtMeta.elementAtRule(true) + ).map(r => (r.getClassFor.asSubclass(classOf[Expression]), r)).toMap + super.getExprs ++ shimExprs ++ DayTimeIntervalShims.exprs ++ RoundingShims.exprs + } + + override def getExecs: Map[Class[_ <: SparkPlan], ExecRule[_ <: SparkPlan]] = + super.getExecs ++ PythonMapInArrowExecShims.execs + + override def reproduceEmptyStringBug: Boolean = false + + override def isExecutorBroadcastShuffle(shuffle: ShuffleExchangeLike): Boolean = { + shuffle.shuffleOrigin.equals(EXECUTOR_BROADCAST) + } + + override def shuffleParentReadsShuffleData(shuffle: ShuffleExchangeLike, + parent: SparkPlan): Boolean = { + parent match { + case _: GpuBroadcastHashJoinExec => + shuffle.shuffleOrigin.equals(EXECUTOR_BROADCAST) + case _: GpuBroadcastNestedLoopJoinExec => + shuffle.shuffleOrigin.equals(EXECUTOR_BROADCAST) + case _ => false + } + } + + + override def addRowShuffleToQueryStageTransitionIfNeeded(c2r: ColumnarToRowTransition, + sqse: ShuffleQueryStageExec): SparkPlan = { + val plan = GpuTransitionOverrides.getNonQueryStagePlan(sqse) + plan match { + case shuffle: ShuffleExchangeLike if shuffle.shuffleOrigin.equals(EXECUTOR_BROADCAST) => + ShuffleExchangeExec(SinglePartition, c2r, EXECUTOR_BROADCAST) + case _ => + c2r + } + } +} diff --git a/sql-plugin/src/main/spark330db/scala/com/nvidia/spark/rapids/shims/SparkShims.scala b/sql-plugin/src/main/spark330db/scala/com/nvidia/spark/rapids/shims/SparkShims.scala index 92a52ce3fcf..84f5c4e4886 100644 --- a/sql-plugin/src/main/spark330db/scala/com/nvidia/spark/rapids/shims/SparkShims.scala +++ b/sql-plugin/src/main/spark330db/scala/com/nvidia/spark/rapids/shims/SparkShims.scala @@ -21,29 +21,13 @@ package com.nvidia.spark.rapids.shims import com.nvidia.spark.rapids._ -import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.catalyst.plans.physical.SinglePartition -import org.apache.spark.sql.execution.{ColumnarToRowTransition, SparkPlan} -import 
org.apache.spark.sql.execution.adaptive.ShuffleQueryStageExec +import org.apache.spark.sql.catalyst.expressions.Expression import org.apache.spark.sql.execution.command.{CreateDataSourceTableAsSelectCommand, DataWritingCommand, RunnableCommand} -import org.apache.spark.sql.execution.exchange.{EXECUTOR_BROADCAST, ShuffleExchangeExec, ShuffleExchangeLike} -import org.apache.spark.sql.rapids.GpuElementAtMeta -import org.apache.spark.sql.rapids.execution.{GpuBroadcastHashJoinExec, GpuBroadcastNestedLoopJoinExec} -object SparkShimImpl extends Spark321PlusDBShims { +object SparkShimImpl extends Spark330PlusDBShims { // AnsiCast is removed from Spark3.4.0 override def ansiCastRule: ExprRule[_ <: Expression] = null - override def getExprs: Map[Class[_ <: Expression], ExprRule[_ <: Expression]] = { - val elementAtExpr: Map[Class[_ <: Expression], ExprRule[_ <: Expression]] = Seq( - GpuElementAtMeta.elementAtRule(true) - ).map(r => (r.getClassFor.asSubclass(classOf[Expression]), r)).toMap - super.getExprs ++ DayTimeIntervalShims.exprs ++ RoundingShims.exprs ++ elementAtExpr - } - - override def getExecs: Map[Class[_ <: SparkPlan], ExecRule[_ <: SparkPlan]] = - super.getExecs ++ PythonMapInArrowExecShims.execs - override def getDataWriteCmds: Map[Class[_ <: DataWritingCommand], DataWritingCommandRule[_ <: DataWritingCommand]] = { Seq(GpuOverrides.dataWriteCmd[CreateDataSourceTableAsSelectCommand]( @@ -56,32 +40,4 @@ object SparkShimImpl extends Spark321PlusDBShims { RunnableCommandRule[_ <: RunnableCommand]] = { Map.empty } - - override def reproduceEmptyStringBug: Boolean = false - - override def isExecutorBroadcastShuffle(shuffle: ShuffleExchangeLike): Boolean = { - shuffle.shuffleOrigin.equals(EXECUTOR_BROADCAST) - } - - override def shuffleParentReadsShuffleData(shuffle: ShuffleExchangeLike, - parent: SparkPlan): Boolean = { - parent match { - case _: GpuBroadcastHashJoinExec => - shuffle.shuffleOrigin.equals(EXECUTOR_BROADCAST) - case _: GpuBroadcastNestedLoopJoinExec => - shuffle.shuffleOrigin.equals(EXECUTOR_BROADCAST) - case _ => false - } - } - - override def addRowShuffleToQueryStageTransitionIfNeeded(c2r: ColumnarToRowTransition, - sqse: ShuffleQueryStageExec): SparkPlan = { - val plan = GpuTransitionOverrides.getNonQueryStagePlan(sqse) - plan match { - case shuffle: ShuffleExchangeLike if shuffle.shuffleOrigin.equals(EXECUTOR_BROADCAST) => - ShuffleExchangeExec(SinglePartition, c2r, EXECUTOR_BROADCAST) - case _ => - c2r - } - } -} \ No newline at end of file +} diff --git a/sql-plugin/src/main/spark331/scala/org/apache/spark/sql/rapids/GpuCheckOverflowInTableInsert.scala b/sql-plugin/src/main/spark330db/scala/org/apache/spark/sql/rapids/GpuCheckOverflowInTableInsert.scala similarity index 97% rename from sql-plugin/src/main/spark331/scala/org/apache/spark/sql/rapids/GpuCheckOverflowInTableInsert.scala rename to sql-plugin/src/main/spark330db/scala/org/apache/spark/sql/rapids/GpuCheckOverflowInTableInsert.scala index 3618f749bbd..7906b522d70 100644 --- a/sql-plugin/src/main/spark331/scala/org/apache/spark/sql/rapids/GpuCheckOverflowInTableInsert.scala +++ b/sql-plugin/src/main/spark330db/scala/org/apache/spark/sql/rapids/GpuCheckOverflowInTableInsert.scala @@ -15,12 +15,15 @@ */ /*** spark-rapids-shim-json-lines +{"spark": "330db"} {"spark": "331"} {"spark": "332"} {"spark": "332cdh"} +{"spark": "332db"} {"spark": "333"} {"spark": "340"} {"spark": "341"} +{"spark": "341db"} {"spark": "350"} spark-rapids-shim-json-lines ***/ package org.apache.spark.sql.rapids diff --git 
a/sql-plugin/src/main/spark331/scala/com/nvidia/spark/rapids/shims/Spark331PlusShims.scala b/sql-plugin/src/main/spark331/scala/com/nvidia/spark/rapids/shims/Spark331PlusNonDBShims.scala similarity index 97% rename from sql-plugin/src/main/spark331/scala/com/nvidia/spark/rapids/shims/Spark331PlusShims.scala rename to sql-plugin/src/main/spark331/scala/com/nvidia/spark/rapids/shims/Spark331PlusNonDBShims.scala index 2d49bf7dc2c..4a346235627 100644 --- a/sql-plugin/src/main/spark331/scala/com/nvidia/spark/rapids/shims/Spark331PlusShims.scala +++ b/sql-plugin/src/main/spark331/scala/com/nvidia/spark/rapids/shims/Spark331PlusNonDBShims.scala @@ -30,7 +30,7 @@ import com.nvidia.spark.rapids.{ExprChecks, ExprRule, GpuCast, GpuExpression, Gp import org.apache.spark.sql.catalyst.expressions.{CheckOverflowInTableInsert, Expression} import org.apache.spark.sql.rapids.GpuCheckOverflowInTableInsert -trait Spark331PlusShims extends Spark330PlusNonDBShims { +trait Spark331PlusNonDBShims extends Spark330PlusNonDBShims { override def getExprs: Map[Class[_ <: Expression], ExprRule[_ <: Expression]] = { val map: Map[Class[_ <: Expression], ExprRule[_ <: Expression]] = Seq( // Add expression CheckOverflowInTableInsert starting Spark-3.3.1+ diff --git a/sql-plugin/src/main/spark331/scala/com/nvidia/spark/rapids/shims/SparkShims.scala b/sql-plugin/src/main/spark331/scala/com/nvidia/spark/rapids/shims/SparkShims.scala index a201fe1de0c..eeede6ed2d9 100644 --- a/sql-plugin/src/main/spark331/scala/com/nvidia/spark/rapids/shims/SparkShims.scala +++ b/sql-plugin/src/main/spark331/scala/com/nvidia/spark/rapids/shims/SparkShims.scala @@ -25,7 +25,7 @@ import com.nvidia.spark.rapids._ import org.apache.spark.sql.execution.command.{CreateDataSourceTableAsSelectCommand, DataWritingCommand, RunnableCommand} -object SparkShimImpl extends Spark331PlusShims with AnsiCastRuleShims { +object SparkShimImpl extends Spark331PlusNonDBShims with AnsiCastRuleShims { override def getDataWriteCmds: Map[Class[_ <: DataWritingCommand], DataWritingCommandRule[_ <: DataWritingCommand]] = { Seq(GpuOverrides.dataWriteCmd[CreateDataSourceTableAsSelectCommand]( diff --git a/sql-plugin/src/main/spark332cdh/scala/com/nvidia/spark/rapids/shims/SparkShims.scala b/sql-plugin/src/main/spark332cdh/scala/com/nvidia/spark/rapids/shims/SparkShims.scala index d43a8092b33..517b52b7218 100644 --- a/sql-plugin/src/main/spark332cdh/scala/com/nvidia/spark/rapids/shims/SparkShims.scala +++ b/sql-plugin/src/main/spark332cdh/scala/com/nvidia/spark/rapids/shims/SparkShims.scala @@ -19,4 +19,4 @@ spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims -object SparkShimImpl extends Spark33cdhShims with Spark331PlusShims {} +object SparkShimImpl extends Spark33cdhShims with Spark331PlusNonDBShims {} diff --git a/sql-plugin/src/main/spark332db/scala/com/nvidia/spark/rapids/shims/Spark332PlusDBShims.scala b/sql-plugin/src/main/spark332db/scala/com/nvidia/spark/rapids/shims/Spark332PlusDBShims.scala index 563dcfac8e7..43ef6118746 100644 --- a/sql-plugin/src/main/spark332db/scala/com/nvidia/spark/rapids/shims/Spark332PlusDBShims.scala +++ b/sql-plugin/src/main/spark332db/scala/com/nvidia/spark/rapids/shims/Spark332PlusDBShims.scala @@ -23,16 +23,11 @@ package com.nvidia.spark.rapids.shims import com.nvidia.spark.rapids._ import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.catalyst.plans.physical.SinglePartition -import org.apache.spark.sql.execution.{ColumnarToRowTransition, SparkPlan} -import 
org.apache.spark.sql.execution.adaptive.ShuffleQueryStageExec +import org.apache.spark.sql.execution.SparkPlan import org.apache.spark.sql.execution.command.{CreateDataSourceTableAsSelectCommand, DataWritingCommand, RunnableCommand} import org.apache.spark.sql.execution.datasources._ -import org.apache.spark.sql.execution.exchange.{EXECUTOR_BROADCAST, ShuffleExchangeExec, ShuffleExchangeLike} -import org.apache.spark.sql.rapids.GpuElementAtMeta -import org.apache.spark.sql.rapids.execution.{GpuBroadcastHashJoinExec, GpuBroadcastNestedLoopJoinExec} -trait Spark332PlusDBShims extends Spark321PlusDBShims { +trait Spark332PlusDBShims extends Spark330PlusDBShims { // AnsiCast is removed from Spark3.4.0 override def ansiCastRule: ExprRule[_ <: Expression] = null @@ -45,10 +40,9 @@ trait Spark332PlusDBShims extends Spark321PlusDBShims { (a, conf, p, r) => new UnaryExprMeta[KnownNullable](a, conf, p, r) { override def convertToGpu(child: Expression): GpuExpression = GpuKnownNullable(child) } - ), - GpuElementAtMeta.elementAtRule(true) + ) ).map(r => (r.getClassFor.asSubclass(classOf[Expression]), r)).toMap - super.getExprs ++ shimExprs ++ DayTimeIntervalShims.exprs ++ RoundingShims.exprs + super.getExprs ++ shimExprs } private val shimExecs: Map[Class[_ <: SparkPlan], ExecRule[_ <: SparkPlan]] = Seq( @@ -63,7 +57,7 @@ trait Spark332PlusDBShims extends Spark321PlusDBShims { ).map(r => (r.getClassFor.asSubclass(classOf[SparkPlan]), r)).toMap override def getExecs: Map[Class[_ <: SparkPlan], ExecRule[_ <: SparkPlan]] = - super.getExecs ++ shimExecs ++ PythonMapInArrowExecShims.execs + super.getExecs ++ shimExecs override def getDataWriteCmds: Map[Class[_ <: DataWritingCommand], DataWritingCommandRule[_ <: DataWritingCommand]] = { @@ -78,32 +72,4 @@ trait Spark332PlusDBShims extends Spark321PlusDBShims { (a, conf, p, r) => new CreateDataSourceTableAsSelectCommandMeta(a, conf, p, r)) ).map(r => (r.getClassFor.asSubclass(classOf[RunnableCommand]), r)).toMap } - - override def reproduceEmptyStringBug: Boolean = false - - override def isExecutorBroadcastShuffle(shuffle: ShuffleExchangeLike): Boolean = { - shuffle.shuffleOrigin.equals(EXECUTOR_BROADCAST) - } - - override def shuffleParentReadsShuffleData(shuffle: ShuffleExchangeLike, - parent: SparkPlan): Boolean = { - parent match { - case _: GpuBroadcastHashJoinExec => - shuffle.shuffleOrigin.equals(EXECUTOR_BROADCAST) - case _: GpuBroadcastNestedLoopJoinExec => - shuffle.shuffleOrigin.equals(EXECUTOR_BROADCAST) - case _ => false - } - } - - override def addRowShuffleToQueryStageTransitionIfNeeded(c2r: ColumnarToRowTransition, - sqse: ShuffleQueryStageExec): SparkPlan = { - val plan = GpuTransitionOverrides.getNonQueryStagePlan(sqse) - plan match { - case shuffle: ShuffleExchangeLike if shuffle.shuffleOrigin.equals(EXECUTOR_BROADCAST) => - ShuffleExchangeExec(SinglePartition, c2r, EXECUTOR_BROADCAST) - case _ => - c2r - } - } -} \ No newline at end of file +} diff --git a/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/Spark340PlusShims.scala b/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/Spark340PlusNonDBShims.scala similarity index 99% rename from sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/Spark340PlusShims.scala rename to sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/Spark340PlusNonDBShims.scala index 3462086b0b1..2db727f14e0 100644 --- a/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/Spark340PlusShims.scala +++ 
b/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/Spark340PlusNonDBShims.scala @@ -34,7 +34,7 @@ import org.apache.spark.sql.execution.exchange.ENSURE_REQUIREMENTS import org.apache.spark.sql.rapids.GpuElementAtMeta import org.apache.spark.sql.rapids.GpuV1WriteUtils.GpuEmpty2Null -trait Spark340PlusShims extends Spark331PlusShims { +trait Spark340PlusNonDBShims extends Spark331PlusNonDBShims { private val shimExecs: Map[Class[_ <: SparkPlan], ExecRule[_ <: SparkPlan]] = Seq( GpuOverrides.exec[GlobalLimitExec]( diff --git a/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/SparkShims.scala b/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/SparkShims.scala index a91f7263372..d2031435147 100644 --- a/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/SparkShims.scala +++ b/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/SparkShims.scala @@ -20,4 +20,4 @@ spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims -object SparkShimImpl extends Spark340PlusShims +object SparkShimImpl extends Spark340PlusNonDBShims diff --git a/sql-plugin/src/main/spark350/scala/com/nvidia/spark/rapids/shims/SparkShims.scala b/sql-plugin/src/main/spark350/scala/com/nvidia/spark/rapids/shims/SparkShims.scala index 75a13143a94..7231030bdee 100644 --- a/sql-plugin/src/main/spark350/scala/com/nvidia/spark/rapids/shims/SparkShims.scala +++ b/sql-plugin/src/main/spark350/scala/com/nvidia/spark/rapids/shims/SparkShims.scala @@ -25,7 +25,7 @@ import org.apache.spark.sql.catalyst.expressions.{Expression, PythonUDAF, ToPret import org.apache.spark.sql.rapids.execution.python.GpuPythonUDAF import org.apache.spark.sql.types.StringType -object SparkShimImpl extends Spark340PlusShims { +object SparkShimImpl extends Spark340PlusNonDBShims { override def getExprs: Map[Class[_ <: Expression], ExprRule[_ <: Expression]] = { val shimExprs: Map[Class[_ <: Expression], ExprRule[_ <: Expression]] = Seq( From cd3f85ff3bf9c0ab1b2877fd272fe72f065fa3ac Mon Sep 17 00:00:00 2001 From: Alessandro Bellina Date: Wed, 22 Nov 2023 11:16:57 -0600 Subject: [PATCH 06/25] UCX 1.15 upgrade (#9824) * UCX 1.15 upgrade Signed-off-by: Alessandro Bellina * Also update scala 2.13 --------- Signed-off-by: Alessandro Bellina --- .../Dockerfile.rocky_no_rdma | 13 ++++++++----- .../Dockerfile.rocky_rdma | 13 ++++++++----- .../Dockerfile.ubuntu_no_rdma | 15 +++++++++------ .../Dockerfile.ubuntu_rdma | 16 ++++++++++------ jenkins/Dockerfile-blossom.multi | 2 +- jenkins/Dockerfile-blossom.ubuntu | 5 +++-- pom.xml | 2 +- scala2.13/pom.xml | 2 +- 8 files changed, 41 insertions(+), 27 deletions(-) diff --git a/docs/additional-functionality/shuffle-docker-examples/Dockerfile.rocky_no_rdma b/docs/additional-functionality/shuffle-docker-examples/Dockerfile.rocky_no_rdma index 4be0562123a..adf28f5fea2 100644 --- a/docs/additional-functionality/shuffle-docker-examples/Dockerfile.rocky_no_rdma +++ b/docs/additional-functionality/shuffle-docker-examples/Dockerfile.rocky_no_rdma @@ -17,23 +17,26 @@ # # The parameters are: # - CUDA_VER: 11.8.0 by default -# - UCX_VER and UCX_CUDA_VER: these are used to pick a package matching a specific UCX version and -# CUDA runtime from the UCX github repo. -# See: https://github.com/openucx/ucx/releases/ +# - UCX_VER, UCX_CUDA_VER, and UCX_ARCH: +# Used to pick a package matching a specific UCX version and +# CUDA runtime from the UCX github repo. 
+# See: https://github.com/openucx/ucx/releases/ # - ROCKY_VER: Rocky Linux OS version ARG CUDA_VER=11.8.0 -ARG UCX_VER=1.14.0 +ARG UCX_VER=1.15.0 ARG UCX_CUDA_VER=11 +ARG UCX_ARCH=x86_64 ARG ROCKY_VER=8 FROM nvidia/cuda:${CUDA_VER}-runtime-rockylinux${ROCKY_VER} ARG UCX_VER ARG UCX_CUDA_VER +ARG UCX_ARCH RUN yum update -y && yum install -y wget bzip2 numactl-libs libgomp RUN ls /usr/lib RUN mkdir /tmp/ucx_install && cd /tmp/ucx_install && \ - wget https://github.com/openucx/ucx/releases/download/v$UCX_VER/ucx-$UCX_VER-centos8-mofed5-cuda$UCX_CUDA_VER.tar.bz2 && \ + wget https://github.com/openucx/ucx/releases/download/v$UCX_VER/ucx-$UCX_VER-centos8-mofed5-cuda$UCX_CUDA_VER-$UCX_ARCH.tar.bz2 && \ tar -xvf *.bz2 && \ rpm -i ucx-$UCX_VER*.rpm && \ rpm -i ucx-cuda-$UCX_VER*.rpm --nodeps && \ diff --git a/docs/additional-functionality/shuffle-docker-examples/Dockerfile.rocky_rdma b/docs/additional-functionality/shuffle-docker-examples/Dockerfile.rocky_rdma index c5055e61ec6..9083e1561b5 100644 --- a/docs/additional-functionality/shuffle-docker-examples/Dockerfile.rocky_rdma +++ b/docs/additional-functionality/shuffle-docker-examples/Dockerfile.rocky_rdma @@ -17,22 +17,25 @@ # # The parameters are: # - CUDA_VER: 11.8.0 by default -# - UCX_VER and UCX_CUDA_VER: these are used to pick a package matching a specific UCX version and -# CUDA runtime from the UCX github repo. -# See: https://github.com/openucx/ucx/releases/ +# - UCX_VER, UCX_CUDA_VER, and UCX_ARCH: +# Used to pick a package matching a specific UCX version and +# CUDA runtime from the UCX github repo. +# See: https://github.com/openucx/ucx/releases/ # - ROCKY_VER: Rocky Linux OS version ARG CUDA_VER=11.8.0 -ARG UCX_VER=1.14.0 +ARG UCX_VER=1.15.0 ARG UCX_CUDA_VER=11 +ARG UCX_ARCH=x86_64 ARG ROCKY_VER=8 FROM nvidia/cuda:${CUDA_VER}-runtime-rockylinux${ROCKY_VER} ARG UCX_VER ARG UCX_CUDA_VER +ARG UCX_ARCH RUN yum update -y && yum install -y wget bzip2 rdma-core numactl-libs libgomp libibverbs librdmacm RUN mkdir /tmp/ucx_install && cd /tmp/ucx_install && \ - wget https://github.com/openucx/ucx/releases/download/v$UCX_VER/ucx-$UCX_VER-centos8-mofed5-cuda$UCX_CUDA_VER.tar.bz2 && \ + wget https://github.com/openucx/ucx/releases/download/v$UCX_VER/ucx-$UCX_VER-centos8-mofed5-cuda$UCX_CUDA_VER-$UCX_ARCH.tar.bz2 && \ tar -xvf *.bz2 && \ rpm -i ucx-$UCX_VER*.rpm && \ rpm -i ucx-cuda-$UCX_VER*.rpm --nodeps && \ diff --git a/docs/additional-functionality/shuffle-docker-examples/Dockerfile.ubuntu_no_rdma b/docs/additional-functionality/shuffle-docker-examples/Dockerfile.ubuntu_no_rdma index 8d6fc1671bc..e0318a0de60 100644 --- a/docs/additional-functionality/shuffle-docker-examples/Dockerfile.ubuntu_no_rdma +++ b/docs/additional-functionality/shuffle-docker-examples/Dockerfile.ubuntu_no_rdma @@ -17,21 +17,24 @@ # # The parameters are: # - CUDA_VER: 11.8.0 by default -# - UCX_VER and UCX_CUDA_VER: these are used to pick a package matching a specific UCX version and -# CUDA runtime from the UCX github repo. -# See: https://github.com/openucx/ucx/releases/ +# - UCX_VER, UCX_CUDA_VER, and UCX_ARCH: +# Used to pick a package matching a specific UCX version and +# CUDA runtime from the UCX github repo. 
+# See: https://github.com/openucx/ucx/releases/ # - UBUNTU_VER: 20.04 by default # ARG CUDA_VER=11.8.0 -ARG UCX_VER=1.14.0 +ARG UCX_VER=1.15.0 ARG UCX_CUDA_VER=11 +ARG UCX_ARCH=x86_64 ARG UBUNTU_VER=20.04 FROM nvidia/cuda:${CUDA_VER}-runtime-ubuntu${UBUNTU_VER} ARG UCX_VER ARG UCX_CUDA_VER ARG UBUNTU_VER +ARG UCX_ARCH RUN apt-get update && apt-get install -y gnupg2 # https://forums.developer.nvidia.com/t/notice-cuda-linux-repository-key-rotation/212771 @@ -41,7 +44,7 @@ RUN CUDA_UBUNTU_VER=`echo "$UBUNTU_VER"| sed -s 's/\.//'` && \ RUN apt update RUN apt-get install -y wget RUN mkdir /tmp/ucx_install && cd /tmp/ucx_install && \ - wget https://github.com/openucx/ucx/releases/download/v$UCX_VER/ucx-$UCX_VER-ubuntu$UBUNTU_VER-mofed5-cuda$UCX_CUDA_VER.tar.bz2 && \ - tar -xvf ucx-$UCX_VER-ubuntu$UBUNTU_VER-mofed5-cuda$UCX_CUDA_VER.tar.bz2 && \ + wget https://github.com/openucx/ucx/releases/download/v$UCX_VER/ucx-$UCX_VER-ubuntu$UBUNTU_VER-mofed5-cuda$UCX_CUDA_VER-$UCX_ARCH.tar.bz2 && \ + tar -xvf ucx-$UCX_VER-ubuntu$UBUNTU_VER-mofed5-cuda$UCX_CUDA_VER-$UCX_ARCH.tar.bz2 && \ apt install -y /tmp/ucx_install/*.deb && \ rm -rf /tmp/ucx_install diff --git a/docs/additional-functionality/shuffle-docker-examples/Dockerfile.ubuntu_rdma b/docs/additional-functionality/shuffle-docker-examples/Dockerfile.ubuntu_rdma index 9980da80c15..55281fc4b1b 100644 --- a/docs/additional-functionality/shuffle-docker-examples/Dockerfile.ubuntu_rdma +++ b/docs/additional-functionality/shuffle-docker-examples/Dockerfile.ubuntu_rdma @@ -20,9 +20,10 @@ # - RDMA_CORE_VERSION: Set to 32.1 to match the rdma-core line in the latest # released MLNX_OFED 5.x driver # - CUDA_VER: 11.8.0 by default -# - UCX_VER and UCX_CUDA_VER: these are used to pick a package matching a specific UCX version and -# CUDA runtime from the UCX github repo. -# See: https://github.com/openucx/ucx/releases/ +# - UCX_VER, UCX_CUDA_VER, and UCX_ARCH: +# Used to pick a package matching a specific UCX version and +# CUDA runtime from the UCX github repo. 
+# See: https://github.com/openucx/ucx/releases/ # - UBUNTU_VER: 20.04 by default # # The Dockerfile first fetches and builds `rdma-core` to satisfy requirements for @@ -34,8 +35,9 @@ ARG RDMA_CORE_VERSION=32.1 ARG CUDA_VER=11.8.0 -ARG UCX_VER=1.14.0 +ARG UCX_VER=1.15.0 ARG UCX_CUDA_VER=11 +ARG UCX_ARCH=x86_64 ARG UBUNTU_VER=20.04 # Throw away image to build rdma_core @@ -43,6 +45,7 @@ FROM ubuntu:${UBUNTU_VER} as rdma_core ARG RDMA_CORE_VERSION ARG UBUNTU_VER ARG CUDA_VER +ARG UCX_ARCH RUN apt-get update && apt-get install -y gnupg2 # https://forums.developer.nvidia.com/t/notice-cuda-linux-repository-key-rotation/212771 @@ -61,6 +64,7 @@ RUN tar -xvf *.tar.gz && cd rdma-core*/ && dpkg-buildpackage -b -d FROM nvidia/cuda:${CUDA_VER}-runtime-ubuntu${UBUNTU_VER} ARG UCX_VER ARG UCX_CUDA_VER +ARG UCX_ARCH ARG UBUNTU_VER RUN mkdir /tmp/ucx_install @@ -70,7 +74,7 @@ COPY --from=rdma_core /*.deb /tmp/ucx_install/ RUN apt update RUN apt-get install -y wget RUN cd /tmp/ucx_install && \ - wget https://github.com/openucx/ucx/releases/download/v$UCX_VER/ucx-$UCX_VER-ubuntu$UBUNTU_VER-mofed5-cuda$UCX_CUDA_VER.tar.bz2 && \ - tar -xvf ucx-$UCX_VER-ubuntu$UBUNTU_VER-mofed5-cuda$UCX_CUDA_VER.tar.bz2 && \ + wget https://github.com/openucx/ucx/releases/download/v$UCX_VER/ucx-$UCX_VER-ubuntu$UBUNTU_VER-mofed5-cuda$UCX_CUDA_VER-$UCX_ARCH.tar.bz2 && \ + tar -xvf ucx-$UCX_VER-ubuntu$UBUNTU_VER-mofed5-cuda$UCX_CUDA_VER-$UCX_ARCH.tar.bz2 && \ apt install -y /tmp/ucx_install/*.deb && \ rm -rf /tmp/ucx_install diff --git a/jenkins/Dockerfile-blossom.multi b/jenkins/Dockerfile-blossom.multi index f7e3cc59674..b5897f01881 100644 --- a/jenkins/Dockerfile-blossom.multi +++ b/jenkins/Dockerfile-blossom.multi @@ -26,7 +26,7 @@ ARG CUDA_VER=11.8.0 ARG UBUNTU_VER=20.04 -ARG UCX_VER=1.15.0-rc6 +ARG UCX_VER=1.15.0 # multi-platform build with: docker buildx build --platform linux/arm64,linux/amd64 on either amd64 or arm64 host # check available official arm-based docker images at https://hub.docker.com/r/nvidia/cuda/tags (OS/ARCH) FROM --platform=$TARGETPLATFORM nvidia/cuda:${CUDA_VER}-runtime-ubuntu${UBUNTU_VER} diff --git a/jenkins/Dockerfile-blossom.ubuntu b/jenkins/Dockerfile-blossom.ubuntu index b3500c491ac..b3366a5362f 100644 --- a/jenkins/Dockerfile-blossom.ubuntu +++ b/jenkins/Dockerfile-blossom.ubuntu @@ -27,13 +27,14 @@ ARG CUDA_VER=11.0.3 ARG UBUNTU_VER=20.04 -ARG UCX_VER=1.14.0 +ARG UCX_VER=1.15.0 ARG UCX_CUDA_VER=11 FROM nvidia/cuda:${CUDA_VER}-runtime-ubuntu${UBUNTU_VER} ARG CUDA_VER ARG UBUNTU_VER ARG UCX_VER ARG UCX_CUDA_VER +ARG UCX_ARCH=x86_64 # https://forums.developer.nvidia.com/t/notice-cuda-linux-repository-key-rotation/212771 RUN UB_VER=$(echo ${UBUNTU_VER} | tr -d '.') && \ @@ -65,7 +66,7 @@ RUN apt install -y inetutils-ping expect wget libnuma1 libgomp1 RUN mkdir -p /tmp/ucx && \ cd /tmp/ucx && \ - wget https://github.com/openucx/ucx/releases/download/v${UCX_VER}/ucx-${UCX_VER}-ubuntu${UBUNTU_VER}-mofed5-cuda${UCX_CUDA_VER}.tar.bz2 && \ + wget https://github.com/openucx/ucx/releases/download/v${UCX_VER}/ucx-${UCX_VER}-ubuntu${UBUNTU_VER}-mofed5-cuda${UCX_CUDA_VER}-${UCX_ARCH}.tar.bz2 && \ tar -xvf *.bz2 && \ dpkg -i *.deb && \ rm -rf /tmp/ucx diff --git a/pom.xml b/pom.xml index 297492604de..9b11655d7a7 100644 --- a/pom.xml +++ b/pom.xml @@ -648,7 +648,7 @@ https://github.com/openjdk/jdk17/blob/4afbcaf55383ec2f5da53282a1547bac3d099e9d/src/jdk.compiler/share/classes/com/sun/tools/javac/resources/compiler.properties#L1993-L1994 --> -Xlint:all,-serial,-path,-try,-processing|-Werror - 1.14 + 1.15.0 true 
package diff --git a/scala2.13/pom.xml b/scala2.13/pom.xml index fbc33b06cb5..c18ebb13930 100644 --- a/scala2.13/pom.xml +++ b/scala2.13/pom.xml @@ -648,7 +648,7 @@ https://github.com/openjdk/jdk17/blob/4afbcaf55383ec2f5da53282a1547bac3d099e9d/src/jdk.compiler/share/classes/com/sun/tools/javac/resources/compiler.properties#L1993-L1994 --> -Xlint:all,-serial,-path,-try,-processing|-Werror - 1.14 + 1.15.0 true package From 1ca5a0904c26b7370463b762acedd0a9a7709e6f Mon Sep 17 00:00:00 2001 From: Gera Shegalov Date: Wed, 22 Nov 2023 11:22:07 -0800 Subject: [PATCH 07/25] Check paths for existence to prevent ignorable error messages during build (#9786) Fixes #9782 Signed-off-by: Gera Shegalov --- aggregator/pom.xml | 26 +++++++++++++++++--------- dist/pom.xml | 15 ++++++++++++--- pom.xml | 20 +++++++++++++++++--- scala2.13/aggregator/pom.xml | 26 +++++++++++++++++--------- scala2.13/dist/pom.xml | 15 ++++++++++++--- scala2.13/pom.xml | 20 +++++++++++++++++--- scala2.13/sql-plugin/pom.xml | 13 ++++++++++++- sql-plugin/pom.xml | 13 ++++++++++++- 8 files changed, 116 insertions(+), 32 deletions(-) diff --git a/aggregator/pom.xml b/aggregator/pom.xml index 8f8b6da47fc..27c13af1e4d 100644 --- a/aggregator/pom.xml +++ b/aggregator/pom.xml @@ -156,15 +156,23 @@ - - - - - - - - + + + + + + + + + + + + Clean build? Skipping diff because ${oldClassesDir} does not exist + + diff --git a/dist/pom.xml b/dist/pom.xml index a858d2865b5..6fbc047ac47 100644 --- a/dist/pom.xml +++ b/dist/pom.xml @@ -331,9 +331,18 @@ run - - - + + + + + + + + + + Re-execute build with the default `-Drapids.jni.unpack.skip=false` + + diff --git a/pom.xml b/pom.xml index 9b11655d7a7..d099315ef8c 100644 --- a/pom.xml +++ b/pom.xml @@ -820,6 +820,7 @@ install ${spark.rapids.source.basedir}/.bloop ${project.build.outputDirectory}/rapids4spark-version-info.properties + false @@ -984,11 +985,22 @@ - + + - + + + + + + + + + Comparing git revisions: previous=${saved.build-info.revision} @@ -1471,7 +1483,9 @@ This will force full Scala code rebuild in downstream modules. Cleaning build directories of all modules ${target.dirs.str} - + diff --git a/scala2.13/aggregator/pom.xml b/scala2.13/aggregator/pom.xml index 4868d10d74e..5f85f31de01 100644 --- a/scala2.13/aggregator/pom.xml +++ b/scala2.13/aggregator/pom.xml @@ -156,15 +156,23 @@ - - - - - - - - + + + + + + + + + + + + Clean build? Skipping diff because ${oldClassesDir} does not exist + + diff --git a/scala2.13/dist/pom.xml b/scala2.13/dist/pom.xml index 7e87dfe5f7c..071ce8247b5 100644 --- a/scala2.13/dist/pom.xml +++ b/scala2.13/dist/pom.xml @@ -331,9 +331,18 @@ run - - - + + + + + + + + + + Re-execute build with the default `-Drapids.jni.unpack.skip=false` + + diff --git a/scala2.13/pom.xml b/scala2.13/pom.xml index c18ebb13930..39a811664e4 100644 --- a/scala2.13/pom.xml +++ b/scala2.13/pom.xml @@ -820,6 +820,7 @@ install ${spark.rapids.source.basedir}/.bloop ${project.build.outputDirectory}/rapids4spark-version-info.properties + false @@ -984,11 +985,22 @@ - + + - + + + + + + + + + Comparing git revisions: previous=${saved.build-info.revision} @@ -1471,7 +1483,9 @@ This will force full Scala code rebuild in downstream modules. 
             Cleaning build directories of all modules ${target.dirs.str} - +
diff --git a/scala2.13/sql-plugin/pom.xml b/scala2.13/sql-plugin/pom.xml
index 67f3f91c30f..ee849082aa9 100644
--- a/scala2.13/sql-plugin/pom.xml
+++ b/scala2.13/sql-plugin/pom.xml
@@ -179,7 +179,18 @@
                        value="${servicesDir}/com.nvidia.spark.rapids.SparkShimServiceProvider"/>
 - + + + + + + + + +
diff --git a/sql-plugin/pom.xml b/sql-plugin/pom.xml
index 9773cc91ba1..0a7fb1ff8c1 100644
--- a/sql-plugin/pom.xml
+++ b/sql-plugin/pom.xml
@@ -179,7 +179,18 @@
                        value="${servicesDir}/com.nvidia.spark.rapids.SparkShimServiceProvider"/>
 - + + + + + + + + +

From 1b4dbd7578984659d79652d75910c31bb2f014db Mon Sep 17 00:00:00 2001
From: Jason Lowe
Date: Wed, 22 Nov 2023 14:30:07 -0600
Subject: [PATCH 08/25] Avoid pre-Gregorian dates in schema_evolution_test (#9835)

Signed-off-by: Jason Lowe
---
 .../src/main/python/schema_evolution_test.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/integration_tests/src/main/python/schema_evolution_test.py b/integration_tests/src/main/python/schema_evolution_test.py
index a457b082858..4138bb11e86 100644
--- a/integration_tests/src/main/python/schema_evolution_test.py
+++ b/integration_tests/src/main/python/schema_evolution_test.py
@@ -14,7 +14,7 @@
 from asserts import assert_gpu_and_cpu_are_equal_collect
 from data_gen import *
-from datetime import datetime, timezone
+from datetime import date, datetime, timezone
 from marks import ignore_order
 import pytest
 from spark_session import is_databricks_runtime, is_databricks113_or_later
@@ -28,6 +28,9 @@
     "spark.sql.legacy.parquet.int96RebaseModeInWrite": "CORRECTED",
 }

+# Using a custom date generator due to https://github.com/NVIDIA/spark-rapids/issues/9807
+_custom_date_gen = DateGen(start=date(1590, 1, 1))
+
 # List of additional column data generators to use when adding columns
 _additional_gens = [
     boolean_gen,
@@ -38,12 +41,12 @@
     float_gen,
     double_gen,
     string_gen,
-    date_gen,
+    _custom_date_gen,
     TimestampGen(start=datetime(1677, 9, 22, tzinfo=timezone.utc), end=datetime(2262, 4, 11, tzinfo=timezone.utc)),
     # RAPIDS Accelerator does not support MapFromArrays yet
     # https://github.com/NVIDIA/spark-rapids/issues/8696
     # simple_string_to_string_map_gen),
-    ArrayGen(date_gen),
+    ArrayGen(_custom_date_gen),
     struct_gen_decimal128,
     StructGen([("c0", ArrayGen(long_gen)), ("c1", boolean_gen)]),
 ]

From 61cfb7de20b4b0f524f4db8e2c77a6e197bf9b47 Mon Sep 17 00:00:00 2001
From: Ferdinand Xu
Date: Thu, 23 Nov 2023 07:36:13 +0800
Subject: [PATCH 09/25] Re-enable AST string integration cases (#9809)

* Fix integration test for non UTF8 case

Signed-off-by: Ferdinand Xu

* Revert

* Remove unused imports

* hardcode LC_ALL to test using pre-merge CI

* Revert "hardcode LC_ALL to test using pre-merge CI"

This reverts commit 3372d1e2e7a1f89bbb5c1a2d7376c87470fec86b.
---------

Signed-off-by: Ferdinand Xu
---
 integration_tests/src/main/python/ast_test.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/integration_tests/src/main/python/ast_test.py b/integration_tests/src/main/python/ast_test.py
index 0fa03b37d6a..21e982e4fb8 100644
--- a/integration_tests/src/main/python/ast_test.py
+++ b/integration_tests/src/main/python/ast_test.py
@@ -42,8 +42,7 @@
     (double_gen, False),
     (timestamp_gen, True),
     (date_gen, True),
-    pytest.param((string_gen, True),
-                 marks=pytest.mark.xfail(reason="https://github.com/NVIDIA/spark-rapids/issues/9771"))
+    (string_gen, True)
 ]

 ast_boolean_descr = [(boolean_gen, True)]

From d3629fd1993a391a4aac00d34edabfe58b28061c Mon Sep 17 00:00:00 2001
From: Raza Jafri
Date: Thu, 23 Nov 2023 02:09:44 +0100
Subject: [PATCH 10/25] Enable build for Databricks 13.3 [databricks] (#9677)

* pom changes

* pom changes

* pom changes

* add databricks13.3 to premerge

* Added ToPrettyString support

* xfail approximate percentile test

* xfail failing udf tests

* xfail failing tests due to WriteIntoDeltaCommand

* xfail test_delta_atomic_create_table_as_select and test_delta_atomic_replace_table_as_select

* Added 341db to shim-deps and removed from datagen/pom.xml

* updated udf-compiler pom.xml

* updated sql-plugin pom.xml

* fixed multiple pom.xml

* updated udf-compiler pom.xml

* removed TODO

* Signoff

Signed-off-by: Raza Jafri

* updated scala 2.13 poms

* Revert "xfail failing tests due to WriteIntoDeltaCommand"

This reverts commit 00b498ed3ea963605cc36560e8896fe27bd412d2.

* Revert "xfail test_delta_atomic_create_table_as_select and test_delta_atomic_replace_table_as_select"

This reverts commit ea2fd40b8215cdfa845074127a641af62052e947.

* remove tests/pom.xml changes

* reverted 2.13 generation of tests/pom.xml

* removed 341db profile from tests as we don't run unit tests on databricks

* fixed the xfail reason to point to the correct issue

* removed diff.patch

* Revert "xfail approximate percentile test"

This reverts commit 0a7fa52dc06681a9ef8f1da6b36ed35ac2be79dc.
* build fixes

Signed-off-by: Jason Lowe

* Fix spark321db build

* Skip UDF tests until UDF handling is updated

* Remove xfail/skips eclipsed by module-level skip

* xfail fastparquet tests due to nulls being introduced by pandas

* Fix incorrect shimplify directives for 341db

* Fix fallback test

---------

Signed-off-by: Raza Jafri
Signed-off-by: Jason Lowe
Co-authored-by: Jason Lowe
---
 aggregator/pom.xml                            | 17 ++++++++
 .../src/main/python/delta_lake_merge_test.py  |  2 +-
 .../python/fastparquet_compatibility_test.py  | 25 +++++++----
 .../src/main/python/udf_cudf_test.py          |  7 +++-
 integration_tests/src/main/python/udf_test.py |  8 +++-
 .../Jenkinsfile-blossom.premerge-databricks   |  2 +-
 pom.xml                                       | 29 ++++++++++++-
 scala2.13/aggregator/pom.xml                  | 17 ++++++++
 scala2.13/pom.xml                             | 29 ++++++++++++-
 scala2.13/shim-deps/pom.xml                   | 41 +++++++++++++++++++
 shim-deps/pom.xml                             | 41 +++++++++++++++++++
 .../shims/ParquetLegacyNanoAsLongShims.scala  |  1 -
 .../shims/ParquetTimestampNTZShims.scala      |  1 -
 .../hive/rapids/shims/FileSinkDescShim.scala  |  1 -
 .../rapids/shims/HiveInspectorsShim.scala     |  1 -
 .../shims/TagScanForRuntimeFiltering.scala    |  1 -
 ...puDatabricksShuffleExchangeExecBase.scala} | 16 ++------
 .../rapids/shims/GpuShuffleExchangeExec.scala | 16 +++++++-
 .../spark/rapids/shims/CastCheckShims.scala   |  1 -
 .../ParquetTimestampAnnotationShims.scala     |  1 -
 .../spark/rapids/shims/CastCheckShims.scala   |  1 +
 .../shims/ParquetLegacyNanoAsLongShims.scala  |  1 +
 .../ParquetTimestampAnnotationShims.scala     |  1 +
 .../shims/ParquetTimestampNTZShims.scala      |  1 +
 .../shims/TagScanForRuntimeFiltering.scala    |  1 +
 .../rapids/shims/Spark341PlusDBShims.scala    | 16 +++++++-
 .../rapids/shims/GpuShuffleExchangeExec.scala | 10 +++--
 .../hive/rapids/shims/FileSinkDescShim.scala  |  1 +
 .../rapids/shims/HiveInspectorsShim.scala     |  1 +
 29 files changed, 251 insertions(+), 39 deletions(-)
 rename sql-plugin/src/main/spark321db/scala/org/apache/spark/rapids/shims/{GpuShuffleExchangeExecBase.scala => GpuDatabricksShuffleExchangeExecBase.scala} (76%)
 rename sql-plugin/src/main/{spark350 => spark341db}/scala/org/apache/spark/sql/hive/rapids/shims/FileSinkDescShim.scala (98%)
 rename sql-plugin/src/main/{spark350 => spark341db}/scala/org/apache/spark/sql/hive/rapids/shims/HiveInspectorsShim.scala (98%)

diff --git a/aggregator/pom.xml b/aggregator/pom.xml
index 27c13af1e4d..4fa4827ac52 100644
--- a/aggregator/pom.xml
+++ b/aggregator/pom.xml
@@ -619,6 +619,23 @@
 + + release341db + + + buildver + 341db + + + + + com.nvidia + rapids-4-spark-delta-spark341db_${scala.binary.version} + ${project.version} + ${spark.version.classifier} + + + release333
diff --git a/integration_tests/src/main/python/delta_lake_merge_test.py b/integration_tests/src/main/python/delta_lake_merge_test.py
index 1d43259434b..0ba63380aba 100644
--- a/integration_tests/src/main/python/delta_lake_merge_test.py
+++ b/integration_tests/src/main/python/delta_lake_merge_test.py
@@ -97,7 +97,7 @@ def checker(data_path, do_merge):
                                      merge_sql=merge_sql,
                                      check_func=checker)

-@allow_non_gpu("ExecutedCommandExec,BroadcastHashJoinExec,ColumnarToRowExec,BroadcastExchangeExec,DataWritingCommandExec", *delta_meta_allow)
+@allow_non_gpu("ExecutedCommandExec,BroadcastHashJoinExec,ColumnarToRowExec,BroadcastExchangeExec,DataWritingCommandExec", delta_write_fallback_allow, *delta_meta_allow)
 @delta_lake
 @ignore_order
 @pytest.mark.skipif(is_databricks_runtime() and spark_version() < "3.3.2", reason="NOT MATCHED BY SOURCE added in DBR 12.2")
diff --git a/integration_tests/src/main/python/fastparquet_compatibility_test.py b/integration_tests/src/main/python/fastparquet_compatibility_test.py
index 6ec5ec88fd3..b51fa5a55ef 100644
--- a/integration_tests/src/main/python/fastparquet_compatibility_test.py
+++ b/integration_tests/src/main/python/fastparquet_compatibility_test.py
@@ -17,7 +17,7 @@
 from asserts import assert_gpu_and_cpu_are_equal_collect
 from data_gen import *
 from fastparquet_utils import get_fastparquet_result_canonicalizer
-from spark_session import spark_version, with_cpu_session, with_gpu_session
+from spark_session import is_databricks_runtime, spark_version, with_cpu_session, with_gpu_session

 def fastparquet_unavailable():
@@ -107,8 +107,12 @@ def read_with_fastparquet_or_plugin(spark):
     pytest.param(IntegerGen(nullable=True),
                  marks=pytest.mark.xfail(reason="Nullables cause merge errors, when converting to Spark dataframe")),
     LongGen(nullable=False),
-    FloatGen(nullable=False),
-    DoubleGen(nullable=False),
+    pytest.param(FloatGen(nullable=False),
+                 marks=pytest.mark.xfail(is_databricks_runtime(),
+                                         reason="https://github.com/NVIDIA/spark-rapids/issues/9778")),
+    pytest.param(DoubleGen(nullable=False),
+                 marks=pytest.mark.xfail(is_databricks_runtime(),
+                                         reason="https://github.com/NVIDIA/spark-rapids/issues/9778")),
     StringGen(nullable=False),
     pytest.param(DecimalGen(nullable=False),
                  marks=pytest.mark.xfail(reason="fastparquet reads Decimal columns as Float, as per "
@@ -131,8 +135,11 @@ def read_with_fastparquet_or_plugin(spark):
                  marks=pytest.mark.xfail(reason="Conversion from Pandas dataframe (read with fastparquet) to Spark dataframe "
                                                 "fails: \"Unable to infer the type of the field a\".")),
-    StructGen(children=[("first", IntegerGen(nullable=False)),
-              ("second", FloatGen(nullable=False))], nullable=False)
+    pytest.param(
+        StructGen(children=[("first", IntegerGen(nullable=False)),
+                  ("second", FloatGen(nullable=False))], nullable=False),
+        marks=pytest.mark.xfail(is_databricks_runtime(),
+                                reason="https://github.com/NVIDIA/spark-rapids/issues/9778")),
 ], ids=idfn)
 def test_reading_file_written_by_spark_cpu(data_gen, spark_tmp_path):
     """
@@ -176,8 +183,12 @@ def test_reading_file_written_by_spark_cpu(data_gen, spark_tmp_path):
     LongGen(nullable=False),
     pytest.param(LongGen(nullable=True),
                  marks=pytest.mark.xfail(reason="Nullables cause merge errors, when converting to Spark dataframe")),
-    FloatGen(nullable=False),
-    DoubleGen(nullable=False),
+    pytest.param(FloatGen(nullable=False),
+                 marks=pytest.mark.xfail(is_databricks_runtime(),
+                                         reason="https://github.com/NVIDIA/spark-rapids/issues/9778")),
+    pytest.param(DoubleGen(nullable=False),
+                 marks=pytest.mark.xfail(is_databricks_runtime(),
+                                         reason="https://github.com/NVIDIA/spark-rapids/issues/9778")),
     StringGen(nullable=False),
     pytest.param(DecimalGen(nullable=False),
                  marks=pytest.mark.xfail(reason="fastparquet reads Decimal columns as Float, as per "
diff --git a/integration_tests/src/main/python/udf_cudf_test.py b/integration_tests/src/main/python/udf_cudf_test.py
index 04416315702..6d94a5da206 100644
--- a/integration_tests/src/main/python/udf_cudf_test.py
+++ b/integration_tests/src/main/python/udf_cudf_test.py
@@ -37,10 +37,15 @@
 from typing import Iterator
 from pyspark.sql import Window
 from pyspark.sql.functions import pandas_udf, PandasUDFType
-from spark_session import with_cpu_session, with_gpu_session
+from spark_session import is_databricks_runtime, is_spark_340_or_later, with_cpu_session, with_gpu_session
 from marks import cudf_udf

+if is_databricks_runtime() and is_spark_340_or_later():
+    # Databricks 13.3 does not use separate reader/writer threads for Python UDFs
+    # which can lead to hangs. Skipping these tests until the Python UDF handling is updated.
+    pytestmark = pytest.mark.skip(reason="https://github.com/NVIDIA/spark-rapids/issues/9493")
+
 _conf = {
     'spark.rapids.sql.exec.AggregateInPandasExec': 'true',
     'spark.rapids.sql.exec.FlatMapCoGroupsInPandasExec': 'true',
diff --git a/integration_tests/src/main/python/udf_test.py b/integration_tests/src/main/python/udf_test.py
index 14fc57cf972..db8425f6387 100644
--- a/integration_tests/src/main/python/udf_test.py
+++ b/integration_tests/src/main/python/udf_test.py
@@ -15,7 +15,7 @@
 import pytest

 from conftest import is_at_least_precommit_run
-from spark_session import is_databricks_runtime, is_before_spark_330, is_before_spark_350, is_spark_350_or_later
+from spark_session import is_databricks_runtime, is_before_spark_330, is_before_spark_350, is_spark_340_or_later

 from pyspark.sql.pandas.utils import require_minimum_pyarrow_version, require_minimum_pandas_version
@@ -43,6 +43,12 @@
 import pyarrow
 from typing import Iterator, Tuple

+
+if is_databricks_runtime() and is_spark_340_or_later():
+    # Databricks 13.3 does not use separate reader/writer threads for Python UDFs
+    # which can lead to hangs. Skipping these tests until the Python UDF handling is updated.
+    pytestmark = pytest.mark.skip(reason="https://github.com/NVIDIA/spark-rapids/issues/9493")
+
 arrow_udf_conf = {
     'spark.sql.execution.arrow.pyspark.enabled': 'true',
     'spark.rapids.sql.exec.WindowInPandasExec': 'true',
diff --git a/jenkins/Jenkinsfile-blossom.premerge-databricks b/jenkins/Jenkinsfile-blossom.premerge-databricks
index 0ea835d39a9..27c42f59aab 100644
--- a/jenkins/Jenkinsfile-blossom.premerge-databricks
+++ b/jenkins/Jenkinsfile-blossom.premerge-databricks
@@ -88,7 +88,7 @@ pipeline {
         // 'name' and 'value' only supprt literal string in the declarative Jenkins
         // Refer to Jenkins issue https://issues.jenkins.io/browse/JENKINS-62127
         name 'DB_RUNTIME'
-        values '10.4', '11.3', '12.2'
+        values '10.4', '11.3', '12.2', '13.3'
       }
     }
     stages {
diff --git a/pom.xml b/pom.xml
index d099315ef8c..7e6ed88cf9f 100644
--- a/pom.xml
+++ b/pom.xml
@@ -509,6 +509,31 @@
         delta-lake/delta-spark332db
 + + + release341db + + + buildver + 341db + + + + + 3.4.4 + spark341db + ${spark341db.version} + ${spark341db.version} + 3.3.1 + true + 1.12.0 + ${spark330.iceberg.version} + + + shim-deps/databricks + delta-lake/delta-spark341db + + release350
@@ -691,6 +716,7 @@
         3.3.2.3.3.7190.0-91
         3.3.0-databricks
         3.3.2-databricks
+        3.4.1-databricks
         3.5.0
         3.12.4
         4.3.0
@@ -745,7 +771,8 @@
             321db,
             330db,
-            332db
+            332db,
+            341db
 + release341db + + + buildver + 341db + + + + + 3.4.4 + spark341db + ${spark341db.version} + ${spark341db.version} + 3.3.1 + true + 1.12.0 + ${spark330.iceberg.version} + + + shim-deps/databricks + delta-lake/delta-spark341db + + release350
@@ -691,6 +716,7 @@
         3.3.2.3.3.7190.0-91
         3.3.0-databricks
         3.3.2-databricks
+        3.4.1-databricks
         3.5.0
         3.12.4
         4.3.0
@@ -745,7 +771,8 @@
             321db,
             330db,
-            332db
+            332db,
+            341db