[SPARK-35579][SQL] Bump janino to 3.1.7
### What changes were proposed in this pull request?

Upgrade Janino from 3.0.16 to 3.1.7.

### Why are the changes needed?

- The proposed version contains a bug fix in Janino contributed by maropu.
   - janino-compiler/janino#148
- It also adds a `getBytecodes` method, which simplifies how bytecodes are retrieved from `ClassBodyEvaluator` in `CodeGenerator#updateAndGetCompilationStats` (by LuciferYang); see the hedged sketch after this list.
   - apache/spark#32536
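
The sketch below is illustrative only, not the exact Spark change: it assumes `getBytecodes` returns a `java.util.Map` from generated class name to bytecode array, and the helper name is hypothetical.

```scala
import scala.collection.JavaConverters._
import org.codehaus.janino.ClassBodyEvaluator

// Hedged sketch: with Janino 3.1.x, the compiled class files can be read
// directly from the evaluator, instead of the reflective field access that
// CodeGenerator#updateAndGetCompilationStats needed against Janino 3.0.x.
// Assumption: getBytecodes returns java.util.Map[String, Array[Byte]].
def logCompiledClassSizes(evaluator: ClassBodyEvaluator): Unit = {
  evaluator.getBytecodes.asScala.foreach { case (className, bytecode) =>
    // Per-class bytecode size; Spark feeds this kind of figure into its
    // codegen compilation metrics.
    println(s"$className compiled to ${bytecode.length} bytes")
  }
}
```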

### Does this PR introduce _any_ user-facing change?

No

### How was this patch tested?

Existing UTs

Closes #37202 from singhpk234/upgrade/bump-janino.

Authored-by: Prashant Singh <[email protected]>
Signed-off-by: Sean Owen <[email protected]>
a0x8o committed Jul 18, 2022
1 parent 7028144 commit 9dfa7b4
Showing 36 changed files with 1,801 additions and 1,324 deletions.
@@ -106,4 +106,8 @@ class DB2IntegrationSuite extends DockerJDBCIntegrationV2Suite with V2JDBCTest {
testStddevSamp(true)
testCovarPop()
testCovarSamp()
testRegrIntercept()
testRegrSlope()
testRegrR2()
testRegrSXY()
}
@@ -111,4 +111,8 @@ class OracleIntegrationSuite extends DockerJDBCIntegrationV2Suite with V2JDBCTes
testCovarPop()
testCovarSamp()
testCorr()
testRegrIntercept()
testRegrSlope()
testRegrR2()
testRegrSXY()
}
@@ -104,4 +104,12 @@ class PostgresIntegrationSuite extends DockerJDBCIntegrationV2Suite with V2JDBCT
testCovarSamp(true)
testCorr()
testCorr(true)
testRegrIntercept()
testRegrIntercept(true)
testRegrSlope()
testRegrSlope(true)
testRegrR2()
testRegrR2(true)
testRegrSXY()
testRegrSXY(true)
}
@@ -406,25 +406,27 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu

protected def caseConvert(tableName: String): String = tableName

private def withOrWithout(isDistinct: Boolean): String = if (isDistinct) "with" else "without"

protected def testVarPop(isDistinct: Boolean = false): Unit = {
val distinct = if (isDistinct) "DISTINCT " else ""
test(s"scan with aggregate push-down: VAR_POP with distinct: $isDistinct") {
test(s"scan with aggregate push-down: VAR_POP ${withOrWithout(isDistinct)} DISTINCT") {
val df = sql(s"SELECT VAR_POP(${distinct}bonus) FROM $catalogAndNamespace." +
s"${caseConvert("employee")} WHERE dept > 0 GROUP BY dept ORDER BY dept")
checkFilterPushed(df)
checkAggregateRemoved(df)
checkAggregatePushed(df, "VAR_POP")
val row = df.collect()
assert(row.length === 3)
assert(row(0).getDouble(0) === 10000d)
assert(row(1).getDouble(0) === 2500d)
assert(row(2).getDouble(0) === 0d)
assert(row(0).getDouble(0) === 10000.0)
assert(row(1).getDouble(0) === 2500.0)
assert(row(2).getDouble(0) === 0.0)
}
}

protected def testVarSamp(isDistinct: Boolean = false): Unit = {
val distinct = if (isDistinct) "DISTINCT " else ""
test(s"scan with aggregate push-down: VAR_SAMP with distinct: $isDistinct") {
test(s"scan with aggregate push-down: VAR_SAMP ${withOrWithout(isDistinct)} DISTINCT") {
val df = sql(
s"SELECT VAR_SAMP(${distinct}bonus) FROM $catalogAndNamespace." +
s"${caseConvert("employee")} WHERE dept > 0 GROUP BY dept ORDER BY dept")
@@ -433,15 +435,15 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu
checkAggregatePushed(df, "VAR_SAMP")
val row = df.collect()
assert(row.length === 3)
assert(row(0).getDouble(0) === 20000d)
assert(row(1).getDouble(0) === 5000d)
assert(row(0).getDouble(0) === 20000.0)
assert(row(1).getDouble(0) === 5000.0)
assert(row(2).isNullAt(0))
}
}

protected def testStddevPop(isDistinct: Boolean = false): Unit = {
val distinct = if (isDistinct) "DISTINCT " else ""
test(s"scan with aggregate push-down: STDDEV_POP with distinct: $isDistinct") {
test(s"scan with aggregate push-down: STDDEV_POP ${withOrWithout(isDistinct)} DISTINCT") {
val df = sql(
s"SELECT STDDEV_POP(${distinct}bonus) FROM $catalogAndNamespace." +
s"${caseConvert("employee")} WHERE dept > 0 GROUP BY dept ORDER BY dept")
@@ -450,15 +452,15 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu
checkAggregatePushed(df, "STDDEV_POP")
val row = df.collect()
assert(row.length === 3)
assert(row(0).getDouble(0) === 100d)
assert(row(1).getDouble(0) === 50d)
assert(row(2).getDouble(0) === 0d)
assert(row(0).getDouble(0) === 100.0)
assert(row(1).getDouble(0) === 50.0)
assert(row(2).getDouble(0) === 0.0)
}
}

protected def testStddevSamp(isDistinct: Boolean = false): Unit = {
val distinct = if (isDistinct) "DISTINCT " else ""
test(s"scan with aggregate push-down: STDDEV_SAMP with distinct: $isDistinct") {
test(s"scan with aggregate push-down: STDDEV_SAMP ${withOrWithout(isDistinct)} DISTINCT") {
val df = sql(
s"SELECT STDDEV_SAMP(${distinct}bonus) FROM $catalogAndNamespace." +
s"${caseConvert("employee")} WHERE dept > 0 GROUP BY dept ORDER BY dept")
@@ -467,15 +469,15 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu
checkAggregatePushed(df, "STDDEV_SAMP")
val row = df.collect()
assert(row.length === 3)
assert(row(0).getDouble(0) === 141.4213562373095d)
assert(row(1).getDouble(0) === 70.71067811865476d)
assert(row(0).getDouble(0) === 141.4213562373095)
assert(row(1).getDouble(0) === 70.71067811865476)
assert(row(2).isNullAt(0))
}
}

protected def testCovarPop(isDistinct: Boolean = false): Unit = {
val distinct = if (isDistinct) "DISTINCT " else ""
test(s"scan with aggregate push-down: COVAR_POP with distinct: $isDistinct") {
test(s"scan with aggregate push-down: COVAR_POP ${withOrWithout(isDistinct)} DISTINCT") {
val df = sql(
s"SELECT COVAR_POP(${distinct}bonus, bonus) FROM $catalogAndNamespace." +
s"${caseConvert("employee")} WHERE dept > 0 GROUP BY dept ORDER BY dept")
@@ -484,15 +486,15 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu
checkAggregatePushed(df, "COVAR_POP")
val row = df.collect()
assert(row.length === 3)
assert(row(0).getDouble(0) === 10000d)
assert(row(1).getDouble(0) === 2500d)
assert(row(2).getDouble(0) === 0d)
assert(row(0).getDouble(0) === 10000.0)
assert(row(1).getDouble(0) === 2500.0)
assert(row(2).getDouble(0) === 0.0)
}
}

protected def testCovarSamp(isDistinct: Boolean = false): Unit = {
val distinct = if (isDistinct) "DISTINCT " else ""
test(s"scan with aggregate push-down: COVAR_SAMP with distinct: $isDistinct") {
test(s"scan with aggregate push-down: COVAR_SAMP ${withOrWithout(isDistinct)} DISTINCT") {
val df = sql(
s"SELECT COVAR_SAMP(${distinct}bonus, bonus) FROM $catalogAndNamespace." +
s"${caseConvert("employee")} WHERE dept > 0 GROUP BY dept ORDER BY dept")
@@ -501,15 +503,15 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu
checkAggregatePushed(df, "COVAR_SAMP")
val row = df.collect()
assert(row.length === 3)
assert(row(0).getDouble(0) === 20000d)
assert(row(1).getDouble(0) === 5000d)
assert(row(0).getDouble(0) === 20000.0)
assert(row(1).getDouble(0) === 5000.0)
assert(row(2).isNullAt(0))
}
}

protected def testCorr(isDistinct: Boolean = false): Unit = {
val distinct = if (isDistinct) "DISTINCT " else ""
test(s"scan with aggregate push-down: CORR with distinct: $isDistinct") {
test(s"scan with aggregate push-down: CORR ${withOrWithout(isDistinct)} DISTINCT") {
val df = sql(
s"SELECT CORR(${distinct}bonus, bonus) FROM $catalogAndNamespace." +
s"${caseConvert("employee")} WHERE dept > 0 GROUP BY dept ORDER BY dept")
@@ -518,9 +520,77 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu
checkAggregatePushed(df, "CORR")
val row = df.collect()
assert(row.length === 3)
assert(row(0).getDouble(0) === 1d)
assert(row(1).getDouble(0) === 1d)
assert(row(0).getDouble(0) === 1.0)
assert(row(1).getDouble(0) === 1.0)
assert(row(2).isNullAt(0))
}
}

protected def testRegrIntercept(isDistinct: Boolean = false): Unit = {
val distinct = if (isDistinct) "DISTINCT " else ""
test(s"scan with aggregate push-down: REGR_INTERCEPT ${withOrWithout(isDistinct)} DISTINCT") {
val df = sql(
s"SELECT REGR_INTERCEPT(${distinct}bonus, bonus) FROM $catalogAndNamespace." +
s"${caseConvert("employee")} WHERE dept > 0 GROUP BY dept ORDER BY dept")
checkFilterPushed(df)
checkAggregateRemoved(df)
checkAggregatePushed(df, "REGR_INTERCEPT")
val row = df.collect()
assert(row.length === 3)
assert(row(0).getDouble(0) === 0.0)
assert(row(1).getDouble(0) === 0.0)
assert(row(2).isNullAt(0))
}
}

protected def testRegrSlope(isDistinct: Boolean = false): Unit = {
val distinct = if (isDistinct) "DISTINCT " else ""
test(s"scan with aggregate push-down: REGR_SLOPE ${withOrWithout(isDistinct)} DISTINCT") {
val df = sql(
s"SELECT REGR_SLOPE(${distinct}bonus, bonus) FROM $catalogAndNamespace." +
s"${caseConvert("employee")} WHERE dept > 0 GROUP BY dept ORDER BY dept")
checkFilterPushed(df)
checkAggregateRemoved(df)
checkAggregatePushed(df, "REGR_SLOPE")
val row = df.collect()
assert(row.length === 3)
assert(row(0).getDouble(0) === 1.0)
assert(row(1).getDouble(0) === 1.0)
assert(row(2).isNullAt(0))
}
}

protected def testRegrR2(isDistinct: Boolean = false): Unit = {
val distinct = if (isDistinct) "DISTINCT " else ""
test(s"scan with aggregate push-down: REGR_R2 ${withOrWithout(isDistinct)} DISTINCT") {
val df = sql(
s"SELECT REGR_R2(${distinct}bonus, bonus) FROM $catalogAndNamespace." +
s"${caseConvert("employee")} WHERE dept > 0 GROUP BY dept ORDER BY dept")
checkFilterPushed(df)
checkAggregateRemoved(df)
checkAggregatePushed(df, "REGR_R2")
val row = df.collect()
assert(row.length === 3)
assert(row(0).getDouble(0) === 1.0)
assert(row(1).getDouble(0) === 1.0)
assert(row(2).isNullAt(0))
}
}

protected def testRegrSXY(isDistinct: Boolean = false): Unit = {
val distinct = if (isDistinct) "DISTINCT " else ""
test(s"scan with aggregate push-down: REGR_SXY ${withOrWithout(isDistinct)} DISTINCT") {
val df = sql(
s"SELECT REGR_SXY(${distinct}bonus, bonus) FROM $catalogAndNamespace." +
s"${caseConvert("employee")} WHERE dept > 0 GROUP BY dept ORDER BY dept")
checkFilterPushed(df)
checkAggregateRemoved(df)
checkAggregatePushed(df, "REGR_SXY")
val row = df.collect()
assert(row.length === 3)
assert(row(0).getDouble(0) === 20000.0)
assert(row(1).getDouble(0) === 5000.0)
assert(row(2).getDouble(0) === 0.0)
}
}
}
4 changes: 2 additions & 2 deletions dev/deps/spark-deps-hadoop-2-hive-2.3
@@ -39,7 +39,7 @@ commons-cli/1.5.0//commons-cli-1.5.0.jar
commons-codec/1.15//commons-codec-1.15.jar
commons-collections/3.2.2//commons-collections-3.2.2.jar
commons-collections4/4.4//commons-collections4-4.4.jar
commons-compiler/3.0.16//commons-compiler-3.0.16.jar
commons-compiler/3.1.7//commons-compiler-3.1.7.jar
commons-compress/1.21//commons-compress-1.21.jar
commons-configuration/1.6//commons-configuration-1.6.jar
commons-crypto/1.1.0//commons-crypto-1.1.0.jar
@@ -128,7 +128,7 @@ jakarta.servlet-api/4.0.3//jakarta.servlet-api-4.0.3.jar
jakarta.validation-api/2.0.2//jakarta.validation-api-2.0.2.jar
jakarta.ws.rs-api/2.1.6//jakarta.ws.rs-api-2.1.6.jar
jakarta.xml.bind-api/2.3.2//jakarta.xml.bind-api-2.3.2.jar
janino/3.0.16//janino-3.0.16.jar
janino/3.1.7//janino-3.1.7.jar
javassist/3.25.0-GA//javassist-3.25.0-GA.jar
javax.inject/1//javax.inject-1.jar
javax.jdo/3.2.0-m3//javax.jdo-3.2.0-m3.jar
4 changes: 2 additions & 2 deletions dev/deps/spark-deps-hadoop-3-hive-2.3
@@ -40,7 +40,7 @@ commons-cli/1.5.0//commons-cli-1.5.0.jar
commons-codec/1.15//commons-codec-1.15.jar
commons-collections/3.2.2//commons-collections-3.2.2.jar
commons-collections4/4.4//commons-collections4-4.4.jar
commons-compiler/3.0.16//commons-compiler-3.0.16.jar
commons-compiler/3.1.7//commons-compiler-3.1.7.jar
commons-compress/1.21//commons-compress-1.21.jar
commons-crypto/1.1.0//commons-crypto-1.1.0.jar
commons-dbcp/1.4//commons-dbcp-1.4.jar
@@ -116,7 +116,7 @@ jakarta.servlet-api/4.0.3//jakarta.servlet-api-4.0.3.jar
jakarta.validation-api/2.0.2//jakarta.validation-api-2.0.2.jar
jakarta.ws.rs-api/2.1.6//jakarta.ws.rs-api-2.1.6.jar
jakarta.xml.bind-api/2.3.2//jakarta.xml.bind-api-2.3.2.jar
janino/3.0.16//janino-3.0.16.jar
janino/3.1.7//janino-3.1.7.jar
javassist/3.25.0-GA//javassist-3.25.0-GA.jar
javax.jdo/3.2.0-m3//javax.jdo-3.2.0-m3.jar
javolution/5.5.1//javolution-5.5.1.jar
1 change: 1 addition & 0 deletions dev/sparktestsupport/modules.py
@@ -647,6 +647,7 @@ def __hash__(self):
"pyspark.pandas.tests.test_resample",
"pyspark.pandas.tests.test_reshape",
"pyspark.pandas.tests.test_rolling",
"pyspark.pandas.tests.test_scalars",
"pyspark.pandas.tests.test_series_conversion",
"pyspark.pandas.tests.test_series_datetime",
"pyspark.pandas.tests.test_series_string",
1 change: 1 addition & 0 deletions docs/sql-ref-ansi-compliance.md
@@ -319,6 +319,7 @@ When ANSI mode is on, it throws exceptions for invalid operations. You can use t
- `try_sum`: identical to the function `sum`, except that it returns `NULL` result instead of throwing an exception on integral/decimal/interval value overflow.
- `try_avg`: identical to the function `avg`, except that it returns `NULL` result instead of throwing an exception on decimal/interval value overflow.
- `try_element_at`: identical to the function `element_at`, except that it returns `NULL` result instead of throwing an exception on array's index out of bound or map's key not found.
- `try_to_timestamp`: identical to the function `to_timestamp`, except that it returns `NULL` result instead of throwing an exception on string parsing error.
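
To make the `try_to_timestamp` behaviour above concrete, here is a minimal, hedged Scala sketch; it assumes an active `SparkSession` named `spark` (as in `spark-shell`) and uses arbitrary sample input, not anything from this patch.

```scala
// ANSI mode on: to_timestamp fails on unparsable strings.
spark.conf.set("spark.sql.ansi.enabled", "true")

// This would raise a parsing error under ANSI mode:
// spark.sql("SELECT to_timestamp('not-a-timestamp')").collect()

// try_to_timestamp returns NULL for the same input instead of throwing.
val row = spark.sql("SELECT try_to_timestamp('not-a-timestamp') AS ts").head()
assert(row.isNullAt(0))
```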

### SQL Keywords (optional, disabled by default)

2 changes: 1 addition & 1 deletion pom.xml
@@ -189,7 +189,7 @@
<commons-pool2.version>2.11.1</commons-pool2.version>
<datanucleus-core.version>4.1.17</datanucleus-core.version>
<guava.version>14.0.1</guava.version>
<janino.version>3.0.16</janino.version>
<janino.version>3.1.7</janino.version>
<jersey.version>2.35</jersey.version>
<joda.version>2.10.14</joda.version>
<jodd.version>3.5.2</jodd.version>
1 change: 1 addition & 0 deletions python/docs/source/reference/pyspark.sql/data_types.rst
@@ -40,6 +40,7 @@ Data Types
NullType
ShortType
StringType
VarcharType
StructField
StructType
TimestampType
3 changes: 3 additions & 0 deletions python/pyspark/pandas/__init__.py
@@ -27,6 +27,7 @@
from typing import Any

from pyspark.pandas.missing.general_functions import _MissingPandasLikeGeneralFunctions
from pyspark.pandas.missing.scalars import _MissingPandasLikeScalars
from pyspark.sql.pandas.utils import require_minimum_pandas_version, require_minimum_pyarrow_version

try:
@@ -158,6 +159,8 @@ def _auto_patch_pandas() -> None:
def __getattr__(key: str) -> Any:
if key.startswith("__"):
raise AttributeError(key)
if hasattr(_MissingPandasLikeScalars, key):
raise getattr(_MissingPandasLikeScalars, key)
if hasattr(_MissingPandasLikeGeneralFunctions, key):
return getattr(_MissingPandasLikeGeneralFunctions, key)
else:
10 changes: 9 additions & 1 deletion python/pyspark/pandas/exceptions.py
@@ -69,10 +69,13 @@ def __init__(
method_name: Optional[str] = None,
arg_name: Optional[str] = None,
property_name: Optional[str] = None,
scalar_name: Optional[str] = None,
deprecated: bool = False,
reason: str = "",
):
assert (method_name is None) != (property_name is None)
assert [method_name is not None, property_name is not None, scalar_name is not None].count(
True
) == 1
self.class_name = class_name
self.method_name = method_name
self.arg_name = arg_name
@@ -95,6 +98,11 @@ def __init__(
msg = "The method `{0}.{1}()` is not implemented{2}".format(
class_name, method_name, reason
)
elif scalar_name is not None:
msg = (
"The scalar `{0}.{1}` is not reimplemented in pyspark.pandas;"
" use `pd.{1}`.".format(class_name, scalar_name)
)
else:
if deprecated:
msg = (
29 changes: 29 additions & 0 deletions python/pyspark/pandas/missing/scalars.py
@@ -0,0 +1,29 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from pyspark.pandas.exceptions import PandasNotImplementedError


def _unsupported_scalar(scalar_name):
return PandasNotImplementedError(class_name="ps", scalar_name=scalar_name)


class _MissingPandasLikeScalars:
Timestamp = _unsupported_scalar("Timestamp")
Timedelta = _unsupported_scalar("Timedelta")
Period = _unsupported_scalar("Period")
Interval = _unsupported_scalar("Interval")
Categorical = _unsupported_scalar("Categorical")