[SPARK-35579][SQL] Bump janino to 3.1.7
### What changes were proposed in this pull request?

Upgrade Janino from 3.0.16 to 3.1.7.

### Why are the changes needed?

- The proposed version contains a bug fix in Janino contributed by maropu.
   - janino-compiler/janino#148
- It also adds a `getBytecodes` method, which simplifies how bytecodes are retrieved from `ClassBodyEvaluator` in `CodeGenerator#updateAndGetCompilationStats` (by LuciferYang); see the hedged sketch after this list.
   - apache/spark#32536
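
The sketch below is illustrative only, not the exact Spark change: it assumes `getBytecodes` returns a `java.util.Map` from generated class name to bytecode array, and the helper name is hypothetical.

```scala
import scala.collection.JavaConverters._
import org.codehaus.janino.ClassBodyEvaluator

// Hedged sketch: with Janino 3.1.x, the compiled class files can be read
// directly from the evaluator, instead of the reflective field access that
// CodeGenerator#updateAndGetCompilationStats needed against Janino 3.0.x.
// Assumption: getBytecodes returns java.util.Map[String, Array[Byte]].
def logCompiledClassSizes(evaluator: ClassBodyEvaluator): Unit = {
  evaluator.getBytecodes.asScala.foreach { case (className, bytecode) =>
    // Per-class bytecode size; Spark feeds this kind of figure into its
    // codegen compilation metrics.
    println(s"$className compiled to ${bytecode.length} bytes")
  }
}
```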

### Does this PR introduce _any_ user-facing change?

No

### How was this patch tested?

Existing UTs

Closes #37202 from singhpk234/upgrade/bump-janino.

Authored-by: Prashant Singh <[email protected]>
Signed-off-by: Sean Owen <[email protected]>
a0x8o committed Jul 18, 2022
1 parent 7028144 commit 9dfa7b4
Showing 36 changed files with 1,801 additions and 1,324 deletions.
@@ -106,4 +106,8 @@ class DB2IntegrationSuite extends DockerJDBCIntegrationV2Suite with V2JDBCTest {
testStddevSamp(true)
testCovarPop()
testCovarSamp()
testRegrIntercept()
testRegrSlope()
testRegrR2()
testRegrSXY()
}
@@ -111,4 +111,8 @@ class OracleIntegrationSuite extends DockerJDBCIntegrationV2Suite with V2JDBCTes
testCovarPop()
testCovarSamp()
testCorr()
testRegrIntercept()
testRegrSlope()
testRegrR2()
testRegrSXY()
}
@@ -104,4 +104,12 @@ class PostgresIntegrationSuite extends DockerJDBCIntegrationV2Suite with V2JDBCT
testCovarSamp(true)
testCorr()
testCorr(true)
testRegrIntercept()
testRegrIntercept(true)
testRegrSlope()
testRegrSlope(true)
testRegrR2()
testRegrR2(true)
testRegrSXY()
testRegrSXY(true)
}
@@ -406,25 +406,27 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu

protected def caseConvert(tableName: String): String = tableName

private def withOrWithout(isDistinct: Boolean): String = if (isDistinct) "with" else "without"

protected def testVarPop(isDistinct: Boolean = false): Unit = {
val distinct = if (isDistinct) "DISTINCT " else ""
test(s"scan with aggregate push-down: VAR_POP with distinct: $isDistinct") {
test(s"scan with aggregate push-down: VAR_POP ${withOrWithout(isDistinct)} DISTINCT") {
val df = sql(s"SELECT VAR_POP(${distinct}bonus) FROM $catalogAndNamespace." +
s"${caseConvert("employee")} WHERE dept > 0 GROUP BY dept ORDER BY dept")
checkFilterPushed(df)
checkAggregateRemoved(df)
checkAggregatePushed(df, "VAR_POP")
val row = df.collect()
assert(row.length === 3)
assert(row(0).getDouble(0) === 10000d)
assert(row(1).getDouble(0) === 2500d)
assert(row(2).getDouble(0) === 0d)
assert(row(0).getDouble(0) === 10000.0)
assert(row(1).getDouble(0) === 2500.0)
assert(row(2).getDouble(0) === 0.0)
}
}

protected def testVarSamp(isDistinct: Boolean = false): Unit = {
val distinct = if (isDistinct) "DISTINCT " else ""
test(s"scan with aggregate push-down: VAR_SAMP with distinct: $isDistinct") {
test(s"scan with aggregate push-down: VAR_SAMP ${withOrWithout(isDistinct)} DISTINCT") {
val df = sql(
s"SELECT VAR_SAMP(${distinct}bonus) FROM $catalogAndNamespace." +
s"${caseConvert("employee")} WHERE dept > 0 GROUP BY dept ORDER BY dept")
@@ -433,15 +435,15 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu
checkAggregatePushed(df, "VAR_SAMP")
val row = df.collect()
assert(row.length === 3)
assert(row(0).getDouble(0) === 20000d)
assert(row(1).getDouble(0) === 5000d)
assert(row(0).getDouble(0) === 20000.0)
assert(row(1).getDouble(0) === 5000.0)
assert(row(2).isNullAt(0))
}
}

protected def testStddevPop(isDistinct: Boolean = false): Unit = {
val distinct = if (isDistinct) "DISTINCT " else ""
test(s"scan with aggregate push-down: STDDEV_POP with distinct: $isDistinct") {
test(s"scan with aggregate push-down: STDDEV_POP ${withOrWithout(isDistinct)} DISTINCT") {
val df = sql(
s"SELECT STDDEV_POP(${distinct}bonus) FROM $catalogAndNamespace." +
s"${caseConvert("employee")} WHERE dept > 0 GROUP BY dept ORDER BY dept")
@@ -450,15 +452,15 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu
checkAggregatePushed(df, "STDDEV_POP")
val row = df.collect()
assert(row.length === 3)
assert(row(0).getDouble(0) === 100d)
assert(row(1).getDouble(0) === 50d)
assert(row(2).getDouble(0) === 0d)
assert(row(0).getDouble(0) === 100.0)
assert(row(1).getDouble(0) === 50.0)
assert(row(2).getDouble(0) === 0.0)
}
}

protected def testStddevSamp(isDistinct: Boolean = false): Unit = {
val distinct = if (isDistinct) "DISTINCT " else ""
test(s"scan with aggregate push-down: STDDEV_SAMP with distinct: $isDistinct") {
test(s"scan with aggregate push-down: STDDEV_SAMP ${withOrWithout(isDistinct)} DISTINCT") {
val df = sql(
s"SELECT STDDEV_SAMP(${distinct}bonus) FROM $catalogAndNamespace." +
s"${caseConvert("employee")} WHERE dept > 0 GROUP BY dept ORDER BY dept")
@@ -467,15 +469,15 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu
checkAggregatePushed(df, "STDDEV_SAMP")
val row = df.collect()
assert(row.length === 3)
assert(row(0).getDouble(0) === 141.4213562373095d)
assert(row(1).getDouble(0) === 70.71067811865476d)
assert(row(0).getDouble(0) === 141.4213562373095)
assert(row(1).getDouble(0) === 70.71067811865476)
assert(row(2).isNullAt(0))
}
}

protected def testCovarPop(isDistinct: Boolean = false): Unit = {
val distinct = if (isDistinct) "DISTINCT " else ""
test(s"scan with aggregate push-down: COVAR_POP with distinct: $isDistinct") {
test(s"scan with aggregate push-down: COVAR_POP ${withOrWithout(isDistinct)} DISTINCT") {
val df = sql(
s"SELECT COVAR_POP(${distinct}bonus, bonus) FROM $catalogAndNamespace." +
s"${caseConvert("employee")} WHERE dept > 0 GROUP BY dept ORDER BY dept")
@@ -484,15 +486,15 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu
checkAggregatePushed(df, "COVAR_POP")
val row = df.collect()
assert(row.length === 3)
assert(row(0).getDouble(0) === 10000d)
assert(row(1).getDouble(0) === 2500d)
assert(row(2).getDouble(0) === 0d)
assert(row(0).getDouble(0) === 10000.0)
assert(row(1).getDouble(0) === 2500.0)
assert(row(2).getDouble(0) === 0.0)
}
}

protected def testCovarSamp(isDistinct: Boolean = false): Unit = {
val distinct = if (isDistinct) "DISTINCT " else ""
test(s"scan with aggregate push-down: COVAR_SAMP with distinct: $isDistinct") {
test(s"scan with aggregate push-down: COVAR_SAMP ${withOrWithout(isDistinct)} DISTINCT") {
val df = sql(
s"SELECT COVAR_SAMP(${distinct}bonus, bonus) FROM $catalogAndNamespace." +
s"${caseConvert("employee")} WHERE dept > 0 GROUP BY dept ORDER BY dept")
@@ -501,15 +503,15 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu
checkAggregatePushed(df, "COVAR_SAMP")
val row = df.collect()
assert(row.length === 3)
assert(row(0).getDouble(0) === 20000d)
assert(row(1).getDouble(0) === 5000d)
assert(row(0).getDouble(0) === 20000.0)
assert(row(1).getDouble(0) === 5000.0)
assert(row(2).isNullAt(0))
}
}

protected def testCorr(isDistinct: Boolean = false): Unit = {
val distinct = if (isDistinct) "DISTINCT " else ""
test(s"scan with aggregate push-down: CORR with distinct: $isDistinct") {
test(s"scan with aggregate push-down: CORR ${withOrWithout(isDistinct)} DISTINCT") {
val df = sql(
s"SELECT CORR(${distinct}bonus, bonus) FROM $catalogAndNamespace." +
s"${caseConvert("employee")} WHERE dept > 0 GROUP BY dept ORDER BY dept")
@@ -518,9 +520,77 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu
checkAggregatePushed(df, "CORR")
val row = df.collect()
assert(row.length === 3)
assert(row(0).getDouble(0) === 1d)
assert(row(1).getDouble(0) === 1d)
assert(row(0).getDouble(0) === 1.0)
assert(row(1).getDouble(0) === 1.0)
assert(row(2).isNullAt(0))
}
}

protected def testRegrIntercept(isDistinct: Boolean = false): Unit = {
val distinct = if (isDistinct) "DISTINCT " else ""
test(s"scan with aggregate push-down: REGR_INTERCEPT ${withOrWithout(isDistinct)} DISTINCT") {
val df = sql(
s"SELECT REGR_INTERCEPT(${distinct}bonus, bonus) FROM $catalogAndNamespace." +
s"${caseConvert("employee")} WHERE dept > 0 GROUP BY dept ORDER BY dept")
checkFilterPushed(df)
checkAggregateRemoved(df)
checkAggregatePushed(df, "REGR_INTERCEPT")
val row = df.collect()
assert(row.length === 3)
assert(row(0).getDouble(0) === 0.0)
assert(row(1).getDouble(0) === 0.0)
assert(row(2).isNullAt(0))
}
}

protected def testRegrSlope(isDistinct: Boolean = false): Unit = {
val distinct = if (isDistinct) "DISTINCT " else ""
test(s"scan with aggregate push-down: REGR_SLOPE ${withOrWithout(isDistinct)} DISTINCT") {
val df = sql(
s"SELECT REGR_SLOPE(${distinct}bonus, bonus) FROM $catalogAndNamespace." +
s"${caseConvert("employee")} WHERE dept > 0 GROUP BY dept ORDER BY dept")
checkFilterPushed(df)
checkAggregateRemoved(df)
checkAggregatePushed(df, "REGR_SLOPE")
val row = df.collect()
assert(row.length === 3)
assert(row(0).getDouble(0) === 1.0)
assert(row(1).getDouble(0) === 1.0)
assert(row(2).isNullAt(0))
}
}

protected def testRegrR2(isDistinct: Boolean = false): Unit = {
val distinct = if (isDistinct) "DISTINCT " else ""
test(s"scan with aggregate push-down: REGR_R2 ${withOrWithout(isDistinct)} DISTINCT") {
val df = sql(
s"SELECT REGR_R2(${distinct}bonus, bonus) FROM $catalogAndNamespace." +
s"${caseConvert("employee")} WHERE dept > 0 GROUP BY dept ORDER BY dept")
checkFilterPushed(df)
checkAggregateRemoved(df)
checkAggregatePushed(df, "REGR_R2")
val row = df.collect()
assert(row.length === 3)
assert(row(0).getDouble(0) === 1.0)
assert(row(1).getDouble(0) === 1.0)
assert(row(2).isNullAt(0))
}
}

protected def testRegrSXY(isDistinct: Boolean = false): Unit = {
val distinct = if (isDistinct) "DISTINCT " else ""
test(s"scan with aggregate push-down: REGR_SXY ${withOrWithout(isDistinct)} DISTINCT") {
val df = sql(
s"SELECT REGR_SXY(${distinct}bonus, bonus) FROM $catalogAndNamespace." +
s"${caseConvert("employee")} WHERE dept > 0 GROUP BY dept ORDER BY dept")
checkFilterPushed(df)
checkAggregateRemoved(df)
checkAggregatePushed(df, "REGR_SXY")
val row = df.collect()
assert(row.length === 3)
assert(row(0).getDouble(0) === 20000.0)
assert(row(1).getDouble(0) === 5000.0)
assert(row(2).getDouble(0) === 0.0)
}
}
}
4 changes: 2 additions & 2 deletions dev/deps/spark-deps-hadoop-2-hive-2.3
@@ -39,7 +39,7 @@ commons-cli/1.5.0//commons-cli-1.5.0.jar
commons-codec/1.15//commons-codec-1.15.jar
commons-collections/3.2.2//commons-collections-3.2.2.jar
commons-collections4/4.4//commons-collections4-4.4.jar
commons-compiler/3.0.16//commons-compiler-3.0.16.jar
commons-compiler/3.1.7//commons-compiler-3.1.7.jar
commons-compress/1.21//commons-compress-1.21.jar
commons-configuration/1.6//commons-configuration-1.6.jar
commons-crypto/1.1.0//commons-crypto-1.1.0.jar
@@ -128,7 +128,7 @@ jakarta.servlet-api/4.0.3//jakarta.servlet-api-4.0.3.jar
jakarta.validation-api/2.0.2//jakarta.validation-api-2.0.2.jar
jakarta.ws.rs-api/2.1.6//jakarta.ws.rs-api-2.1.6.jar
jakarta.xml.bind-api/2.3.2//jakarta.xml.bind-api-2.3.2.jar
janino/3.0.16//janino-3.0.16.jar
janino/3.1.7//janino-3.1.7.jar
javassist/3.25.0-GA//javassist-3.25.0-GA.jar
javax.inject/1//javax.inject-1.jar
javax.jdo/3.2.0-m3//javax.jdo-3.2.0-m3.jar
4 changes: 2 additions & 2 deletions dev/deps/spark-deps-hadoop-3-hive-2.3
@@ -40,7 +40,7 @@ commons-cli/1.5.0//commons-cli-1.5.0.jar
commons-codec/1.15//commons-codec-1.15.jar
commons-collections/3.2.2//commons-collections-3.2.2.jar
commons-collections4/4.4//commons-collections4-4.4.jar
commons-compiler/3.0.16//commons-compiler-3.0.16.jar
commons-compiler/3.1.7//commons-compiler-3.1.7.jar
commons-compress/1.21//commons-compress-1.21.jar
commons-crypto/1.1.0//commons-crypto-1.1.0.jar
commons-dbcp/1.4//commons-dbcp-1.4.jar
@@ -116,7 +116,7 @@ jakarta.servlet-api/4.0.3//jakarta.servlet-api-4.0.3.jar
jakarta.validation-api/2.0.2//jakarta.validation-api-2.0.2.jar
jakarta.ws.rs-api/2.1.6//jakarta.ws.rs-api-2.1.6.jar
jakarta.xml.bind-api/2.3.2//jakarta.xml.bind-api-2.3.2.jar
janino/3.0.16//janino-3.0.16.jar
janino/3.1.7//janino-3.1.7.jar
javassist/3.25.0-GA//javassist-3.25.0-GA.jar
javax.jdo/3.2.0-m3//javax.jdo-3.2.0-m3.jar
javolution/5.5.1//javolution-5.5.1.jar
1 change: 1 addition & 0 deletions dev/sparktestsupport/modules.py
@@ -647,6 +647,7 @@ def __hash__(self):
"pyspark.pandas.tests.test_resample",
"pyspark.pandas.tests.test_reshape",
"pyspark.pandas.tests.test_rolling",
"pyspark.pandas.tests.test_scalars",
"pyspark.pandas.tests.test_series_conversion",
"pyspark.pandas.tests.test_series_datetime",
"pyspark.pandas.tests.test_series_string",
1 change: 1 addition & 0 deletions docs/sql-ref-ansi-compliance.md
@@ -319,6 +319,7 @@ When ANSI mode is on, it throws exceptions for invalid operations. You can use t
- `try_sum`: identical to the function `sum`, except that it returns `NULL` result instead of throwing an exception on integral/decimal/interval value overflow.
- `try_avg`: identical to the function `avg`, except that it returns `NULL` result instead of throwing an exception on decimal/interval value overflow.
- `try_element_at`: identical to the function `element_at`, except that it returns `NULL` result instead of throwing an exception on array's index out of bound or map's key not found.
- `try_to_timestamp`: identical to the function `to_timestamp`, except that it returns `NULL` result instead of throwing an exception on string parsing error.
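
To make the `try_to_timestamp` behaviour above concrete, here is a minimal, hedged Scala sketch; it assumes an active `SparkSession` named `spark` (as in `spark-shell`) and uses arbitrary sample input, not anything from this patch.

```scala
// ANSI mode on: to_timestamp fails on unparsable strings.
spark.conf.set("spark.sql.ansi.enabled", "true")

// This would raise a parsing error under ANSI mode:
// spark.sql("SELECT to_timestamp('not-a-timestamp')").collect()

// try_to_timestamp returns NULL for the same input instead of throwing.
val row = spark.sql("SELECT try_to_timestamp('not-a-timestamp') AS ts").head()
assert(row.isNullAt(0))
```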

### SQL Keywords (optional, disabled by default)

2 changes: 1 addition & 1 deletion pom.xml
@@ -189,7 +189,7 @@
<commons-pool2.version>2.11.1</commons-pool2.version>
<datanucleus-core.version>4.1.17</datanucleus-core.version>
<guava.version>14.0.1</guava.version>
<janino.version>3.0.16</janino.version>
<janino.version>3.1.7</janino.version>
<jersey.version>2.35</jersey.version>
<joda.version>2.10.14</joda.version>
<jodd.version>3.5.2</jodd.version>
1 change: 1 addition & 0 deletions python/docs/source/reference/pyspark.sql/data_types.rst
@@ -40,6 +40,7 @@ Data Types
NullType
ShortType
StringType
VarcharType
StructField
StructType
TimestampType
3 changes: 3 additions & 0 deletions python/pyspark/pandas/__init__.py
@@ -27,6 +27,7 @@
from typing import Any

from pyspark.pandas.missing.general_functions import _MissingPandasLikeGeneralFunctions
from pyspark.pandas.missing.scalars import _MissingPandasLikeScalars
from pyspark.sql.pandas.utils import require_minimum_pandas_version, require_minimum_pyarrow_version

try:
@@ -158,6 +159,8 @@ def _auto_patch_pandas() -> None:
def __getattr__(key: str) -> Any:
if key.startswith("__"):
raise AttributeError(key)
if hasattr(_MissingPandasLikeScalars, key):
raise getattr(_MissingPandasLikeScalars, key)
if hasattr(_MissingPandasLikeGeneralFunctions, key):
return getattr(_MissingPandasLikeGeneralFunctions, key)
else:
10 changes: 9 additions & 1 deletion python/pyspark/pandas/exceptions.py
@@ -69,10 +69,13 @@ def __init__(
method_name: Optional[str] = None,
arg_name: Optional[str] = None,
property_name: Optional[str] = None,
scalar_name: Optional[str] = None,
deprecated: bool = False,
reason: str = "",
):
assert (method_name is None) != (property_name is None)
assert [method_name is not None, property_name is not None, scalar_name is not None].count(
True
) == 1
self.class_name = class_name
self.method_name = method_name
self.arg_name = arg_name
@@ -95,6 +98,11 @@ def __init__(
msg = "The method `{0}.{1}()` is not implemented{2}".format(
class_name, method_name, reason
)
elif scalar_name is not None:
msg = (
"The scalar `{0}.{1}` is not reimplemented in pyspark.pandas;"
" use `pd.{1}`.".format(class_name, scalar_name)
)
else:
if deprecated:
msg = (
29 changes: 29 additions & 0 deletions python/pyspark/pandas/missing/scalars.py
@@ -0,0 +1,29 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from pyspark.pandas.exceptions import PandasNotImplementedError


def _unsupported_scalar(scalar_name):
return PandasNotImplementedError(class_name="ps", scalar_name=scalar_name)


class _MissingPandasLikeScalars:
Timestamp = _unsupported_scalar("Timestamp")
Timedelta = _unsupported_scalar("Timedelta")
Period = _unsupported_scalar("Period")
Interval = _unsupported_scalar("Interval")
Categorical = _unsupported_scalar("Categorical")