From 0ab9bd79b33857fbbfaa2233bc81462192b47291 Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Wed, 12 May 2021 20:57:21 -0700 Subject: [PATCH] [SPARK-35384][SQL] Improve performance for InvokeLike.invoke ### What changes were proposed in this pull request? Change `map` in `InvokeLike.invoke` to a while loop to improve performance, following the Spark [style guide](https://github.com/databricks/scala-style-guide#traversal-and-zipwithindex). ### Why are the changes needed? `InvokeLike.invoke`, which is used in the non-codegen path for `Invoke` and `StaticInvoke`, currently uses `map` to evaluate arguments: ```scala val args = arguments.map(e => e.eval(input).asInstanceOf[Object]) if (needNullCheck && args.exists(_ == null)) { // return null if one of arguments is null null } else { ... ``` which is pretty expensive if the method itself is trivial. We can change it to a plain while loop. (Screenshot: Screen Shot 2021-05-12 at 12 19 59 AM) Benchmark results from `V2FunctionBenchmark` show this can improve performance by as much as 3x: Before ``` OpenJDK 64-Bit Server VM 1.8.0_292-b10 on Linux 5.4.0-1046-azure Intel(R) Xeon(R) CPU E5-2673 v3 @ 2.40GHz scalar function (long + long) -> long, result_nullable = false codegen = false: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------------------------------------------- native_long_add 36506 36656 251 13.7 73.0 1.0X java_long_add_default 47151 47540 370 10.6 94.3 0.8X java_long_add_magic 178691 182457 1327 2.8 357.4 0.2X java_long_add_static_magic 177151 178258 1151 2.8 354.3 0.2X ``` After ``` OpenJDK 64-Bit Server VM 1.8.0_292-b10 on Linux 5.4.0-1046-azure Intel(R) Xeon(R) CPU E5-2673 v3 @ 2.40GHz scalar function (long + long) -> long, result_nullable = false codegen = false: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
-------------------------------------------------------------------------------------------------------------------------------------------------------------- native_long_add 29897 30342 568 16.7 59.8 1.0X java_long_add_default 40628 41075 664 12.3 81.3 0.7X java_long_add_magic 54553 54755 182 9.2 109.1 0.5X java_long_add_static_magic 55410 55532 127 9.0 110.8 0.5X ``` ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing tests. Closes #32527 from sunchao/SPARK-35384. Authored-by: Chao Sun Signed-off-by: Dongjoon Hyun --- .../expressions/objects/objects.scala | 12 +++- .../V2FunctionBenchmark-jdk11-results.txt | 56 +++++++++---------- .../V2FunctionBenchmark-results.txt | 48 ++++++++-------- 3 files changed, 61 insertions(+), 55 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala index 2b4ceb78287e5..abd951fe59670 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala @@ -51,6 +51,7 @@ trait InvokeLike extends Expression with NonSQLExpression { def propagateNull: Boolean protected lazy val needNullCheck: Boolean = propagateNull && arguments.exists(_.nullable) + protected lazy val evaluatedArgs: Array[Object] = new Array[Object](arguments.length) /** * Prepares codes for arguments. 
@@ -127,13 +128,18 @@ trait InvokeLike extends Expression with NonSQLExpression { arguments: Seq[Expression], input: InternalRow, dataType: DataType): Any = { - val args = arguments.map(e => e.eval(input).asInstanceOf[Object]) - if (needNullCheck && args.exists(_ == null)) { + var i = 0 + val len = arguments.length + while (i < len) { + evaluatedArgs(i) = arguments(i).eval(input).asInstanceOf[Object] + i += 1 + } + if (needNullCheck && evaluatedArgs.contains(null)) { // return null if one of arguments is null null } else { val ret = try { - method.invoke(obj, args: _*) + method.invoke(obj, evaluatedArgs: _*) } catch { // Re-throw the original exception. case e: java.lang.reflect.InvocationTargetException if e.getCause != null => diff --git a/sql/core/benchmarks/V2FunctionBenchmark-jdk11-results.txt b/sql/core/benchmarks/V2FunctionBenchmark-jdk11-results.txt index e1ada0997f4ce..fa373afb2790a 100644 --- a/sql/core/benchmarks/V2FunctionBenchmark-jdk11-results.txt +++ b/sql/core/benchmarks/V2FunctionBenchmark-jdk11-results.txt @@ -1,44 +1,44 @@ OpenJDK 64-Bit Server VM 11.0.11+9-LTS on Linux 5.4.0-1046-azure -Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +Intel(R) Xeon(R) CPU E5-2673 v3 @ 2.40GHz scalar function (long + long) -> long, result_nullable = true codegen = true: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------------------------ -native_long_add 16015 16309 407 31.2 32.0 1.0X -java_long_add_default 48899 49122 352 10.2 97.8 0.3X -java_long_add_magic 19169 19302 117 26.1 38.3 0.8X -java_long_add_static_magic 18308 18373 57 27.3 36.6 0.9X -scala_long_add_default 48773 48922 136 10.3 97.5 0.3X -scala_long_add_magic 18372 18422 44 27.2 36.7 0.9X +native_long_add 17138 17431 486 29.2 34.3 1.0X +java_long_add_default 47386 48316 1583 10.6 94.8 0.4X +java_long_add_magic 19409 19532 152 25.8 38.8 0.9X 
+java_long_add_static_magic 18257 18294 33 27.4 36.5 0.9X +scala_long_add_default 49259 49512 235 10.2 98.5 0.3X +scala_long_add_magic 18964 19025 53 26.4 37.9 0.9X OpenJDK 64-Bit Server VM 11.0.11+9-LTS on Linux 5.4.0-1046-azure -Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +Intel(R) Xeon(R) CPU E5-2673 v3 @ 2.40GHz scalar function (long + long) -> long, result_nullable = false codegen = true: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------------------------- -native_long_add 16414 16452 41 30.5 32.8 1.0X -java_long_add_default 47640 47767 134 10.5 95.3 0.3X -java_long_add_magic 18413 18554 139 27.2 36.8 0.9X -java_long_add_static_magic 16659 16707 43 30.0 33.3 1.0X -scala_long_add_default 47821 47857 48 10.5 95.6 0.3X -scala_long_add_magic 18406 18502 99 27.2 36.8 0.9X +native_long_add 16814 16916 99 29.7 33.6 1.0X +java_long_add_default 43725 43909 216 11.4 87.4 0.4X +java_long_add_magic 19015 19060 39 26.3 38.0 0.9X +java_long_add_static_magic 18940 18993 52 26.4 37.9 0.9X +scala_long_add_default 43804 43874 88 11.4 87.6 0.4X +scala_long_add_magic 18753 18791 34 26.7 37.5 0.9X OpenJDK 64-Bit Server VM 11.0.11+9-LTS on Linux 5.4.0-1046-azure -Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +Intel(R) Xeon(R) CPU E5-2673 v3 @ 2.40GHz scalar function (long + long) -> long, result_nullable = true codegen = false: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------------------------- -native_long_add 36335 36366 27 13.8 72.7 1.0X -java_long_add_default 53930 54056 155 9.3 107.9 0.7X -java_long_add_magic 126621 127109 471 3.9 253.2 0.3X -java_long_add_static_magic 126914 127193 251 3.9 253.8 0.3X -scala_long_add_default 55812 55949 141 9.0 111.6 0.7X 
-scala_long_add_magic 127629 127900 420 3.9 255.3 0.3X +native_long_add 42493 42830 506 11.8 85.0 1.0X +java_long_add_default 54557 54710 141 9.2 109.1 0.8X +java_long_add_magic 74409 74564 227 6.7 148.8 0.6X +java_long_add_static_magic 75081 75235 190 6.7 150.2 0.6X +scala_long_add_default 54789 54862 77 9.1 109.6 0.8X +scala_long_add_magic 73777 73886 96 6.8 147.6 0.6X OpenJDK 64-Bit Server VM 11.0.11+9-LTS on Linux 5.4.0-1046-azure -Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +Intel(R) Xeon(R) CPU E5-2673 v3 @ 2.40GHz scalar function (long + long) -> long, result_nullable = false codegen = false: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------------------------------------------- -native_long_add 37433 37794 312 13.4 74.9 1.0X -java_long_add_default 53629 53946 416 9.3 107.3 0.7X -java_long_add_magic 160091 160605 549 3.1 320.2 0.2X -java_long_add_static_magic 157228 158430 1372 3.2 314.5 0.2X -scala_long_add_default 54026 54197 187 9.3 108.1 0.7X -scala_long_add_magic 160926 161351 526 3.1 321.9 0.2X +native_long_add 37357 37490 116 13.4 74.7 1.0X +java_long_add_default 53166 53192 23 9.4 106.3 0.7X +java_long_add_magic 70501 71258 1121 7.1 141.0 0.5X +java_long_add_static_magic 68934 69636 1115 7.3 137.9 0.5X +scala_long_add_default 53075 53146 62 9.4 106.2 0.7X +scala_long_add_magic 69838 70746 1442 7.2 139.7 0.5X diff --git a/sql/core/benchmarks/V2FunctionBenchmark-results.txt b/sql/core/benchmarks/V2FunctionBenchmark-results.txt index 450133d30a68e..de56275a3bff4 100644 --- a/sql/core/benchmarks/V2FunctionBenchmark-results.txt +++ b/sql/core/benchmarks/V2FunctionBenchmark-results.txt @@ -2,43 +2,43 @@ OpenJDK 64-Bit Server VM 1.8.0_292-b10 on Linux 5.4.0-1046-azure Intel(R) Xeon(R) CPU E5-2673 v3 @ 2.40GHz scalar function (long + long) -> long, result_nullable = true codegen = true: Best Time(ms) Avg Time(ms) 
Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------------------------ -native_long_add 11275 12171 998 44.3 22.5 1.0X -java_long_add_default 37040 37728 755 13.5 74.1 0.3X -java_long_add_magic 13974 14218 276 35.8 27.9 0.8X -java_long_add_static_magic 12019 12260 248 41.6 24.0 0.9X -scala_long_add_default 38262 38520 228 13.1 76.5 0.3X -scala_long_add_magic 13940 14062 202 35.9 27.9 0.8X +native_long_add 9474 10354 975 52.8 18.9 1.0X +java_long_add_default 36465 36994 607 13.7 72.9 0.3X +java_long_add_magic 11922 12228 433 41.9 23.8 0.8X +java_long_add_static_magic 10596 10867 275 47.2 21.2 0.9X +scala_long_add_default 36644 37225 705 13.6 73.3 0.3X +scala_long_add_magic 11887 12140 222 42.1 23.8 0.8X OpenJDK 64-Bit Server VM 1.8.0_292-b10 on Linux 5.4.0-1046-azure Intel(R) Xeon(R) CPU E5-2673 v3 @ 2.40GHz scalar function (long + long) -> long, result_nullable = false codegen = true: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------------------------- -native_long_add 11041 11194 133 45.3 22.1 1.0X -java_long_add_default 35213 35716 454 14.2 70.4 0.3X -java_long_add_magic 13857 13945 78 36.1 27.7 0.8X -java_long_add_static_magic 11272 11382 97 44.4 22.5 1.0X -scala_long_add_default 35157 36078 1103 14.2 70.3 0.3X -scala_long_add_magic 14005 14194 223 35.7 28.0 0.8X +native_long_add 9197 9295 121 54.4 18.4 1.0X +java_long_add_default 32693 32752 76 15.3 65.4 0.3X +java_long_add_magic 11981 12463 711 41.7 24.0 0.8X +java_long_add_static_magic 9447 9529 87 52.9 18.9 1.0X +scala_long_add_default 31355 31600 220 15.9 62.7 0.3X +scala_long_add_magic 11672 11834 159 42.8 23.3 0.8X OpenJDK 64-Bit Server VM 1.8.0_292-b10 on Linux 5.4.0-1046-azure Intel(R) Xeon(R) CPU E5-2673 v3 @ 2.40GHz scalar 
function (long + long) -> long, result_nullable = true codegen = false: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------------------------- -native_long_add 33550 33648 144 14.9 67.1 1.0X -java_long_add_default 44094 44832 690 11.3 88.2 0.8X -java_long_add_magic 173631 175419 1611 2.9 347.3 0.2X -java_long_add_static_magic 173095 174496 1530 2.9 346.2 0.2X -scala_long_add_default 46445 46774 430 10.8 92.9 0.7X -scala_long_add_magic 174399 175016 550 2.9 348.8 0.2X +native_long_add 31047 31422 362 16.1 62.1 1.0X +java_long_add_default 40283 40423 121 12.4 80.6 0.8X +java_long_add_magic 54133 54210 67 9.2 108.3 0.6X +java_long_add_static_magic 56368 56608 212 8.9 112.7 0.6X +scala_long_add_default 40166 40296 132 12.4 80.3 0.8X +scala_long_add_magic 55704 55902 249 9.0 111.4 0.6X OpenJDK 64-Bit Server VM 1.8.0_292-b10 on Linux 5.4.0-1046-azure Intel(R) Xeon(R) CPU E5-2673 v3 @ 2.40GHz scalar function (long + long) -> long, result_nullable = false codegen = false: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------------------------------------------- -native_long_add 36506 36656 251 13.7 73.0 1.0X -java_long_add_default 47151 47540 370 10.6 94.3 0.8X -java_long_add_magic 178691 182457 1327 2.8 357.4 0.2X -java_long_add_static_magic 177151 178258 1151 2.8 354.3 0.2X -scala_long_add_default 47127 47760 666 10.6 94.3 0.8X -scala_long_add_magic 179941 181417 1545 2.8 359.9 0.2X +native_long_add 29897 30342 568 16.7 59.8 1.0X +java_long_add_default 40628 41075 664 12.3 81.3 0.7X +java_long_add_magic 54553 54755 182 9.2 109.1 0.5X +java_long_add_static_magic 55410 55532 127 9.0 110.8 0.5X +scala_long_add_default 39775 40392 661 12.6 79.5 0.8X +scala_long_add_magic 54221 54631 360 
9.2 108.4 0.6X