Commit 7db470f

Merge remote-tracking branch 'origin/branch-23.12' into fastparquet-tz

mythrocks committed Nov 30, 2023
2 parents 3d3c06b + b759259

Showing 115 changed files with 1,698 additions and 376 deletions.
43 changes: 34 additions & 9 deletions aggregator/pom.xml
@@ -156,15 +156,23 @@
<mkdir dir="${newClassesDir}"/>
<delete dir="${newClassesDir}"/>
<unzip src="${realAggJar}" dest="${newClassesDir}"/>
-<mkdir dir="${oldClassesDir}"/>
-
-<exec executable="diff"
-      resultproperty="diff.result">
-    <arg value="-q"/>
-    <arg value="-r"/>
-    <arg value="${oldClassesDir}"/>
-    <arg value="${newClassesDir}"/>
-</exec>
+<ac:if xmlns:ac="antlib:net.sf.antcontrib">
+    <available file="${oldClassesDir}" type="dir"/>
+    <then>
+        <exec executable="diff"
+              resultproperty="diff.result"
+              discardOutput="${nonfail.errors.quiet}"
+              discardError="${nonfail.errors.quiet}">
+            <arg value="-q"/>
+            <arg value="-r"/>
+            <arg value="${oldClassesDir}"/>
+            <arg value="${newClassesDir}"/>
+        </exec>
+    </then>
+    <else>
+        <echo>Clean build? Skipping diff because ${oldClassesDir} does not exist</echo>
+    </else>
+</ac:if>

<ac:if xmlns:ac="antlib:net.sf.antcontrib">
<equals arg1="0" arg2="${diff.result}"/>
@@ -611,6 +619,23 @@
</dependency>
</dependencies>
</profile>
+<profile>
+    <id>release341db</id>
+    <activation>
+        <property>
+            <name>buildver</name>
+            <value>341db</value>
+        </property>
+    </activation>
+    <dependencies>
+        <dependency>
+            <groupId>com.nvidia</groupId>
+            <artifactId>rapids-4-spark-delta-spark341db_${scala.binary.version}</artifactId>
+            <version>${project.version}</version>
+            <classifier>${spark.version.classifier}</classifier>
+        </dependency>
+    </dependencies>
+</profile>
<profile>
<id>release333</id>
<activation>
15 changes: 12 additions & 3 deletions dist/pom.xml
@@ -331,9 +331,18 @@
<goals><goal>run</goal></goals>
<configuration>
<target>
-<copy todir="${project.build.directory}/parallel-world">
-    <fileset dir="${project.build.directory}/jni-deps"/>
-</copy>
+<taskdef resource="net/sf/antcontrib/antcontrib.properties"/>
+<ac:if xmlns:ac="antlib:net.sf.antcontrib">
+    <available file="${project.build.directory}/jni-deps" type="dir"/>
+    <then>
+        <copy todir="${project.build.directory}/parallel-world">
+            <fileset dir="${project.build.directory}/jni-deps"/>
+        </copy>
+    </then>
+    <else>
+        <fail>Re-execute build with the default `-Drapids.jni.unpack.skip=false`</fail>
+    </else>
+</ac:if>
</target>
</configuration>
</execution>
@@ -17,23 +17,26 @@
#
# The parameters are:
# - CUDA_VER: 11.8.0 by default
-# - UCX_VER and UCX_CUDA_VER: these are used to pick a package matching a specific UCX version and
-#   CUDA runtime from the UCX github repo.
-#   See: https://github.com/openucx/ucx/releases/
+# - UCX_VER, UCX_CUDA_VER, and UCX_ARCH:
+#   Used to pick a package matching a specific UCX version and
+#   CUDA runtime from the UCX github repo.
+#   See: https://github.com/openucx/ucx/releases/
# - ROCKY_VER: Rocky Linux OS version

ARG CUDA_VER=11.8.0
-ARG UCX_VER=1.14.0
+ARG UCX_VER=1.15.0
ARG UCX_CUDA_VER=11
+ARG UCX_ARCH=x86_64
ARG ROCKY_VER=8
FROM nvidia/cuda:${CUDA_VER}-runtime-rockylinux${ROCKY_VER}
ARG UCX_VER
ARG UCX_CUDA_VER
+ARG UCX_ARCH

RUN yum update -y && yum install -y wget bzip2 numactl-libs libgomp
RUN ls /usr/lib
RUN mkdir /tmp/ucx_install && cd /tmp/ucx_install && \
-wget https://github.com/openucx/ucx/releases/download/v$UCX_VER/ucx-$UCX_VER-centos8-mofed5-cuda$UCX_CUDA_VER.tar.bz2 && \
+wget https://github.com/openucx/ucx/releases/download/v$UCX_VER/ucx-$UCX_VER-centos8-mofed5-cuda$UCX_CUDA_VER-$UCX_ARCH.tar.bz2 && \
tar -xvf *.bz2 && \
rpm -i ucx-$UCX_VER*.rpm && \
rpm -i ucx-cuda-$UCX_VER*.rpm --nodeps && \
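Not part of the commit: the `ARG`s documented above are meant to be overridden at build time. A hypothetical invocation for the Rocky Linux image (the Dockerfile name and image tag are assumptions, since this view truncates the file header):

```sh
# Build-arg values mirror the defaults above; UCX_VER is bumped to 1.15.0
# and UCX_ARCH is newly introduced by this commit.
docker build \
  --build-arg CUDA_VER=11.8.0 \
  --build-arg UCX_VER=1.15.0 \
  --build-arg UCX_CUDA_VER=11 \
  --build-arg UCX_ARCH=x86_64 \
  -t ucx-rocky:test -f Dockerfile.ucx-rocky .
```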
@@ -17,22 +17,25 @@
#
# The parameters are:
# - CUDA_VER: 11.8.0 by default
-# - UCX_VER and UCX_CUDA_VER: these are used to pick a package matching a specific UCX version and
-#   CUDA runtime from the UCX github repo.
-#   See: https://github.com/openucx/ucx/releases/
+# - UCX_VER, UCX_CUDA_VER, and UCX_ARCH:
+#   Used to pick a package matching a specific UCX version and
+#   CUDA runtime from the UCX github repo.
+#   See: https://github.com/openucx/ucx/releases/
# - ROCKY_VER: Rocky Linux OS version

ARG CUDA_VER=11.8.0
-ARG UCX_VER=1.14.0
+ARG UCX_VER=1.15.0
ARG UCX_CUDA_VER=11
+ARG UCX_ARCH=x86_64
ARG ROCKY_VER=8
FROM nvidia/cuda:${CUDA_VER}-runtime-rockylinux${ROCKY_VER}
ARG UCX_VER
ARG UCX_CUDA_VER
+ARG UCX_ARCH

RUN yum update -y && yum install -y wget bzip2 rdma-core numactl-libs libgomp libibverbs librdmacm
RUN mkdir /tmp/ucx_install && cd /tmp/ucx_install && \
-wget https://github.com/openucx/ucx/releases/download/v$UCX_VER/ucx-$UCX_VER-centos8-mofed5-cuda$UCX_CUDA_VER.tar.bz2 && \
+wget https://github.com/openucx/ucx/releases/download/v$UCX_VER/ucx-$UCX_VER-centos8-mofed5-cuda$UCX_CUDA_VER-$UCX_ARCH.tar.bz2 && \
tar -xvf *.bz2 && \
rpm -i ucx-$UCX_VER*.rpm && \
rpm -i ucx-cuda-$UCX_VER*.rpm --nodeps && \
@@ -17,21 +17,24 @@
#
# The parameters are:
# - CUDA_VER: 11.8.0 by default
-# - UCX_VER and UCX_CUDA_VER: these are used to pick a package matching a specific UCX version and
-#   CUDA runtime from the UCX github repo.
-#   See: https://github.com/openucx/ucx/releases/
+# - UCX_VER, UCX_CUDA_VER, and UCX_ARCH:
+#   Used to pick a package matching a specific UCX version and
+#   CUDA runtime from the UCX github repo.
+#   See: https://github.com/openucx/ucx/releases/
# - UBUNTU_VER: 20.04 by default
#

ARG CUDA_VER=11.8.0
-ARG UCX_VER=1.14.0
+ARG UCX_VER=1.15.0
ARG UCX_CUDA_VER=11
+ARG UCX_ARCH=x86_64
ARG UBUNTU_VER=20.04

FROM nvidia/cuda:${CUDA_VER}-runtime-ubuntu${UBUNTU_VER}
ARG UCX_VER
ARG UCX_CUDA_VER
ARG UBUNTU_VER
+ARG UCX_ARCH

RUN apt-get update && apt-get install -y gnupg2
# https://forums.developer.nvidia.com/t/notice-cuda-linux-repository-key-rotation/212771
@@ -41,7 +44,7 @@ RUN CUDA_UBUNTU_VER=`echo "$UBUNTU_VER"| sed -s 's/\.//'` && \
RUN apt update
RUN apt-get install -y wget
RUN mkdir /tmp/ucx_install && cd /tmp/ucx_install && \
-wget https://github.com/openucx/ucx/releases/download/v$UCX_VER/ucx-$UCX_VER-ubuntu$UBUNTU_VER-mofed5-cuda$UCX_CUDA_VER.tar.bz2 && \
-tar -xvf ucx-$UCX_VER-ubuntu$UBUNTU_VER-mofed5-cuda$UCX_CUDA_VER.tar.bz2 && \
+wget https://github.com/openucx/ucx/releases/download/v$UCX_VER/ucx-$UCX_VER-ubuntu$UBUNTU_VER-mofed5-cuda$UCX_CUDA_VER-$UCX_ARCH.tar.bz2 && \
+tar -xvf ucx-$UCX_VER-ubuntu$UBUNTU_VER-mofed5-cuda$UCX_CUDA_VER-$UCX_ARCH.tar.bz2 && \
apt install -y /tmp/ucx_install/*.deb && \
rm -rf /tmp/ucx_install
@@ -20,9 +20,10 @@
# - RDMA_CORE_VERSION: Set to 32.1 to match the rdma-core line in the latest
# released MLNX_OFED 5.x driver
# - CUDA_VER: 11.8.0 by default
-# - UCX_VER and UCX_CUDA_VER: these are used to pick a package matching a specific UCX version and
-#   CUDA runtime from the UCX github repo.
-#   See: https://github.com/openucx/ucx/releases/
+# - UCX_VER, UCX_CUDA_VER, and UCX_ARCH:
+#   Used to pick a package matching a specific UCX version and
+#   CUDA runtime from the UCX github repo.
+#   See: https://github.com/openucx/ucx/releases/
# - UBUNTU_VER: 20.04 by default
#
# The Dockerfile first fetches and builds `rdma-core` to satisfy requirements for
@@ -34,15 +35,17 @@

ARG RDMA_CORE_VERSION=32.1
ARG CUDA_VER=11.8.0
-ARG UCX_VER=1.14.0
+ARG UCX_VER=1.15.0
ARG UCX_CUDA_VER=11
+ARG UCX_ARCH=x86_64
ARG UBUNTU_VER=20.04

# Throw away image to build rdma_core
FROM ubuntu:${UBUNTU_VER} as rdma_core
ARG RDMA_CORE_VERSION
ARG UBUNTU_VER
ARG CUDA_VER
+ARG UCX_ARCH

RUN apt-get update && apt-get install -y gnupg2
# https://forums.developer.nvidia.com/t/notice-cuda-linux-repository-key-rotation/212771
@@ -61,6 +64,7 @@ RUN tar -xvf *.tar.gz && cd rdma-core*/ && dpkg-buildpackage -b -d
FROM nvidia/cuda:${CUDA_VER}-runtime-ubuntu${UBUNTU_VER}
ARG UCX_VER
ARG UCX_CUDA_VER
+ARG UCX_ARCH
ARG UBUNTU_VER

RUN mkdir /tmp/ucx_install
@@ -70,7 +74,7 @@ COPY --from=rdma_core /*.deb /tmp/ucx_install/
RUN apt update
RUN apt-get install -y wget
RUN cd /tmp/ucx_install && \
-wget https://github.com/openucx/ucx/releases/download/v$UCX_VER/ucx-$UCX_VER-ubuntu$UBUNTU_VER-mofed5-cuda$UCX_CUDA_VER.tar.bz2 && \
-tar -xvf ucx-$UCX_VER-ubuntu$UBUNTU_VER-mofed5-cuda$UCX_CUDA_VER.tar.bz2 && \
+wget https://github.com/openucx/ucx/releases/download/v$UCX_VER/ucx-$UCX_VER-ubuntu$UBUNTU_VER-mofed5-cuda$UCX_CUDA_VER-$UCX_ARCH.tar.bz2 && \
+tar -xvf ucx-$UCX_VER-ubuntu$UBUNTU_VER-mofed5-cuda$UCX_CUDA_VER-$UCX_ARCH.tar.bz2 && \
apt install -y /tmp/ucx_install/*.deb && \
rm -rf /tmp/ucx_install
26 changes: 24 additions & 2 deletions docs/compatibility.md
@@ -83,6 +83,21 @@ after Spark 3.1.0.
We do not disable operations that produce different results due to `-0.0` in the data because it is
considered to be a rare occurrence.
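As an aside (not from the docs), plain Python illustrates why `-0.0` is troublesome: the two zeros compare equal, yet they are distinct values that sign-sensitive operations treat differently.

```python
# Illustrative only: IEEE 754 negative zero equals positive zero under ==,
# but the sign bit survives, so some operations can produce different results.
import math
import struct

print(-0.0 == 0.0)                    # True: they compare equal
print(math.copysign(1.0, -0.0))       # -1.0: the sign bit is preserved
print(struct.pack('>d', -0.0).hex())  # 8000000000000000
print(struct.pack('>d', 0.0).hex())   # 0000000000000000
print(math.atan2(0.0, -0.0))          # 3.141592653589793, vs. atan2(0.0, 0.0) == 0.0
```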

+## Decimal Support
+
+Apache Spark supports decimal values with a precision up to 38. This equates to 128-bits.
+When processing the data, in most cases, it is temporarily converted to Java's `BigDecimal` type
+which allows for effectively unlimited precision. Overflows will be detected whenever the
+`BigDecimal` value is converted back into the Spark decimal type.
+
+The RAPIDS Accelerator does not implement a GPU equivalent of `BigDecimal`, but it does implement
+computation on 256-bit values to allow the detection of overflows. The points at which overflows
+are detected may differ between the CPU and GPU. Spark gives no guarantees that overflows are
+detected if an intermediate value could overflow the original decimal type during computation
+but the final value does not (e.g.: a sum of values with many large positive values followed by
+many large negative values). Spark injects overflow detection at various points during aggregation,
+and these points can fluctuate depending on cluster shape and number of shuffle partitions.
+
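A minimal PySpark sketch of the intermediate-overflow case described above (illustrative, not from the docs; the outcome depends on `spark.sql.ansi.enabled` and on where the CPU or GPU happens to inject its overflow checks):

```python
from decimal import Decimal

from pyspark.sql import SparkSession
from pyspark.sql.types import DecimalType, StructField, StructType

spark = SparkSession.builder.getOrCreate()

big = Decimal(10**37)
# Many large positive values followed by many large negative values: the
# partial sums of the positives overflow DECIMAL(38,0), but the final sum
# is 0 and fits the type comfortably.
rows = [(big,)] * 20 + [(-big,)] * 20
schema = StructType([StructField("v", DecimalType(38, 0))])
df = spark.createDataFrame(rows, schema)

# May return 0, return null, or raise an overflow error depending on where
# overflow detection is injected, which is exactly the point made above.
df.selectExpr("sum(v) AS total").show()
```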
## Unicode

Spark delegates Unicode operations to the underlying JVM. Each version of Java complies with a
@@ -330,7 +345,15 @@ Dates are partially supported but there are some known issues:
parsed as null ([#9664](https://github.com/NVIDIA/spark-rapids/issues/9664)) whereas Spark versions prior to 3.4
will parse these numbers as number of days since the epoch, and in Spark 3.4 and later, an exception will be thrown.

-Timestamps are not supported ([#9590](https://github.com/NVIDIA/spark-rapids/issues/9590)).
+Timestamps are partially supported but there are some known issues:
+
+- Only the default `timestampFormat` of `yyyy-MM-dd'T'HH:mm:ss[.SSS][XXX]` is supported. The query will fall back to CPU if any other format
+  is specified ([#9723](https://github.com/NVIDIA/spark-rapids/issues/9723))
+- Strings containing integers with more than four digits will be
+  parsed as null ([#9664](https://github.com/NVIDIA/spark-rapids/issues/9664)) whereas Spark versions prior to 3.4
+  will parse these numbers as number of days since the epoch, and in Spark 3.4 and later, an exception will be thrown.
+- Strings containing special date constant values such as `now` and `today` will parse as null ([#9724](https://github.com/NVIDIA/spark-rapids/issues/9724)),
+  which differs from the behavior in Spark 3.1.x
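A short sketch of the supported path (illustrative; the session and file path are assumptions, and the default `timestampFormat` is the one quoted above):

```python
from pyspark.sql import SparkSession
from pyspark.sql.types import StructField, StructType, TimestampType

spark = SparkSession.builder.getOrCreate()

# e.g. a file containing lines like: {"ts": "2023-11-30T12:34:56.789Z"}
schema = StructType([StructField("ts", TimestampType())])
df = spark.read.schema(schema).json("/tmp/events.json")  # hypothetical path
df.show(truncate=False)

# Specifying any non-default timestampFormat triggers CPU fallback (#9723):
df_cpu = (spark.read.schema(schema)
               .option("timestampFormat", "yyyy/MM/dd HH:mm:ss")
               .json("/tmp/events.json"))
```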

When reading numeric values, the GPU implementation always supports leading zeros regardless of the setting
for the JSON option `allowNumericLeadingZeros` ([#9588](https://github.com/NVIDIA/spark-rapids/issues/9588)).
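An illustrative sketch of that difference, reusing the session above (the file content and path are assumptions; the CPU behavior assumes the default PERMISSIVE parsing mode):

```python
with open("/tmp/leading_zeros.json", "w") as f:  # hypothetical file
    f.write('{"n": 007}\n')                      # a number with leading zeros

df = spark.read.schema("n INT").json("/tmp/leading_zeros.json")
# With allowNumericLeadingZeros at its default (false), the CPU reader treats
# the record as malformed and yields null; the GPU reader returns 7.
df.show()
```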
@@ -352,7 +375,6 @@ with Spark, and can be enabled by setting `spark.rapids.sql.expression.StructsToJson

Known issues are:

-- There is no support for timestamp types
- There can be rounding differences when formatting floating-point numbers as strings. For example, Spark may
produce `-4.1243574E26` but the GPU may produce `-4.124357351E26`.
- Not all JSON options are respected
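A sketch of enabling the expression named above (the config key is taken from the text; the session and data are illustrative):

```python
spark.conf.set("spark.rapids.sql.expression.StructsToJson", "true")

df = spark.createDataFrame([(1, "a")], "id INT, name STRING")
# to_json is backed by StructsToJson; with the flag set it becomes eligible
# for the GPU, subject to the known issues above (e.g. float formatting).
df.selectExpr("to_json(struct(id, name)) AS js").show(truncate=False)
```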
16 changes: 8 additions & 8 deletions docs/supported_ops.md
@@ -8141,8 +8141,8 @@ are limited.
<td> </td>
<td> </td>
<td><b>NS</b></td>
-<td><em>PS<br/>MAP only supports keys and values that are of STRING type;<br/>unsupported child types TIMESTAMP, NULL, BINARY, CALENDAR, MAP, UDT</em></td>
-<td><em>PS<br/>unsupported child types TIMESTAMP, NULL, BINARY, CALENDAR, MAP, UDT</em></td>
+<td><em>PS<br/>MAP only supports keys and values that are of STRING type;<br/>UTC is only supported TZ for child TIMESTAMP;<br/>unsupported child types NULL, BINARY, CALENDAR, MAP, UDT</em></td>
+<td><em>PS<br/>UTC is only supported TZ for child TIMESTAMP;<br/>unsupported child types NULL, BINARY, CALENDAR, MAP, UDT</em></td>
<td> </td>
</tr>
<tr>
@@ -14541,16 +14541,16 @@
<td>S</td>
<td>S</td>
<td>S</td>
<td> </td>
<td> </td>
<td>S</td>
<td><em>PS<br/>UTC is only supported TZ for TIMESTAMP</em></td>
<td>S</td>
<td>S</td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td>S</td>
<td>S</td>
<td>S</td>
<td><em>PS<br/>UTC is only supported TZ for child TIMESTAMP</em></td>
<td><em>PS<br/>UTC is only supported TZ for child TIMESTAMP</em></td>
<td><em>PS<br/>UTC is only supported TZ for child TIMESTAMP</em></td>
<td> </td>
</tr>
<tr>
2 changes: 2 additions & 0 deletions integration_tests/src/main/python/aqe_test.py
@@ -16,6 +16,7 @@
from pyspark.sql.functions import when, col, current_date, current_timestamp
from pyspark.sql.types import *
from asserts import assert_gpu_and_cpu_are_equal_collect, assert_cpu_and_gpu_are_equal_collect_with_capture
+from conftest import is_not_utc
from data_gen import *
from marks import ignore_order, allow_non_gpu
from spark_session import with_cpu_session, is_databricks113_or_later
@@ -195,6 +196,7 @@ def do_it(spark):
@ignore_order(local=True)
@allow_non_gpu('BroadcastNestedLoopJoinExec', 'Cast', 'DateSub', *db_113_cpu_bnlj_join_allow)
@pytest.mark.parametrize('join', joins, ids=idfn)
+@pytest.mark.xfail(condition = is_not_utc(), reason = 'xfail non-UTC time zone tests because of https://github.com/NVIDIA/spark-rapids/issues/9653')
def test_aqe_join_reused_exchange_inequality_condition(spark_tmp_path, join):
data_path = spark_tmp_path + '/PARQUET_DATA'
def prep(spark):
17 changes: 9 additions & 8 deletions integration_tests/src/main/python/arithmetic_ops_test.py
@@ -16,6 +16,7 @@
import pytest

from asserts import assert_gpu_and_cpu_are_equal_collect, assert_gpu_and_cpu_error, assert_gpu_fallback_collect, assert_gpu_and_cpu_are_equal_sql
+from conftest import is_not_utc
from data_gen import *
from marks import ignore_order, incompat, approximate_float, allow_non_gpu, datagen_overrides
from pyspark.sql.types import *
@@ -586,17 +587,15 @@
assert_gpu_and_cpu_are_equal_collect(
lambda spark : unary_op_df(spark, data_gen).selectExpr('floor(a)'))

-@datagen_overrides(seed=0, reason='https://github.com/NVIDIA/spark-rapids/issues/9722')
@pytest.mark.skipif(is_before_spark_330(), reason='scale parameter in Floor function is not supported before Spark 3.3.0')
-@pytest.mark.parametrize('data_gen', double_n_long_gens + _arith_decimal_gens_no_neg_scale, ids=idfn)
+@pytest.mark.parametrize('data_gen', [long_gen] + _arith_decimal_gens_no_neg_scale, ids=idfn)
def test_floor_scale_zero(data_gen):
assert_gpu_and_cpu_are_equal_collect(
-lambda spark : unary_op_df(spark, data_gen).selectExpr('floor(a, 0)'),
-conf={'spark.rapids.sql.castFloatToDecimal.enabled':'true'})
+lambda spark : unary_op_df(spark, data_gen).selectExpr('floor(a, 0)'))

@pytest.mark.skipif(is_before_spark_330(), reason='scale parameter in Floor function is not supported before Spark 3.3.0')
@allow_non_gpu('ProjectExec')
-@pytest.mark.parametrize('data_gen', double_n_long_gens + _arith_decimal_gens_no_neg_scale_38_0_overflow, ids=idfn)
+@pytest.mark.parametrize('data_gen', [long_gen] + _arith_decimal_gens_no_neg_scale_38_0_overflow, ids=idfn)
def test_floor_scale_nonzero(data_gen):
assert_gpu_fallback_collect(
lambda spark : unary_op_df(spark, data_gen).selectExpr('floor(a, -1)'), 'RoundFloor')
@@ -607,11 +606,10 @@ def test_ceil(data_gen):
lambda spark : unary_op_df(spark, data_gen).selectExpr('ceil(a)'))

@pytest.mark.skipif(is_before_spark_330(), reason='scale parameter in Ceil function is not supported before Spark 3.3.0')
-@pytest.mark.parametrize('data_gen', double_n_long_gens + _arith_decimal_gens_no_neg_scale, ids=idfn)
+@pytest.mark.parametrize('data_gen', [long_gen] + _arith_decimal_gens_no_neg_scale, ids=idfn)
def test_ceil_scale_zero(data_gen):
assert_gpu_and_cpu_are_equal_collect(
-lambda spark : unary_op_df(spark, data_gen).selectExpr('ceil(a, 0)'),
-conf={'spark.rapids.sql.castFloatToDecimal.enabled':'true'})
+lambda spark : unary_op_df(spark, data_gen).selectExpr('ceil(a, 0)'))

@pytest.mark.parametrize('data_gen', [_decimal_gen_36_neg5, _decimal_gen_38_neg10], ids=idfn)
def test_floor_ceil_overflow(data_gen):
@@ -693,6 +691,7 @@ def test_decimal_bround(data_gen):

@incompat
@approximate_float
+@datagen_overrides(seed=0, reason="https://github.com/NVIDIA/spark-rapids/issues/9847")
@pytest.mark.parametrize('data_gen', _arith_data_gens_for_round, ids=idfn)
def test_decimal_round(data_gen):
assert_gpu_and_cpu_are_equal_collect(
@@ -985,6 +984,7 @@ def test_columnar_pow(data_gen):
lambda spark : binary_op_df(spark, data_gen).selectExpr('pow(a, b)'))

@pytest.mark.parametrize('data_gen', all_basic_gens + _arith_decimal_gens, ids=idfn)
+@pytest.mark.xfail(condition = is_not_utc(), reason = 'xfail non-UTC time zone tests because of https://github.com/NVIDIA/spark-rapids/issues/9653')
def test_least(data_gen):
num_cols = 20
s1 = with_cpu_session(
@@ -1001,6 +1001,7 @@
f.least(*command_args)))

@pytest.mark.parametrize('data_gen', all_basic_gens + _arith_decimal_gens, ids=idfn)
+@pytest.mark.xfail(condition = is_not_utc(), reason = 'xfail non-UTC time zone tests because of https://github.com/NVIDIA/spark-rapids/issues/9653')
def test_greatest(data_gen):
num_cols = 20
s1 = with_cpu_session(