Skip to content

Commit

Permalink
[CHORE] bring up fixtures for iceberg (#1527)
Browse files Browse the repository at this point in the history
* Adds fixtures (Minio / Iceberg Rest server / Spark) to enable iceberg
integration tests
  • Loading branch information
samster25 authored Oct 25, 2023
1 parent ccd5878 commit 227b622
Show file tree
Hide file tree
Showing 28 changed files with 780 additions and 2 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/nightlies-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ jobs:
- name: Spin up IO services
uses: isbang/[email protected]
with:
compose-file: ./tests/integration/docker-compose/docker-compose.yml
compose-file: ./tests/integration/io/docker-compose/docker-compose.yml
down-flags: --volumes
- name: Run IO integration tests
run: |
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/python-package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -307,7 +307,7 @@ jobs:
- name: Spin up IO services
uses: isbang/[email protected]
with:
compose-file: ./tests/integration/docker-compose/docker-compose.yml
compose-file: ./tests/integration/io/docker-compose/docker-compose.yml
down-flags: --volumes
- name: Run IO integration tests
run: |
Expand Down
File renamed without changes.
71 changes: 71 additions & 0 deletions tests/integration/iceberg/docker-compose/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Base image: the official Python 3.9 image on Debian bullseye.
FROM python:3.9-bullseye

# OS-level packages (listed alphabetically). update/install/clean run in a
# single layer so the apt package lists are not persisted in the image.
RUN apt-get -qq update \
    && apt-get -qq install -y --no-install-recommends \
        build-essential \
        curl \
        openjdk-11-jdk \
        software-properties-common \
        ssh \
        sudo \
        unzip \
        vim \
    && apt-get -qq clean \
    && rm -rf /var/lib/apt/lists/*

# Optional env variables: install locations for Spark and Hadoop, falling back
# to /opt/spark and /opt/hadoop when no value is already defined.
ENV SPARK_HOME=${SPARK_HOME:-"/opt/spark"}
ENV HADOOP_HOME=${HADOOP_HOME:-"/opt/hadoop"}
# Expose the PySpark sources and the py4j zip to Python.
# NOTE(review): the py4j file name presumably has to match the zip shipped in
# the Spark distribution selected by SPARK_VERSION below - confirm when bumping.
ENV PYTHONPATH=$SPARK_HOME/python:$SPARK_HOME/python/lib/py4j-0.10.9.7-src.zip:$PYTHONPATH

# Create the install directories plus the spark-events directory, then make
# SPARK_HOME the working directory for the remaining build steps.
RUN mkdir -p ${HADOOP_HOME} ${SPARK_HOME} /home/iceberg/spark-events
WORKDIR ${SPARK_HOME}

# Versions of the components downloaded/installed by the steps that follow.
ENV SPARK_VERSION=3.4.1 \
    ICEBERG_SPARK_RUNTIME_VERSION=3.4_2.12 \
    ICEBERG_VERSION=1.4.0 \
    AWS_SDK_VERSION=2.20.18 \
    PYICEBERG_VERSION=0.4.0

# Download the Spark distribution and unpack it into /opt/spark.
# --fail makes curl exit non-zero on an HTTP error instead of saving the error
# page as the tarball (which would only surface later as a confusing tar
# failure). dlcdn.apache.org is tried first; if the release is not available
# there, fall back to archive.apache.org, which keeps historical releases.
RUN SPARK_TGZ="spark-${SPARK_VERSION}-bin-hadoop3.tgz" \
    && ( curl --fail --location --retry 3 -s -C - "https://dlcdn.apache.org/spark/spark-${SPARK_VERSION}/${SPARK_TGZ}" -o "${SPARK_TGZ}" \
         || curl --fail --location --retry 3 -s "https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/${SPARK_TGZ}" -o "${SPARK_TGZ}" ) \
    && tar xzf "${SPARK_TGZ}" --directory /opt/spark --strip-components 1 \
    && rm -rf "${SPARK_TGZ}"

# Download iceberg spark runtime straight into Spark's jars directory.
# --fail aborts the build on an HTTP error instead of silently installing the
# error response body as a .jar file.
RUN curl --fail -s -L \
        "https://repo1.maven.org/maven2/org/apache/iceberg/iceberg-spark-runtime-${ICEBERG_SPARK_RUNTIME_VERSION}/${ICEBERG_VERSION}/iceberg-spark-runtime-${ICEBERG_SPARK_RUNTIME_VERSION}-${ICEBERG_VERSION}.jar" \
        -o "/opt/spark/jars/iceberg-spark-runtime-${ICEBERG_SPARK_RUNTIME_VERSION}-${ICEBERG_VERSION}.jar"

# Download AWS bundle (same approach: fail fast, write directly to the jars directory).
RUN curl --fail -s -L \
        "https://repo1.maven.org/maven2/org/apache/iceberg/iceberg-aws-bundle/${ICEBERG_VERSION}/iceberg-aws-bundle-${ICEBERG_VERSION}.jar" \
        -o "/opt/spark/jars/iceberg-aws-bundle-${ICEBERG_VERSION}.jar"

# Spark configuration, and put the Spark launcher scripts on PATH.
COPY spark-defaults.conf /opt/spark/conf
ENV PATH="/opt/spark/sbin:/opt/spark/bin:${PATH}"

# Make sure the Spark launcher scripts are executable by their owner.
RUN chmod u+x /opt/spark/sbin/* && \
    chmod u+x /opt/spark/bin/*

# Python tooling. --no-cache-dir keeps pip's download cache out of the image layers.
RUN pip3 install -q --no-cache-dir ipython

RUN pip3 install --no-cache-dir "pyiceberg[s3fs]==${PYICEBERG_VERSION}"

# Copied into the current WORKDIR.
COPY entrypoint.sh .
COPY provision.py .

ENTRYPOINT ["./entrypoint.sh"]
# NOTE(review): the entrypoint.sh in this directory does not read its
# arguments, so this default argument appears to be unused - confirm before
# relying on it.
CMD ["notebook"]
86 changes: 86 additions & 0 deletions tests/integration/iceberg/docker-compose/check-license
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
#!/usr/bin/env bash

#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#


# Ensure the Apache RAT jar is present at "$rat_jar", downloading it if needed.
#
# Reads:   RAT_VERSION (release to fetch) and rat_jar (destination path).
# Returns: 0 when a jar that passes `unzip -t` is in place; 1 when neither curl
#          nor wget is available or the downloaded file is not a valid archive.
#          Failures are returned (not `exit -1`) so the caller's `||` handler
#          decides how to terminate.
acquire_rat_jar () {
  local url="https://repo.maven.apache.org/maven2/org/apache/rat/apache-rat/${RAT_VERSION}/apache-rat-${RAT_VERSION}.jar"
  local jar="$rat_jar"
  # Download to a temporary name first so a partial file is never mistaken
  # for the finished jar.
  local jar_part="${jar}.part"

  # Download rat launch jar if it hasn't been downloaded yet
  if [ ! -f "$jar" ]; then
    printf "Attempting to fetch rat\n"
    if command -v curl > /dev/null 2>&1; then
      # --fail: an HTTP error must not be saved as if it were the jar.
      curl --fail -L --silent "$url" > "$jar_part" && mv "$jar_part" "$jar"
    elif command -v wget > /dev/null 2>&1; then
      wget --quiet "$url" -O "$jar_part" && mv "$jar_part" "$jar"
    else
      printf "You do not have curl or wget installed, please install rat manually.\n"
      return 1
    fi
  fi

  # A jar is a zip archive; verify its integrity before trusting it.
  if ! unzip -tq "$jar" > /dev/null 2>&1; then
    # We failed to download: remove whatever was left behind (-f: the files
    # may not exist at all if the transfer never started).
    rm -f "$jar" "$jar_part"
    printf 'Our attempt to download rat locally to %s failed. Please install rat manually.\n' "$jar"
    return 1
  fi
}

# Go to the parent of the directory that contains this script; all paths
# below are resolved relative to it. Abort if it cannot be entered.
FWDIR="$(cd "$(dirname "$0")"/.. && pwd)" || exit 1
cd "$FWDIR" || exit 1

# Prefer the JVM from JAVA_HOME when it is executable, else rely on PATH.
if test -x "$JAVA_HOME/bin/java"; then
  java_cmd="$JAVA_HOME/bin/java"
else
  java_cmd=java
fi

export RAT_VERSION=0.15
export rat_jar="$FWDIR"/lib/apache-rat-${RAT_VERSION}.jar
mkdir -p "$FWDIR"/lib

# Fetch the RAT jar only when it is not already cached under lib/.
[[ -f "$rat_jar" ]] || acquire_rat_jar || {
    echo "Download failed. Obtain the rat jar manually and place it at $rat_jar"
    exit 1
}

mkdir -p build
# "$java_cmd" is quoted so a JAVA_HOME containing spaces still works.
# NOTE(review): this expects an excludes file at $FWDIR/dev/.rat-excludes -
# confirm that it exists relative to where this copy of the script lives.
if ! "$java_cmd" -jar "$rat_jar" -E "$FWDIR"/dev/.rat-excludes -d "$FWDIR" > build/rat-results.txt; then
   echo "RAT exited abnormally"
   exit 1
fi

# Collect the report lines that contain "??"; they are listed below as files
# without an Apache license header.
ERRORS="$(grep -e "??" build/rat-results.txt)"

if [ -n "$ERRORS" ]; then
    echo "Could not find Apache license headers in the following files:"
    echo "$ERRORS"
    exit 1
else
    echo -e "RAT checks passed."
fi
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
version: "3"

services:
  # Azurite container running only the blob service (azurite-blob), bound to
  # all interfaces and published on host port 10000.
  azurite:
    image: mcr.microsoft.com/azure-storage/azurite
    container_name: azurite
    hostname: azurite
    ports:
      - "10000:10000"
    command:
      - azurite-blob
      - --loose
      - --blobHost
      - 0.0.0.0
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
version: "3"

services:
  # fake-gcs-server container, published on host port 4443.
  gcs-server:
    image: fsouza/fake-gcs-server
    container_name: gcs-server
    ports:
      - "4443:4443"
    # Create the /data/warehouse directory, then run the server over plain
    # HTTP with /data as its data directory.
    entrypoint:
      - /bin/sh
      - -c
      - " mkdir -p /data/warehouse; /bin/fake-gcs-server -data /data -scheme http; exit 0; "
88 changes: 88 additions & 0 deletions tests/integration/iceberg/docker-compose/docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
version: '3'

# Fixture stack for the Iceberg integration tests: a Spark container built
# from the Dockerfile in this directory, an Iceberg REST catalog, MinIO as
# the S3-compatible object store, and a MinIO client container that creates
# the "warehouse" bucket. All services share the iceberg_net network.
services:
  spark-iceberg:
    image: python-integration
    container_name: pyiceberg-spark
    build: .
    networks:
      iceberg_net:
    depends_on:
      - rest
      - minio
    volumes:
      - ./warehouse:/home/iceberg/warehouse
    environment:
      - AWS_ACCESS_KEY_ID=admin
      - AWS_SECRET_ACCESS_KEY=password
      - AWS_REGION=us-east-1
    # Port mappings are quoted so YAML always treats them as strings.
    ports:
      - "8888:8888"
      - "8080:8080"
    links:
      - rest:rest
      - minio:minio
  rest:
    image: tabulario/iceberg-rest
    container_name: pyiceberg-rest
    networks:
      iceberg_net:
    ports:
      - "8181:8181"
    environment:
      - AWS_ACCESS_KEY_ID=admin
      - AWS_SECRET_ACCESS_KEY=password
      - AWS_REGION=us-east-1
      - CATALOG_WAREHOUSE=s3://warehouse/
      - CATALOG_IO__IMPL=org.apache.iceberg.aws.s3.S3FileIO
      - CATALOG_S3_ENDPOINT=http://minio:9000
  minio:
    image: minio/minio
    container_name: pyiceberg-minio
    environment:
      - MINIO_ROOT_USER=admin
      - MINIO_ROOT_PASSWORD=password
      - MINIO_DOMAIN=minio
    networks:
      iceberg_net:
        # Lets the virtual-host style name "warehouse.minio" resolve to this container.
        aliases:
          - warehouse.minio
    ports:
      - "9000:9000"
    command: [server, /data]
  mc:
    depends_on:
      - minio
    image: minio/mc
    container_name: pyiceberg-mc
    networks:
      iceberg_net:
    environment:
      - AWS_ACCESS_KEY_ID=admin
      - AWS_SECRET_ACCESS_KEY=password
      - AWS_REGION=us-east-1
    # Retry until MinIO accepts the alias registration, then create the
    # warehouse bucket and allow anonymous access to it. Uses `mc alias set`
    # and `mc anonymous set`, the current replacements for the deprecated
    # `mc config host add` and `mc policy set` (the image tag is not pinned,
    # so the deprecated spellings cannot be relied on).
    # `tail -f /dev/null` keeps the container alive afterwards.
    entrypoint: >
      /bin/sh -c "
      until (/usr/bin/mc alias set minio http://minio:9000 admin password) do echo '...waiting...' && sleep 1; done;
      /usr/bin/mc mb minio/warehouse;
      /usr/bin/mc anonymous set public minio/warehouse;
      tail -f /dev/null
      "
networks:
  iceberg_net:
25 changes: 25 additions & 0 deletions tests/integration/iceberg/docker-compose/entrypoint.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
#!/bin/bash
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#

# Container entrypoint: launch the Spark daemons, run the provisioning script,
# then keep the container alive. The commands run in order with no error
# checking, so a failing step does not stop the ones after it.

# Start the Spark master, passing 7077 as its port.
start-master.sh -p 7077
# Start a worker pointed at that master; "spark-iceberg" is the name of this
# service in the accompanying docker-compose.yml.
start-worker.sh spark://spark-iceberg:7077
# Start the Spark history server.
start-history-server.sh
# Run provision.py, which the Dockerfile copies next to this script.
python3 provision.py
# Block forever so the container does not exit once provisioning returns.
tail -f /dev/null
Loading

0 comments on commit 227b622

Please sign in to comment.