Skip to content

Commit

Permalink
Merge branch 'main' into issue-10275-alt3
Browse files Browse the repository at this point in the history
  • Loading branch information
slessard committed Sep 26, 2024
2 parents e1b3931 + 2d9c344 commit e574623
Show file tree
Hide file tree
Showing 237 changed files with 9,279 additions and 1,411 deletions.
5 changes: 5 additions & 0 deletions .baseline/checkstyle/checkstyle.xml
Original file line number Diff line number Diff line change
Expand Up @@ -427,6 +427,11 @@
<property name="illegalPkgs" value="org.hamcrest"/>
<message key="import.illegal" value="Prefer using org.assertj.core.api.Assertions instead."/>
</module>
<module name="IllegalImport">
  <!-- Bans JUnit 5 Assertions in favour of AssertJ.
       illegalPkgs only rejects imports that start with the configured value followed by ".",
       which covers static imports of Assertions members. illegalClasses additionally rejects
       a plain import of the Assertions class itself. -->
  <property name="id" value="BanJUnit5Assertions"/>
  <property name="illegalPkgs" value="org.junit.jupiter.api.Assertions"/>
  <property name="illegalClasses" value="org.junit.jupiter.api.Assertions"/>
  <message key="import.illegal" value="Prefer using org.assertj.core.api.Assertions instead."/>
</module>
<module name="RegexpSinglelineJava">
<property name="ignoreComments" value="true"/>
<property name="format" value="@Json(S|Des)erialize"/>
Expand Down
6 changes: 4 additions & 2 deletions .github/workflows/delta-conversion-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ on:
- '.github/workflows/hive-ci.yml'
- '.github/workflows/java-ci.yml'
- '.github/workflows/jmh-benchmarks-ci.yml'
- '.github/workflows/kafka-connect-ci.yml'
- '.github/workflows/labeler.yml'
- '.github/workflows/licence-check.yml'
- '.github/workflows/open-api.yml'
Expand All @@ -51,6 +52,7 @@ on:
- 'hive3-orc-bundle/**'
- 'hive-runtime/**'
- 'flink/**'
- 'kafka-connect/**'
- 'pig/**'
- 'docs/**'
- 'site/**'
Expand Down Expand Up @@ -88,7 +90,7 @@ jobs:
key: ${{ runner.os }}-gradle-${{ hashFiles('**/*.gradle*', '**/gradle-wrapper.properties') }}
restore-keys: ${{ runner.os }}-gradle-
- run: echo -e "$(ip addr show eth0 | grep "inet\b" | awk '{print $2}' | cut -d/ -f1)\t$(hostname -f) $(hostname -s)" | sudo tee -a /etc/hosts
- run: ./gradlew -DsparkVersions=3.5 -DscalaVersion=2.12 -DhiveVersions= -DflinkVersions= :iceberg-delta-lake:check -Pquick=true -x javadoc
- run: ./gradlew -DsparkVersions=3.5 -DscalaVersion=2.12 -DhiveVersions= -DkafkaVersions= -DflinkVersions= :iceberg-delta-lake:check -Pquick=true -x javadoc
- uses: actions/upload-artifact@v4
if: failure()
with:
Expand Down Expand Up @@ -117,7 +119,7 @@ jobs:
key: ${{ runner.os }}-gradle-${{ hashFiles('**/*.gradle*', '**/gradle-wrapper.properties') }}
restore-keys: ${{ runner.os }}-gradle-
- run: echo -e "$(ip addr show eth0 | grep "inet\b" | awk '{print $2}' | cut -d/ -f1)\t$(hostname -f) $(hostname -s)" | sudo tee -a /etc/hosts
- run: ./gradlew -DsparkVersions=3.5 -DscalaVersion=2.13 -DhiveVersions= -DflinkVersions= :iceberg-delta-lake:check -Pquick=true -x javadoc
- run: ./gradlew -DsparkVersions=3.5 -DscalaVersion=2.13 -DhiveVersions= -DkafkaVersions= -DflinkVersions= :iceberg-delta-lake:check -Pquick=true -x javadoc
- uses: actions/upload-artifact@v4
if: failure()
with:
Expand Down
4 changes: 3 additions & 1 deletion .github/workflows/flink-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ on:
- '.github/workflows/hive-ci.yml'
- '.github/workflows/java-ci.yml'
- '.github/workflows/jmh-benchmarks-ci.yml'
- '.github/workflows/kafka-connect-ci.yml'
- '.github/workflows/labeler.yml'
- '.github/workflows/licence-check.yml'
- '.github/workflows/open-api.yml'
Expand All @@ -50,6 +51,7 @@ on:
- 'hive3/**'
- 'hive3-orc-bundle/**'
- 'hive-runtime/**'
- 'kafka-connect/**'
- 'spark/**'
- 'pig/**'
- 'docs/**'
Expand Down Expand Up @@ -91,7 +93,7 @@ jobs:
key: ${{ runner.os }}-gradle-${{ hashFiles('**/*.gradle*', '**/gradle-wrapper.properties') }}
restore-keys: ${{ runner.os }}-gradle-
- run: echo -e "$(ip addr show eth0 | grep "inet\b" | awk '{print $2}' | cut -d/ -f1)\t$(hostname -f) $(hostname -s)" | sudo tee -a /etc/hosts
- run: ./gradlew -DsparkVersions= -DhiveVersions= -DflinkVersions=${{ matrix.flink }} :iceberg-flink:iceberg-flink-${{ matrix.flink }}:check :iceberg-flink:iceberg-flink-runtime-${{ matrix.flink }}:check -Pquick=true -x javadoc
- run: ./gradlew -DsparkVersions= -DhiveVersions= -DkafkaVersions= -DflinkVersions=${{ matrix.flink }} :iceberg-flink:iceberg-flink-${{ matrix.flink }}:check :iceberg-flink:iceberg-flink-runtime-${{ matrix.flink }}:check -Pquick=true -x javadoc
- uses: actions/upload-artifact@v4
if: failure()
with:
Expand Down
6 changes: 4 additions & 2 deletions .github/workflows/hive-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ on:
- '.github/workflows/flink-ci.yml'
- '.github/workflows/java-ci.yml'
- '.github/workflows/jmh-benchmarks-ci.yml'
- '.github/workflows/kafka-connect-ci.yml'
- '.github/workflows/labeler.yml'
- '.github/workflows/licence-check.yml'
- '.github/workflows/open-api.yml'
Expand All @@ -49,6 +50,7 @@ on:
- 'arrow/**'
- 'spark/**'
- 'flink/**'
- 'kafka-connect/**'
- 'pig/**'
- 'docs/**'
- 'site/**'
Expand Down Expand Up @@ -86,7 +88,7 @@ jobs:
key: ${{ runner.os }}-gradle-${{ hashFiles('**/*.gradle*', '**/gradle-wrapper.properties') }}
restore-keys: ${{ runner.os }}-gradle-
- run: echo -e "$(ip addr show eth0 | grep "inet\b" | awk '{print $2}' | cut -d/ -f1)\t$(hostname -f) $(hostname -s)" | sudo tee -a /etc/hosts
- run: ./gradlew -DsparkVersions= -DhiveVersions=2 -DflinkVersions= -Pquick=true :iceberg-mr:check :iceberg-hive-runtime:check -x javadoc
- run: ./gradlew -DsparkVersions= -DhiveVersions=2 -DflinkVersions= -DkafkaVersions= -Pquick=true :iceberg-mr:check :iceberg-hive-runtime:check -x javadoc
- uses: actions/upload-artifact@v4
if: failure()
with:
Expand Down Expand Up @@ -115,7 +117,7 @@ jobs:
key: ${{ runner.os }}-gradle-${{ hashFiles('**/*.gradle*', '**/gradle-wrapper.properties') }}
restore-keys: ${{ runner.os }}-gradle-
- run: echo -e "$(ip addr show eth0 | grep "inet\b" | awk '{print $2}' | cut -d/ -f1)\t$(hostname -f) $(hostname -s)" | sudo tee -a /etc/hosts
- run: ./gradlew -DsparkVersions= -DhiveVersions=3 -DflinkVersions= -Pquick=true :iceberg-hive3-orc-bundle:check :iceberg-hive3:check :iceberg-hive-runtime:check -x javadoc
- run: ./gradlew -DsparkVersions= -DhiveVersions=3 -DflinkVersions= -DkafkaVersions= -Pquick=true :iceberg-hive3-orc-bundle:check :iceberg-hive3:check :iceberg-hive-runtime:check -x javadoc
- uses: actions/upload-artifact@v4
if: failure()
with:
Expand Down
3 changes: 2 additions & 1 deletion .github/workflows/java-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ on:
- '.github/workflows/flink-ci.yml'
- '.github/workflows/hive-ci.yml'
- '.github/workflows/jmh-benchmarks-ci.yml'
- '.github/workflows/kafka-connect-ci.yml'
- '.github/workflows/labeler.yml'
- '.github/workflows/licence-check.yml'
- '.github/workflows/open-api.yml'
Expand Down Expand Up @@ -82,7 +83,7 @@ jobs:
key: ${{ runner.os }}-gradle-${{ hashFiles('**/*.gradle*', '**/gradle-wrapper.properties') }}
restore-keys: ${{ runner.os }}-gradle-
- run: echo -e "$(ip addr show eth0 | grep "inet\b" | awk '{print $2}' | cut -d/ -f1)\t$(hostname -f) $(hostname -s)" | sudo tee -a /etc/hosts
- run: ./gradlew check -DsparkVersions= -DhiveVersions= -DflinkVersions= -Pquick=true -x javadoc
- run: ./gradlew check -DsparkVersions= -DhiveVersions= -DflinkVersions= -DkafkaVersions= -Pquick=true -x javadoc
- uses: actions/upload-artifact@v4
if: failure()
with:
Expand Down
105 changes: 105 additions & 0 deletions .github/workflows/kafka-connect-ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#

name: "Kafka Connect CI"
on:
push:
branches:
- 'main'
- '0.*'
- '1.*'
- '2.*'
tags:
- 'apache-iceberg-**'
pull_request:
paths-ignore:
- '.github/ISSUE_TEMPLATE/**'
- '.github/workflows/api-binary-compatibility.yml'
- '.github/workflows/delta-conversion-ci.yml'
- '.github/workflows/flink-ci.yml'
- '.github/workflows/hive-ci.yml'
- '.github/workflows/java-ci.yml'
- '.github/workflows/jmh-benchmarks-ci.yml'
- '.github/workflows/labeler.yml'
- '.github/workflows/licence-check.yml'
- '.github/workflows/open-api.yml'
- '.github/workflows/publish-snapshot.yml'
- '.github/workflows/recurring-jmh-benchmarks.yml'
- '.github/workflows/site-ci.yml'
- '.github/workflows/spark-ci.yml'
- '.github/workflows/stale.yml'
- '.gitignore'
- '.asf.yml'
- 'dev/**'
- 'mr/**'
- 'flink/**'
- 'hive3/**'
- 'hive3-orc-bundle/**'
- 'hive-runtime/**'
- 'spark/**'
- 'pig/**'
- 'docs/**'
- 'site/**'
- 'open-api/**'
- 'format/**'
- '.gitattributes'
- 'README.md'
- 'CONTRIBUTING.md'
- 'LICENSE'
- 'NOTICE'

concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: ${{ github.event_name == 'pull_request' }}

jobs:

kafka-connect-tests:
  # Runs the Kafka Connect module checks once per JVM version listed in the matrix.
  runs-on: ubuntu-22.04
  strategy:
    matrix:
      jvm: [11, 17, 21]
  env:
    SPARK_LOCAL_IP: localhost
  steps:
    - uses: actions/checkout@v4
    - uses: actions/setup-java@v4
      with:
        distribution: zulu
        java-version: ${{ matrix.jvm }}
    # Cache Gradle downloads, keyed on the build scripts and wrapper properties.
    - uses: actions/cache@v4
      with:
        path: |
          ~/.gradle/caches
          ~/.gradle/wrapper
        key: ${{ runner.os }}-gradle-${{ hashFiles('**/*.gradle*', '**/gradle-wrapper.properties') }}
        restore-keys: ${{ runner.os }}-gradle-
    # Append the runner's eth0 address with its FQDN and short hostname to /etc/hosts.
    - run: echo -e "$(ip addr show eth0 | grep "inet\b" | awk '{print $2}' | cut -d/ -f1)\t$(hostname -f) $(hostname -s)" | sudo tee -a /etc/hosts
    # Spark, Hive and Flink versions are set to empty; only Kafka Connect (version 3) is checked.
    - run: |
        ./gradlew -DsparkVersions= -DhiveVersions= -DflinkVersions= -DkafkaVersions=3 \
        :iceberg-kafka-connect:iceberg-kafka-connect-events:check \
        :iceberg-kafka-connect:iceberg-kafka-connect:check \
        :iceberg-kafka-connect:iceberg-kafka-connect-runtime:check \
        -Pquick=true -x javadoc
    - uses: actions/upload-artifact@v4
      if: failure()
      with:
        # upload-artifact@v4 does not allow uploading to the same artifact name twice in one
        # workflow run, so each matrix leg gets its own name; otherwise only the first failing
        # leg could publish its logs.
        name: test logs (jvm ${{ matrix.jvm }})
        path: |
          **/build/testlogs
2 changes: 1 addition & 1 deletion .github/workflows/publish-snapshot.yml
Original file line number Diff line number Diff line change
Expand Up @@ -41,4 +41,4 @@ jobs:
- run: |
./gradlew printVersion
./gradlew -DallModules publishApachePublicationToMavenRepository -PmavenUser=${{ secrets.NEXUS_USER }} -PmavenPassword=${{ secrets.NEXUS_PW }}
./gradlew -DflinkVersions= -DsparkVersions=3.3,3.4,3.5 -DscalaVersion=2.13 -DhiveVersions= publishApachePublicationToMavenRepository -PmavenUser=${{ secrets.NEXUS_USER }} -PmavenPassword=${{ secrets.NEXUS_PW }}
./gradlew -DflinkVersions= -DsparkVersions=3.3,3.4,3.5 -DscalaVersion=2.13 -DkafkaVersions=3 -DhiveVersions= publishApachePublicationToMavenRepository -PmavenUser=${{ secrets.NEXUS_USER }} -PmavenPassword=${{ secrets.NEXUS_PW }}
4 changes: 3 additions & 1 deletion .github/workflows/spark-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ on:
- '.github/workflows/hive-ci.yml'
- '.github/workflows/java-ci.yml'
- '.github/workflows/jmh-benchmarks-ci.yml'
- '.github/workflows/kafka-connect-ci.yml'
- '.github/workflows/labeler.yml'
- '.github/workflows/licence-check.yml'
- '.github/workflows/open-api.yml'
Expand All @@ -52,6 +53,7 @@ on:
- 'hive3-orc-bundle/**'
- 'hive-runtime/**'
- 'flink/**'
- 'kafka-connect/**'
- 'pig/**'
- 'docs/**'
- 'open-api/**'
Expand Down Expand Up @@ -101,7 +103,7 @@ jobs:
tool-cache: false
- run: echo -e "$(ip addr show eth0 | grep "inet\b" | awk '{print $2}' | cut -d/ -f1)\t$(hostname -f) $(hostname -s)" | sudo tee -a /etc/hosts
- run: |
./gradlew -DsparkVersions=${{ matrix.spark }} -DscalaVersion=${{ matrix.scala }} -DhiveVersions= -DflinkVersions= \
./gradlew -DsparkVersions=${{ matrix.spark }} -DscalaVersion=${{ matrix.scala }} -DhiveVersions= -DflinkVersions= -DkafkaVersions= \
:iceberg-spark:iceberg-spark-${{ matrix.spark }}_${{ matrix.scala }}:check \
:iceberg-spark:iceberg-spark-extensions-${{ matrix.spark }}_${{ matrix.scala }}:check \
:iceberg-spark:iceberg-spark-runtime-${{ matrix.spark }}_${{ matrix.scala }}:check \
Expand Down
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -66,3 +66,6 @@ metastore_db/
# Spark/metastore files
spark-warehouse/
derby.log

# jenv
.java-version
4 changes: 4 additions & 0 deletions .palantir/revapi.yml
Original file line number Diff line number Diff line change
Expand Up @@ -1088,6 +1088,10 @@ acceptedBreaks:
old: "class org.apache.iceberg.GenericManifestFile"
new: "class org.apache.iceberg.GenericManifestFile"
justification: "Serialization across versions is not supported"
- code: "java.class.defaultSerializationChanged"
old: "class org.apache.iceberg.io.WriteResult"
new: "class org.apache.iceberg.io.WriteResult"
justification: "Serialization across versions is not supported"
- code: "java.class.removed"
old: "enum org.apache.iceberg.BaseMetastoreTableOperations.CommitStatus"
justification: "Removing deprecated code"
Expand Down
20 changes: 19 additions & 1 deletion api/src/main/java/org/apache/iceberg/ContentFile.java
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,14 @@
* @param <F> the concrete Java class of a ContentFile instance.
*/
public interface ContentFile<F> {
/**
* Returns the path of the manifest which this file is referenced in or null if it was not read
* from a manifest.
*
* <p>This default implementation always returns null.
*/
default String manifestLocation() {
return null;
}

/**
* Returns the ordinal position of the file in a manifest, or null if it was not read from a
* manifest.
Expand All @@ -43,9 +51,19 @@ public interface ContentFile<F> {
*/
FileContent content();

/** Returns fully qualified path to the file, suitable for constructing a Hadoop Path. */
/**
* Returns fully qualified path to the file, suitable for constructing a Hadoop Path.
*
* @deprecated since 1.7.0, will be removed in 2.0.0; use {@link #location()} instead.
*/
@Deprecated
CharSequence path();

/**
 * Returns the fully qualified path to the file as a {@code String}.
 *
 * <p>This default implementation converts the value of {@link #path()} with {@code toString()}.
 */
default String location() {
  final CharSequence filePath = path();
  return filePath.toString();
}

/** Returns format of the file. */
FileFormat format();

Expand Down
5 changes: 4 additions & 1 deletion api/src/main/java/org/apache/iceberg/DataFile.java
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ public interface DataFile extends ContentFile<DataFile> {
int PARTITION_ID = 102;
String PARTITION_NAME = "partition";
String PARTITION_DOC = "Partition data tuple, schema based on the partition spec";

// NEXT ID TO ASSIGN: 142

static StructType getType(StructType partitionType) {
Expand All @@ -126,7 +127,9 @@ static StructType getType(StructType partitionType) {
SORT_ORDER_ID);
}

/** @return the content stored in the file; one of DATA, POSITION_DELETES, or EQUALITY_DELETES */
/**
* @return the content stored in the file; one of DATA, POSITION_DELETES, or EQUALITY_DELETES
*/
@Override
default FileContent content() {
return FileContent.DATA;
Expand Down
11 changes: 11 additions & 0 deletions api/src/main/java/org/apache/iceberg/RowDelta.java
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,17 @@ public interface RowDelta extends SnapshotUpdate<RowDelta> {
*/
RowDelta addDeletes(DeleteFile deletes);

/**
 * Removes a rewritten {@link DeleteFile} from the table.
 *
 * <p>This default implementation does not remove anything; it always throws.
 *
 * @param deletes a delete file that can be removed from the table
 * @return this for method chaining
 * @throws UnsupportedOperationException always, in this default implementation
 */
default RowDelta removeDeletes(DeleteFile deletes) {
  final String implementationName = getClass().getName();
  throw new UnsupportedOperationException(implementationName + " does not implement removeDeletes");
}

/**
* Set the snapshot ID used in any reads for this operation.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -132,5 +132,5 @@ public interface UpdatePartitionSpec extends PendingUpdate<PartitionSpec> {
default UpdatePartitionSpec addNonDefaultSpec() {
throw new UnsupportedOperationException(
this.getClass().getName() + " doesn't implement addNonDefaultSpec()");
};
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,9 @@
import java.util.Map;

/**
 * A minimum client interface to connect to a key management service (KMS).
 *
 * @deprecated the API will be removed in v2.0.0 (replaced with KeyManagementClient interface).
 */
@Deprecated
public interface KmsClient extends Serializable {

Expand Down
Loading

0 comments on commit e574623

Please sign in to comment.