Remove Hive 2 dependency
manuzhang committed Sep 27, 2024
1 parent 26648ae commit e724fd0
Showing 67 changed files with 232 additions and 763 deletions.
31 changes: 1 addition & 30 deletions .github/workflows/hive-ci.yml
@@ -67,35 +67,6 @@ concurrency:
cancel-in-progress: ${{ github.event_name == 'pull_request' }}

jobs:
hive2-tests:
runs-on: ubuntu-22.04
strategy:
matrix:
jvm: [11, 17, 21]
env:
SPARK_LOCAL_IP: localhost
steps:
- uses: actions/checkout@v4
- uses: actions/setup-java@v4
with:
distribution: zulu
java-version: ${{ matrix.jvm }}
- uses: actions/cache@v4
with:
path: |
~/.gradle/caches
~/.gradle/wrapper
key: ${{ runner.os }}-gradle-${{ hashFiles('**/*.gradle*', '**/gradle-wrapper.properties') }}
restore-keys: ${{ runner.os }}-gradle-
- run: echo -e "$(ip addr show eth0 | grep "inet\b" | awk '{print $2}' | cut -d/ -f1)\t$(hostname -f) $(hostname -s)" | sudo tee -a /etc/hosts
- run: ./gradlew -DsparkVersions= -DhiveVersions=2 -DflinkVersions= -DkafkaVersions= -Pquick=true :iceberg-mr:check :iceberg-hive-runtime:check -x javadoc
- uses: actions/upload-artifact@v4
if: failure()
with:
name: test logs
path: |
**/build/testlogs
hive3-tests:
runs-on: ubuntu-22.04
strategy:
@@ -117,7 +88,7 @@ jobs:
key: ${{ runner.os }}-gradle-${{ hashFiles('**/*.gradle*', '**/gradle-wrapper.properties') }}
restore-keys: ${{ runner.os }}-gradle-
- run: echo -e "$(ip addr show eth0 | grep "inet\b" | awk '{print $2}' | cut -d/ -f1)\t$(hostname -f) $(hostname -s)" | sudo tee -a /etc/hosts
- run: ./gradlew -DsparkVersions= -DhiveVersions=3 -DflinkVersions= -DkafkaVersions= -Pquick=true :iceberg-hive3-orc-bundle:check :iceberg-hive3:check :iceberg-hive-runtime:check -x javadoc
- run: ./gradlew -DsparkVersions= -DhiveVersions=3 -DflinkVersions= -DkafkaVersions= -Pquick=true :iceberg-mr:check :iceberg-hive3-orc-bundle:check :iceberg-hive3:check :iceberg-hive-runtime:check -x javadoc
- uses: actions/upload-artifact@v4
if: failure()
with:
10 changes: 6 additions & 4 deletions build.gradle
@@ -684,7 +684,7 @@ project(':iceberg-hive-metastore') {

compileOnly libs.avro.avro

compileOnly(libs.hive2.metastore) {
compileOnly(libs.hive3.metastore) {
exclude group: 'org.apache.avro', module: 'avro'
exclude group: 'org.slf4j', module: 'slf4j-log4j12'
exclude group: 'org.pentaho' // missing dependency
@@ -704,7 +704,7 @@ project(':iceberg-hive-metastore') {
// that's really old. We use the core classifier to be able to override our guava
// version. Luckily, hive-exec seems to work okay so far with this version of guava
// See: https://github.com/apache/hive/blob/master/ql/pom.xml#L911 for more context.
testImplementation("${libs.hive2.exec.get().module}:${libs.hive2.exec.get().getVersion()}:core") {
testImplementation("${libs.hive3.exec.get().module}:${libs.hive3.exec.get().getVersion()}:core") {
exclude group: 'org.apache.avro', module: 'avro'
exclude group: 'org.slf4j', module: 'slf4j-log4j12'
exclude group: 'org.pentaho' // missing dependency
@@ -716,7 +716,7 @@
exclude group: 'com.google.code.findbugs', module: 'jsr305'
}

testImplementation(libs.hive2.metastore) {
testImplementation(libs.hive3.metastore) {
exclude group: 'org.apache.avro', module: 'avro'
exclude group: 'org.slf4j', module: 'slf4j-log4j12'
exclude group: 'org.pentaho' // missing dependency
@@ -732,7 +732,7 @@ project(':iceberg-hive-metastore') {
exclude group: 'com.zaxxer', module: 'HikariCP'
}

compileOnly(libs.hadoop2.client) {
testImplementation(libs.hive3.standalone.metastore)

compileOnly(libs.hadoop3.client) {
exclude group: 'org.apache.avro', module: 'avro'
exclude group: 'org.slf4j', module: 'slf4j-log4j12'
}
8 changes: 4 additions & 4 deletions flink/v1.18/build.gradle
@@ -88,7 +88,7 @@ project(":iceberg-flink:iceberg-flink-${flinkMajorVersion}") {
// that's really old. We use the core classifier to be able to override our guava
// version. Luckily, hive-exec seems to work okay so far with this version of guava
// See: https://github.com/apache/hive/blob/master/ql/pom.xml#L911 for more context.
testImplementation("${libs.hive2.exec.get().module}:${libs.hive2.exec.get().getVersion()}:core") {
testImplementation("${libs.hive3.exec.get().module}:${libs.hive3.exec.get().getVersion()}:core") {
exclude group: 'org.apache.avro', module: 'avro'
exclude group: 'org.slf4j', module: 'slf4j-log4j12'
exclude group: 'org.pentaho' // missing dependency
@@ -100,7 +100,7 @@ project(":iceberg-flink:iceberg-flink-${flinkMajorVersion}") {
exclude group: 'com.google.code.findbugs', module: 'jsr305'
}

testImplementation(libs.hive2.metastore) {
testImplementation(libs.hive3.metastore) {
exclude group: 'org.apache.avro', module: 'avro'
exclude group: 'org.slf4j', module: 'slf4j-log4j12'
exclude group: 'org.pentaho' // missing dependency
@@ -192,7 +192,7 @@ project(":iceberg-flink:iceberg-flink-runtime-${flinkMajorVersion}") {
exclude group: 'org.apache.avro', module: 'avro'
}

integrationImplementation(libs.hive2.metastore) {
integrationImplementation(libs.hive3.metastore) {
exclude group: 'org.apache.avro', module: 'avro'
exclude group: 'org.slf4j', module: 'slf4j-log4j12'
exclude group: 'org.pentaho' // missing dependency
@@ -209,7 +209,7 @@ project(":iceberg-flink:iceberg-flink-runtime-${flinkMajorVersion}") {
exclude group: 'org.slf4j'
}

integrationImplementation("${libs.hive2.exec.get().module}:${libs.hive2.exec.get().getVersion()}:core") {
integrationImplementation("${libs.hive3.exec.get().module}:${libs.hive3.exec.get().getVersion()}:core") {
exclude group: 'org.apache.avro', module: 'avro'
exclude group: 'org.slf4j', module: 'slf4j-log4j12'
exclude group: 'org.pentaho' // missing dependency
8 changes: 4 additions & 4 deletions flink/v1.19/build.gradle
@@ -88,7 +88,7 @@ project(":iceberg-flink:iceberg-flink-${flinkMajorVersion}") {
// that's really old. We use the core classifier to be able to override our guava
// version. Luckily, hive-exec seems to work okay so far with this version of guava
// See: https://github.com/apache/hive/blob/master/ql/pom.xml#L911 for more context.
testImplementation("${libs.hive2.exec.get().module}:${libs.hive2.exec.get().getVersion()}:core") {
testImplementation("${libs.hive3.exec.get().module}:${libs.hive3.exec.get().getVersion()}:core") {
exclude group: 'org.apache.avro', module: 'avro'
exclude group: 'org.slf4j', module: 'slf4j-log4j12'
exclude group: 'org.pentaho' // missing dependency
@@ -100,7 +100,7 @@ project(":iceberg-flink:iceberg-flink-${flinkMajorVersion}") {
exclude group: 'com.google.code.findbugs', module: 'jsr305'
}

testImplementation(libs.hive2.metastore) {
testImplementation(libs.hive3.metastore) {
exclude group: 'org.apache.avro', module: 'avro'
exclude group: 'org.slf4j', module: 'slf4j-log4j12'
exclude group: 'org.pentaho' // missing dependency
@@ -193,7 +193,7 @@ project(":iceberg-flink:iceberg-flink-runtime-${flinkMajorVersion}") {
exclude group: 'org.apache.avro', module: 'avro'
}

integrationImplementation(libs.hive2.metastore) {
integrationImplementation(libs.hive3.metastore) {
exclude group: 'org.apache.avro', module: 'avro'
exclude group: 'org.slf4j', module: 'slf4j-log4j12'
exclude group: 'org.pentaho' // missing dependency
@@ -210,7 +210,7 @@ project(":iceberg-flink:iceberg-flink-runtime-${flinkMajorVersion}") {
exclude group: 'org.slf4j'
}

integrationImplementation("${libs.hive2.exec.get().module}:${libs.hive2.exec.get().getVersion()}:core") {
integrationImplementation("${libs.hive3.exec.get().module}:${libs.hive3.exec.get().getVersion()}:core") {
exclude group: 'org.apache.avro', module: 'avro'
exclude group: 'org.slf4j', module: 'slf4j-log4j12'
exclude group: 'org.pentaho' // missing dependency
8 changes: 4 additions & 4 deletions flink/v1.20/build.gradle
@@ -88,7 +88,7 @@ project(":iceberg-flink:iceberg-flink-${flinkMajorVersion}") {
// that's really old. We use the core classifier to be able to override our guava
// version. Luckily, hive-exec seems to work okay so far with this version of guava
// See: https://github.com/apache/hive/blob/master/ql/pom.xml#L911 for more context.
testImplementation("${libs.hive2.exec.get().module}:${libs.hive2.exec.get().getVersion()}:core") {
testImplementation("${libs.hive3.exec.get().module}:${libs.hive3.exec.get().getVersion()}:core") {
exclude group: 'org.apache.avro', module: 'avro'
exclude group: 'org.slf4j', module: 'slf4j-log4j12'
exclude group: 'org.pentaho' // missing dependency
@@ -100,7 +100,7 @@ project(":iceberg-flink:iceberg-flink-${flinkMajorVersion}") {
exclude group: 'com.google.code.findbugs', module: 'jsr305'
}

testImplementation(libs.hive2.metastore) {
testImplementation(libs.hive3.metastore) {
exclude group: 'org.apache.avro', module: 'avro'
exclude group: 'org.slf4j', module: 'slf4j-log4j12'
exclude group: 'org.pentaho' // missing dependency
@@ -193,7 +193,7 @@ project(":iceberg-flink:iceberg-flink-runtime-${flinkMajorVersion}") {
exclude group: 'org.apache.avro', module: 'avro'
}

integrationImplementation(libs.hive2.metastore) {
integrationImplementation(libs.hive3.metastore) {
exclude group: 'org.apache.avro', module: 'avro'
exclude group: 'org.slf4j', module: 'slf4j-log4j12'
exclude group: 'org.pentaho' // missing dependency
@@ -210,7 +210,7 @@ project(":iceberg-flink:iceberg-flink-runtime-${flinkMajorVersion}") {
exclude group: 'org.slf4j'
}

integrationImplementation("${libs.hive2.exec.get().module}:${libs.hive2.exec.get().getVersion()}:core") {
integrationImplementation("${libs.hive3.exec.get().module}:${libs.hive3.exec.get().getVersion()}:core") {
exclude group: 'org.apache.avro', module: 'avro'
exclude group: 'org.slf4j', module: 'slf4j-log4j12'
exclude group: 'org.pentaho' // missing dependency
4 changes: 2 additions & 2 deletions gradle.properties
@@ -18,8 +18,8 @@ jmhJsonOutputPath=build/reports/jmh/results.json
jmhIncludeRegex=.*
systemProp.defaultFlinkVersions=1.20
systemProp.knownFlinkVersions=1.18,1.19,1.20
systemProp.defaultHiveVersions=2
systemProp.knownHiveVersions=2,3
systemProp.defaultHiveVersions=3
systemProp.knownHiveVersions=3
systemProp.defaultSparkVersions=3.5
systemProp.knownSparkVersions=3.3,3.4,3.5
systemProp.defaultKafkaVersions=3
6 changes: 1 addition & 5 deletions gradle/libs.versions.toml
@@ -49,7 +49,6 @@ guava = "33.3.0-jre"
hadoop2 = "2.7.3"
hadoop3 = "3.3.6"
httpcomponents-httpclient5 = "5.4"
hive2 = { strictly = "2.3.9"} # see rich version usage explanation above
hive3 = "3.1.3"
immutables-value = "2.10.1"
jackson-bom = "2.14.2"
@@ -139,12 +138,9 @@ hadoop2-mapreduce-client-core = { module = "org.apache.hadoop:hadoop-mapreduce-c
hadoop2-minicluster = { module = "org.apache.hadoop:hadoop-minicluster", version.ref = "hadoop2" }
hadoop3-client = { module = "org.apache.hadoop:hadoop-client", version.ref = "hadoop3" }
hadoop3-common = { module = "org.apache.hadoop:hadoop-common", version.ref = "hadoop3" }
hive2-exec = { module = "org.apache.hive:hive-exec", version.ref = "hive2" }
hive2-metastore = { module = "org.apache.hive:hive-metastore", version.ref = "hive2" }
hive2-serde = { module = "org.apache.hive:hive-serde", version.ref = "hive2" }
hive2-service = { module = "org.apache.hive:hive-service", version.ref = "hive2" }
hive3-exec = { module = "org.apache.hive:hive-exec", version.ref = "hive3" }
hive3-metastore = { module = "org.apache.hive:hive-metastore", version.ref = "hive3" }
hive3-standalone-metastore = { module = "org.apache.hive:hive-standalone-metastore", version.ref = "hive3" }
hive3-serde = { module = "org.apache.hive:hive-serde", version.ref = "hive3" }
hive3-service = { module = "org.apache.hive:hive-service", version.ref = "hive3" }
httpcomponents-httpclient5 = { module = "org.apache.httpcomponents.client5:httpclient5", version.ref = "httpcomponents-httpclient5" }
hive-metastore/src/test/java/org/apache/iceberg/hive/TestHiveCatalog.java
@@ -509,7 +509,7 @@ public void testRegisterHadoopTableToHiveCatalog() throws IOException, TExceptio

assertThatThrownBy(() -> HIVE_METASTORE_EXTENSION.metastoreClient().getTable(DB_NAME, "table1"))
.isInstanceOf(NoSuchObjectException.class)
.hasMessage("hivedb.table1 table not found");
.hasMessage("hive.hivedb.table1 table not found");
assertThatThrownBy(() -> catalog.loadTable(identifier))
.isInstanceOf(NoSuchTableException.class)
.hasMessage("Table does not exist: hivedb.table1");
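Why the expected message changes: the Hive 3 metastore is catalog-aware and addresses tables as `<catalog>.<db>.<table>`, with `hive` as the built-in default catalog, so `NoSuchObjectException` messages now carry a `hive.` prefix. A minimal sketch of that qualification (the helper and the default-catalog constant are assumptions for illustration; the real formatting happens inside the Hive 3 metastore):

```java
// Hypothetical helper mirroring Hive 3's catalog-qualified table naming;
// not Iceberg or Hive source code.
class CatalogNames {
  static final String DEFAULT_CATALOG = "hive"; // assumed Hive 3 default catalog name

  static String qualify(String db, String table) {
    return DEFAULT_CATALOG + "." + db + "." + table;
  }

  public static void main(String[] args) {
    System.out.println(qualify("hivedb", "table1")); // prints "hive.hivedb.table1"
  }
}
```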
hive-metastore/src/test/java/org/apache/iceberg/hive/TestHiveClientPool.java
@@ -30,12 +30,12 @@
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
import org.apache.hadoop.hive.metastore.MetaStoreUtils;
import org.apache.hadoop.hive.metastore.api.Function;
import org.apache.hadoop.hive.metastore.api.FunctionType;
import org.apache.hadoop.hive.metastore.api.GetAllFunctionsResponse;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.metastore.api.PrincipalType;
import org.apache.hadoop.hive.metastore.utils.JavaUtils;
import org.apache.iceberg.relocated.com.google.common.collect.Lists;
import org.apache.thrift.transport.TTransportException;
import org.junit.jupiter.api.AfterEach;
@@ -121,36 +121,36 @@ public void testGetTablesFailsForNonReconnectableException() throws Exception {

@Test
public void testExceptionMessages() {
try (MockedStatic<MetaStoreUtils> mockedStatic = Mockito.mockStatic(MetaStoreUtils.class)) {
try (MockedStatic<JavaUtils> mockedStatic = Mockito.mockStatic(JavaUtils.class)) {
mockedStatic
.when(() -> MetaStoreUtils.newInstance(any(), any(), any()))
.when(() -> JavaUtils.newInstance(any(), any(), any()))
.thenThrow(new RuntimeException(new MetaException("Another meta exception")));
assertThatThrownBy(() -> clients.run(client -> client.getTables("default", "t")))
.isInstanceOf(RuntimeMetaException.class)
.hasMessage("Failed to connect to Hive Metastore");
}

try (MockedStatic<MetaStoreUtils> mockedStatic = Mockito.mockStatic(MetaStoreUtils.class)) {
try (MockedStatic<JavaUtils> mockedStatic = Mockito.mockStatic(JavaUtils.class)) {
mockedStatic
.when(() -> MetaStoreUtils.newInstance(any(), any(), any()))
.when(() -> JavaUtils.newInstance(any(), any(), any()))
.thenThrow(new RuntimeException(new MetaException()));
assertThatThrownBy(() -> clients.run(client -> client.getTables("default", "t")))
.isInstanceOf(RuntimeMetaException.class)
.hasMessage("Failed to connect to Hive Metastore");
}

try (MockedStatic<MetaStoreUtils> mockedStatic = Mockito.mockStatic(MetaStoreUtils.class)) {
try (MockedStatic<JavaUtils> mockedStatic = Mockito.mockStatic(JavaUtils.class)) {
mockedStatic
.when(() -> MetaStoreUtils.newInstance(any(), any(), any()))
.when(() -> JavaUtils.newInstance(any(), any(), any()))
.thenThrow(new RuntimeException());
assertThatThrownBy(() -> clients.run(client -> client.getTables("default", "t")))
.isInstanceOf(RuntimeMetaException.class)
.hasMessage("Failed to connect to Hive Metastore");
}

try (MockedStatic<MetaStoreUtils> mockedStatic = Mockito.mockStatic(MetaStoreUtils.class)) {
try (MockedStatic<JavaUtils> mockedStatic = Mockito.mockStatic(JavaUtils.class)) {
mockedStatic
.when(() -> MetaStoreUtils.newInstance(any(), any(), any()))
.when(() -> JavaUtils.newInstance(any(), any(), any()))
.thenThrow(new RuntimeException("Another instance of Derby may have already booted"));
assertThatThrownBy(() -> clients.run(client -> client.getTables("default", "t")))
.isInstanceOf(RuntimeMetaException.class)
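For context, the import and mock-target swap above reflects that Hive 3 moved the reflective client-construction helper from `MetaStoreUtils` to `org.apache.hadoop.hive.metastore.utils.JavaUtils`. A self-contained sketch of the static-mocking pattern this test relies on (requires Mockito's inline mock maker; the `Factory` class is a hypothetical stand-in for `JavaUtils`):

```java
import static org.assertj.core.api.Assertions.assertThatThrownBy;

import org.mockito.MockedStatic;
import org.mockito.Mockito;

class StaticMockingSketch {
  // Hypothetical stand-in for JavaUtils.newInstance(Class, Class[], Object[]).
  static class Factory {
    static Object newInstance() {
      return new Object();
    }
  }

  // Caller that translates any factory failure into one uniform runtime error,
  // the same behavior the client pool above is being tested for.
  static Object connect() {
    try {
      return Factory.newInstance();
    } catch (RuntimeException e) {
      throw new IllegalStateException("Failed to connect to Hive Metastore", e);
    }
  }

  public static void main(String[] args) {
    try (MockedStatic<Factory> mocked = Mockito.mockStatic(Factory.class)) {
      // Every call to the static factory now throws, whatever the cause.
      mocked.when(Factory::newInstance).thenThrow(new RuntimeException("boom"));
      assertThatThrownBy(StaticMockingSketch::connect)
          .isInstanceOf(IllegalStateException.class)
          .hasMessage("Failed to connect to Hive Metastore");
    }
  }
}
```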
9 changes: 2 additions & 7 deletions hive3/build.gradle
@@ -27,13 +27,6 @@ project(':iceberg-hive3') {
}
}

// exclude these Hive2-specific tests from iceberg-mr
test {
exclude '**/TestIcebergDateObjectInspector.class'
exclude '**/TestIcebergTimestampObjectInspector.class'
exclude '**/TestIcebergTimestampWithZoneObjectInspector.class'
}

dependencies {
compileOnly project(path: ':iceberg-bundled-guava', configuration: 'shadow')
compileOnly project(':iceberg-api')
@@ -86,6 +79,8 @@ project(':iceberg-hive3') {
exclude group: 'org.codehaus.jackson'
}

implementation libs.caffeine

testImplementation project(path: ':iceberg-api', configuration: 'testArtifacts')
testImplementation project(path: ':iceberg-core', configuration: 'testArtifacts')
testImplementation project(path: ':iceberg-hive-metastore', configuration: 'testArtifacts')
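One notable addition above: `implementation libs.caffeine` becomes a direct dependency of iceberg-hive3, presumably because Caffeine no longer arrives transitively once the Hive 2 artifacts are gone. For orientation, generic Caffeine usage looks like this (illustration of the library only, not Iceberg code):

```java
import com.github.benmanes.caffeine.cache.Cache;
import com.github.benmanes.caffeine.cache.Caffeine;
import java.time.Duration;

class CaffeineSketch {
  public static void main(String[] args) {
    // A bounded, time-expiring in-memory cache; keys and values here are arbitrary.
    Cache<String, String> cache =
        Caffeine.newBuilder()
            .maximumSize(1_000)
            .expireAfterWrite(Duration.ofMinutes(10))
            .build();
    cache.put("db.table", "s3://bucket/metadata.json");
    System.out.println(cache.getIfPresent("db.table"));
  }
}
```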
mr/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergInputFormat.java
@@ -134,9 +134,7 @@ public boolean shouldSkipCombine(Path path, Configuration conf) {
return true;
}

// Override annotation commented out, since this interface method has been introduced only in Hive
// 3
// @Override
@Override
public VectorizedSupport.Support[] getSupportedFeatures() {
return new VectorizedSupport.Support[0];
}
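The `@Override` can be restored because iceberg-mr now always compiles against Hive 3, whose vectorization interface actually declares this method. Roughly the relevant shape, paraphrased from memory of the Hive 3 API (treat the names as assumptions and check the Hive 3 sources):

```java
import org.apache.hadoop.hive.ql.exec.vector.VectorizedSupport;

// Paraphrased sketch of Hive 3's input-format vectorization hook; the
// method below exists in Hive 3 but not in Hive 2, which is why the
// annotation had to stay commented out while Hive 2 was still supported.
public interface VectorizedInputFormatInterface {
  VectorizedSupport.Support[] getSupportedFeatures();
}
```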
mr/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergOutputCommitter.java
@@ -49,6 +49,7 @@
import org.apache.iceberg.io.OutputFile;
import org.apache.iceberg.mr.Catalogs;
import org.apache.iceberg.mr.InputFormatConfig;
import org.apache.iceberg.mr.mapreduce.Utils;
import org.apache.iceberg.relocated.com.google.common.annotations.VisibleForTesting;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
import org.apache.iceberg.relocated.com.google.common.util.concurrent.ThreadFactoryBuilder;
@@ -116,7 +117,7 @@ public void commitTask(TaskAttemptContext originalContext) throws IOException {
.executeWith(tableExecutor)
.run(
output -> {
Table table = HiveIcebergStorageHandler.table(context.getJobConf(), output);
Table table = Utils.table(context.getJobConf(), output);
if (table != null) {
HiveIcebergRecordWriter writer = writers.get(output);
DataFile[] closedFiles;
@@ -206,7 +207,7 @@ public void commitJob(JobContext originalContext) throws IOException {
.executeWith(tableExecutor)
.run(
output -> {
Table table = HiveIcebergStorageHandler.table(jobConf, output);
Table table = Utils.table(jobConf, output);
if (table != null) {
String catalogName = HiveIcebergStorageHandler.catalogName(jobConf, output);
jobLocations.add(
@@ -262,7 +263,7 @@ public void abortJob(JobContext originalContext, int status) throws IOException
.run(
output -> {
LOG.info("Cleaning table {} with job id {}", output, jobContext.getJobID());
Table table = HiveIcebergStorageHandler.table(jobConf, output);
Table table = Utils.table(jobConf, output);
jobLocations.add(
generateJobLocation(table.location(), jobConf, jobContext.getJobID()));
Collection<DataFile> dataFiles =
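The repeated change in this committer is the relocation of the table lookup from `HiveIcebergStorageHandler.table(...)` to `org.apache.iceberg.mr.mapreduce.Utils`, decoupling the MapReduce committer from the Hive-specific storage-handler class. A plausible shape for the relocated helper, assuming it mirrors the old implementation (the constant and utility names are assumptions, not the authoritative code):

```java
import org.apache.hadoop.conf.Configuration;
import org.apache.iceberg.Table;
import org.apache.iceberg.mr.InputFormatConfig;
import org.apache.iceberg.util.SerializationUtil;

// Sketch: each output table is serialized into the job configuration under a
// per-table key at planning time; this looks it up again on the task side.
public final class Utils {
  private Utils() {}

  public static Table table(Configuration conf, String name) {
    String serialized = conf.get(InputFormatConfig.SERIALIZED_TABLE_PREFIX + name);
    return serialized == null ? null : SerializationUtil.deserializeFromBase64(serialized);
  }
}
```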
mr/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergOutputFormat.java
@@ -41,6 +41,7 @@
import org.apache.iceberg.io.OutputFileFactory;
import org.apache.iceberg.mr.Catalogs;
import org.apache.iceberg.mr.mapred.Container;
import org.apache.iceberg.mr.mapreduce.Utils;
import org.apache.iceberg.util.PropertyUtil;

public class HiveIcebergOutputFormat<T>
@@ -72,8 +73,7 @@ public void checkOutputSpecs(FileSystem ignored, JobConf job) {
private static HiveIcebergRecordWriter writer(JobConf jc) {
TaskAttemptID taskAttemptID = TezUtil.taskAttemptWrapper(jc);
// It gets the config from the FileSinkOperator which has its own config for every target table
Table table =
HiveIcebergStorageHandler.table(jc, jc.get(hive_metastoreConstants.META_TABLE_NAME));
Table table = Utils.table(jc, jc.get(hive_metastoreConstants.META_TABLE_NAME));
Schema schema = HiveIcebergStorageHandler.schema(jc);
PartitionSpec spec = table.spec();
FileFormat fileFormat =
Expand Down