Skip to content

Commit

Permalink
Merge pull request #5 from apache/master
Browse files Browse the repository at this point in the history
master pull
  • Loading branch information
fengjian428 authored Mar 23, 2022
2 parents 06ea24c + 5f570ea commit 4885c98
Show file tree
Hide file tree
Showing 398 changed files with 8,503 additions and 4,376 deletions.
8 changes: 4 additions & 4 deletions azure-pipelines.yml
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ stages:
inputs:
mavenPomFile: 'pom.xml'
goals: 'test'
options: -Punit-tests -pl hudi-common,hudi-flink,hudi-client/hudi-spark-client
options: -Punit-tests -pl hudi-common,hudi-flink-datasource/hudi-flink,hudi-client/hudi-spark-client
publishJUnitResults: false
jdkVersionOption: '1.8'
mavenOptions: '-Xmx2g $(MAVEN_OPTS)'
Expand All @@ -66,7 +66,7 @@ stages:
inputs:
mavenPomFile: 'pom.xml'
goals: 'test'
options: -Pfunctional-tests -pl hudi-common,hudi-flink
options: -Pfunctional-tests -pl hudi-common,hudi-flink-datasource/hudi-flink
publishJUnitResults: false
jdkVersionOption: '1.8'
mavenOptions: '-Xmx2g $(MAVEN_OPTS)'
Expand Down Expand Up @@ -165,7 +165,7 @@ stages:
inputs:
mavenPomFile: 'pom.xml'
goals: 'test'
options: -Punit-tests -pl !hudi-common,!hudi-flink,!hudi-client/hudi-spark-client,!hudi-client/hudi-client-common,!hudi-client/hudi-flink-client,!hudi-client/hudi-java-client,!hudi-cli,!hudi-utilities,!hudi-sync/hudi-hive-sync
options: -Punit-tests -pl !hudi-common,!hudi-flink-datasource/hudi-flink,!hudi-client/hudi-spark-client,!hudi-client/hudi-client-common,!hudi-client/hudi-flink-client,!hudi-client/hudi-java-client,!hudi-cli,!hudi-utilities,!hudi-sync/hudi-hive-sync
publishJUnitResults: false
jdkVersionOption: '1.8'
mavenOptions: '-Xmx2g $(MAVEN_OPTS)'
Expand All @@ -174,7 +174,7 @@ stages:
inputs:
mavenPomFile: 'pom.xml'
goals: 'test'
options: -Pfunctional-tests -pl !hudi-common,!hudi-flink,!hudi-client/hudi-spark-client,!hudi-client/hudi-client-common,!hudi-client/hudi-flink-client,!hudi-client/hudi-java-client,!hudi-cli,!hudi-utilities,!hudi-sync/hudi-hive-sync
options: -Pfunctional-tests -pl !hudi-common,!hudi-flink-datasource/hudi-flink,!hudi-client/hudi-spark-client,!hudi-client/hudi-client-common,!hudi-client/hudi-flink-client,!hudi-client/hudi-java-client,!hudi-cli,!hudi-utilities,!hudi-sync/hudi-hive-sync
publishJUnitResults: false
jdkVersionOption: '1.8'
mavenOptions: '-Xmx2g $(MAVEN_OPTS)'
Expand Down
40 changes: 20 additions & 20 deletions docker/compose/docker-compose_hadoop284_hive233_spark244.yml
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@ services:
presto-coordinator-1:
container_name: presto-coordinator-1
hostname: presto-coordinator-1
image: apachehudi/hudi-hadoop_2.8.4-prestobase_0.268:latest
image: apachehudi/hudi-hadoop_2.8.4-prestobase_0.271:latest
ports:
- '8090:8090'
environment:
Expand All @@ -201,25 +201,25 @@ services:
command: coordinator

presto-worker-1:
container_name: presto-worker-1
hostname: presto-worker-1
image: apachehudi/hudi-hadoop_2.8.4-prestobase_0.268:latest
depends_on: ["presto-coordinator-1"]
environment:
- PRESTO_JVM_MAX_HEAP=512M
- PRESTO_QUERY_MAX_MEMORY=1GB
- PRESTO_QUERY_MAX_MEMORY_PER_NODE=256MB
- PRESTO_QUERY_MAX_TOTAL_MEMORY_PER_NODE=384MB
- PRESTO_MEMORY_HEAP_HEADROOM_PER_NODE=100MB
- TERM=xterm
links:
- "hivemetastore"
- "hiveserver"
- "hive-metastore-postgresql"
- "namenode"
volumes:
- ${HUDI_WS}:/var/hoodie/ws
command: worker
container_name: presto-worker-1
hostname: presto-worker-1
image: apachehudi/hudi-hadoop_2.8.4-prestobase_0.271:latest
depends_on: [ "presto-coordinator-1" ]
environment:
- PRESTO_JVM_MAX_HEAP=512M
- PRESTO_QUERY_MAX_MEMORY=1GB
- PRESTO_QUERY_MAX_MEMORY_PER_NODE=256MB
- PRESTO_QUERY_MAX_TOTAL_MEMORY_PER_NODE=384MB
- PRESTO_MEMORY_HEAP_HEADROOM_PER_NODE=100MB
- TERM=xterm
links:
- "hivemetastore"
- "hiveserver"
- "hive-metastore-postgresql"
- "namenode"
volumes:
- ${HUDI_WS}:/var/hoodie/ws
command: worker

trino-coordinator-1:
container_name: trino-coordinator-1
Expand Down
24 changes: 3 additions & 21 deletions docker/demo/config/test-suite/cow-spark-long-running.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,17 +25,6 @@ dag_content:
num_records_insert: 10000
type: SparkInsertNode
deps: none
first_hive_sync:
config:
queue_name: "adhoc"
engine: "mr"
type: HiveSyncNode
deps: first_insert
first_validate:
config:
validate_hive: false
type: ValidateDatasetNode
deps: first_hive_sync
first_upsert:
config:
record_size: 200
Expand All @@ -45,29 +34,22 @@ dag_content:
num_records_upsert: 3000
num_partitions_upsert: 50
type: SparkUpsertNode
deps: first_validate
deps: first_insert
first_delete:
config:
num_partitions_delete: 50
num_records_delete: 8000
type: SparkDeleteNode
deps: first_upsert
second_hive_sync:
config:
queue_name: "adhoc"
engine: "mr"
type: HiveSyncNode
deps: first_delete
second_validate:
config:
validate_once_every_itr : 5
validate_hive: false
delete_input_data: true
type: ValidateDatasetNode
deps: second_hive_sync
deps: first_delete
last_validate:
config:
execute_itr_count: 30
validate_clean: true
validate_archival: true
type: ValidateAsyncOperations
deps: second_validate
16 changes: 2 additions & 14 deletions docker/demo/config/test-suite/cow-spark-simple.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,17 +25,11 @@ dag_content:
num_records_insert: 100
type: SparkInsertNode
deps: none
first_hive_sync:
config:
queue_name: "adhoc"
engine: "mr"
type: HiveSyncNode
deps: first_insert
first_validate:
config:
validate_hive: false
type: ValidateDatasetNode
deps: first_hive_sync
deps: first_insert
first_upsert:
config:
record_size: 1000
Expand All @@ -52,15 +46,9 @@ dag_content:
num_records_delete: 30
type: SparkDeleteNode
deps: first_upsert
second_hive_sync:
config:
queue_name: "adhoc"
engine: "mr"
type: HiveSyncNode
deps: first_delete
second_validate:
config:
validate_hive: false
delete_input_data: false
type: ValidateDatasetNode
deps: second_hive_sync
deps: first_delete
Original file line number Diff line number Diff line change
Expand Up @@ -47,11 +47,6 @@ dag_content:
engine: "mr"
type: HiveSyncNode
deps: third_insert
first_validate:
config:
validate_hive: false
type: ValidateDatasetNode
deps: first_hive_sync
first_upsert:
config:
record_size: 1000
Expand All @@ -61,7 +56,7 @@ dag_content:
num_records_upsert: 100
num_partitions_upsert: 1
type: UpsertNode
deps: first_validate
deps: first_hive_sync
first_delete:
config:
num_partitions_delete: 50
Expand All @@ -76,14 +71,13 @@ dag_content:
deps: first_delete
second_validate:
config:
validate_once_every_itr : 5
validate_hive: true
delete_input_data: true
type: ValidateDatasetNode
deps: second_hive_sync
last_validate:
config:
execute_itr_count: 50
validate_clean: true
validate_archival: true
type: ValidateAsyncOperations
deps: second_validate
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Test-suite DAG: three inserts, one upsert and one delete, followed by two
# validation nodes. Each entry under `dag_content` is one node; `deps` names
# the node it depends on, forming a single linear chain.
dag_name: deltastreamer-long-running-multi-partitions.yaml
dag_rounds: 30
dag_intermittent_delay_mins: 1
dag_content:
  # Root of the chain: the only node with `deps: none`.
  first_insert:
    config:
      record_size: 1000
      num_partitions_insert: 5
      repeat_count: 1
      num_records_insert: 1000
    type: InsertNode
    deps: none
  # Largest insert of the round: 10000 records over 50 partitions.
  second_insert:
    config:
      record_size: 1000
      num_partitions_insert: 50
      repeat_count: 1
      num_records_insert: 10000
    deps: first_insert
    type: InsertNode
  third_insert:
    config:
      record_size: 1000
      num_partitions_insert: 2
      repeat_count: 1
      num_records_insert: 300
    deps: second_insert
    type: InsertNode
  # Carries both insert and upsert settings in the same config.
  first_upsert:
    config:
      record_size: 1000
      num_partitions_insert: 2
      num_records_insert: 300
      repeat_count: 1
      num_records_upsert: 100
      num_partitions_upsert: 1
    type: UpsertNode
    deps: third_insert
  first_delete:
    config:
      num_partitions_delete: 50
      num_records_delete: 8000
    type: DeleteNode
    deps: first_upsert
  # Depends directly on first_delete; no Hive sync node exists in this DAG and
  # validate_hive is false.
  second_validate:
    config:
      # NOTE(review): presumably validation runs only on every 5th round;
      # confirm against ValidateDatasetNode.
      validate_once_every_itr: 5
      validate_hive: false
      delete_input_data: true
    type: ValidateDatasetNode
    deps: first_delete
  # execute_itr_count equals dag_rounds above (30).
  # NOTE(review): presumably this makes the node run on the final round only;
  # confirm against ValidateAsyncOperations.
  last_validate:
    config:
      execute_itr_count: 30
    type: ValidateAsyncOperations
    deps: second_validate
Original file line number Diff line number Diff line change
Expand Up @@ -57,22 +57,15 @@ dag_content:
num_records_delete: 8000
type: DeleteNode
deps: first_upsert
second_hive_sync:
config:
queue_name: "adhoc"
engine: "mr"
type: HiveSyncNode
deps: first_delete
second_validate:
config:
validate_once_every_itr : 5
validate_hive: false
delete_input_data: true
type: ValidateDatasetNode
deps: second_hive_sync
deps: first_delete
last_validate:
config:
execute_itr_count: 50
validate_clean: true
validate_archival: true
type: ValidateAsyncOperations
deps: second_validate
73 changes: 73 additions & 0 deletions docker/demo/config/test-suite/deltastreamer-medium-clustering.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# to be used with test-aggressive-clean-archival.properties

# Test-suite DAG: three inserts, one upsert and one delete, followed by two
# validation nodes. Each entry under `dag_content` is one node; `deps` names
# the node it depends on, forming a single linear chain.
dag_name: deltastreamer-medium-clustering.yaml
dag_rounds: 20
dag_intermittent_delay_mins: 1
dag_content:
  # Root of the chain: the only node with `deps: none`.
  first_insert:
    config:
      record_size: 1000
      num_partitions_insert: 5
      repeat_count: 1
      num_records_insert: 1000
    type: InsertNode
    deps: none
  # Largest insert of the round: 10000 records over 50 partitions.
  second_insert:
    config:
      record_size: 1000
      num_partitions_insert: 50
      repeat_count: 1
      num_records_insert: 10000
    deps: first_insert
    type: InsertNode
  third_insert:
    config:
      record_size: 1000
      num_partitions_insert: 2
      repeat_count: 1
      num_records_insert: 300
    deps: second_insert
    type: InsertNode
  # Carries both insert and upsert settings in the same config.
  first_upsert:
    config:
      record_size: 1000
      num_partitions_insert: 2
      num_records_insert: 300
      repeat_count: 1
      num_records_upsert: 100
      num_partitions_upsert: 1
    type: UpsertNode
    deps: third_insert
  first_delete:
    config:
      num_partitions_delete: 50
      num_records_delete: 8000
    type: DeleteNode
    deps: first_upsert
  # Depends directly on first_delete; no Hive sync node exists in this DAG and
  # validate_hive is false.
  second_validate:
    config:
      validate_hive: false
      delete_input_data: true
    type: ValidateDatasetNode
    deps: first_delete
  # execute_itr_count equals dag_rounds above (20).
  # NOTE(review): presumably this makes the node run on the final round only;
  # confirm against ValidateAsyncOperations.
  last_validate:
    config:
      execute_itr_count: 20
    type: ValidateAsyncOperations
    deps: second_validate
Loading

0 comments on commit 4885c98

Please sign in to comment.