✨Source S3 (v4): Set decimal_as_float to True for parquet files #99877
Workflow file for this run
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# CI workflow: detects which parts of the repository changed and runs the
# Octavia CLI, CDK and Connectors Base builds accordingly.
name: "Airbyte Connectors & Octavia CI"

on:
  # Allows the workflow to be started manually from the web UI.
  workflow_dispatch:
    inputs:
      debug_mode:
        description: "Enable or disable tmate session for debug during helm ac tests"
        type: choice
        options:
          - "true"
          - "false"
        default: "false"
        required: false
  push:
    branches:
      - master
  pull_request:

# Runs are grouped per workflow and ref; a newer run cancels the one in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

env:
  S3_BUILD_CACHE_ACCESS_KEY_ID: ${{ secrets.SELF_RUNNER_AWS_ACCESS_KEY_ID }}
  S3_BUILD_CACHE_SECRET_KEY: ${{ secrets.SELF_RUNNER_AWS_SECRET_ACCESS_KEY }}

# NOTE(review): write-all grants every token scope to every job; consider narrowing.
permissions: write-all

jobs:
# Detects which areas of the repository were modified. The outputs of this job
# are used to trigger the builds that follow.
changes:
  name: "Detect Modified Files"
  runs-on: ubuntu-latest
  # The filtering action does not deal well with scheduled events, so skip it to avoid errors.
  # See https://github.com/dorny/paths-filter/issues/100 for more info.
  # This is okay because this workflow is only scheduled on master, where we want to build
  # everything, so filtering is not required. Use always() in each start block to force the start task.
  if: github.event_name != 'schedule'
  outputs:
    any_change: ${{ steps.filter.outputs.any_change }}
    build: ${{ steps.filter.outputs.build }}
    cdk: ${{ steps.filter.outputs.cdk }}
    cli: ${{ steps.filter.outputs.cli }}
    connectors_base: ${{ steps.filter.outputs.connectors_base }}
    db: ${{ steps.filter.outputs.db }}
  steps:
    - name: Checkout Airbyte
      uses: actions/checkout@v3
    - id: filter
      uses: dorny/paths-filter@v2
      with:
        # Note: the glob expressions within a single filter are ORed together.
        # Note: if no filter matches, the steps are all skipped WITHOUT reporting their status checks back to GitHub.
        # This can leave required checks unreported, blocking PRs from being merged,
        # which is why we have the any_change filter.
        filters: |
          build:
            - '.github/**'
            - 'buildSrc/**'
            - 'tools/**'
            - '*.gradle'
            - 'deps.toml'
            - 'airbyte-config-oss/**'
          cdk:
            - 'airbyte-cdk/**'
          cli:
            - 'airbyte-api/**'
            - 'octavia-cli/**'
          connectors_base:
            - 'airbyte-integrations/bases/**'
            - 'airbyte-integrations/connectors-templates/**'
            - 'airbyte-connector-test-harnesses/acceptance-test-harness/**'
          db:
            - 'airbyte-db/**'
          any_change:
            - '**/*'
# Uncomment to debug. | |
# changes-output: | |
# name: "Debug Change Detection Logic" | |
# needs: changes | |
# runs-on: ubuntu-latest | |
# steps: | |
# - uses: actions/checkout@v3 | |
# - run: | | |
# echo '${{ toJSON(needs) }}' | |
## BUILDS | |
# Formats, builds and integration-tests the Octavia CLI.
octavia-cli-build:
  name: "Octavia CLI: Build"
  needs: changes
  # Scheduled builds on master skip the changes job, so always() is used to force this job to run on master.
  if: >-
    needs.changes.outputs.cli == 'true' ||
    needs.changes.outputs.build == 'true' ||
    (always() && github.ref == 'refs/heads/master')
  runs-on: ubuntu-latest
  timeout-minutes: 90
  steps:
    - name: Checkout Airbyte
      uses: actions/checkout@v3
      with:
        ref: ${{ github.head_ref }}
    # - name: Cache Build Artifacts
    #   uses: ./.github/actions/cache-build-artifacts
    #   with:
    #     cache-key: ${{ secrets.CACHE_VERSION }}
    #     cache-python: "false"
    - uses: actions/setup-java@v3
      with:
        distribution: "zulu"
        java-version: "17"
    - uses: actions/setup-python@v4
      with:
        python-version: "3.9"
        token: ${{ secrets.GH_PAT_MAINTENANCE_OCTAVIA }}
    # Writes the Gradle settings (JVM args, worker count, file watching) used by the steps below.
    - name: Set up CI Gradle Properties
      run: |
        mkdir -p ~/.gradle/
        cat > ~/.gradle/gradle.properties <<EOF
        org.gradle.jvmargs=-Xmx8g -Xss4m \
          --add-exports jdk.compiler/com.sun.tools.javac.api=ALL-UNNAMED \
          --add-exports jdk.compiler/com.sun.tools.javac.file=ALL-UNNAMED \
          --add-exports jdk.compiler/com.sun.tools.javac.parser=ALL-UNNAMED \
          --add-exports jdk.compiler/com.sun.tools.javac.tree=ALL-UNNAMED \
          --add-exports jdk.compiler/com.sun.tools.javac.util=ALL-UNNAMED
        org.gradle.workers.max=8
        org.gradle.vfs.watch=false
        EOF
    # NOTE(review): "[email protected]" in the `uses:` refs of this job looks like an e-mail-obfuscation
    # artifact of the page this file was copied from; restore the real "<action>@<version>" ref - TODO confirm.
    - name: Format
      uses: Wandalen/[email protected]
      with:
        command: SUB_BUILD=OCTAVIA_CLI ./gradlew format --scan --info --stacktrace
        attempt_limit: 3
        attempt_delay: 5000 # milliseconds
    - name: Ensure no file change
      run: ./tools/bin/check_for_file_changes
    - name: Build
      uses: Wandalen/[email protected]
      with:
        command: SUB_BUILD=OCTAVIA_CLI ./gradlew :octavia-cli:build javadoc --scan
        attempt_limit: 3
        attempt_delay: 5000 # milliseconds
    - name: Run integration tests
      uses: Wandalen/[email protected]
      with:
        command: ./tools/bin/integration_tests_octavia.sh
        attempt_limit: 3
        attempt_delay: 5000 # milliseconds
# Formats the Airbyte CDK, commits any formatting changes, then builds it.
cdk-build:
  name: "Airbyte CDK: Build"
  needs: changes
  # Scheduled builds on master skip the changes job, so always() is used to force this job to run on master.
  if: >-
    needs.changes.outputs.cdk == 'true' ||
    needs.changes.outputs.build == 'true' ||
    (always() && github.ref == 'refs/heads/master')
  runs-on: ubuntu-latest
  timeout-minutes: 90
  steps:
    # We check out master first so that mypy can be run on the CDK files that were modified in the PR.
    # An alternative would be to use a GH action to detect the files and pass them as arguments to the build command,
    # but the additional complexity of detecting the files and passing them to the script through the gradle command
    # doesn't seem worth it.
    - name: Checkout master
      uses: actions/checkout@v3
      with:
        ref: master
    - name: Checkout Airbyte
      uses: actions/checkout@v3
    - name: Cache Build Artifacts
      uses: ./.github/actions/cache-build-artifacts
      with:
        cache-key: ${{ secrets.CACHE_VERSION }}-cdk
    - uses: actions/setup-java@v3
      with:
        distribution: "zulu"
        java-version: "17"
    - uses: actions/setup-python@v4
      with:
        python-version: "3.9"
    # Writes the Gradle settings (JVM args, worker count, file watching) used by the steps below.
    - name: Set up CI Gradle Properties
      run: |
        mkdir -p ~/.gradle/
        cat > ~/.gradle/gradle.properties <<EOF
        org.gradle.jvmargs=-Xmx8g -Xss4m \
          --add-exports jdk.compiler/com.sun.tools.javac.api=ALL-UNNAMED \
          --add-exports jdk.compiler/com.sun.tools.javac.file=ALL-UNNAMED \
          --add-exports jdk.compiler/com.sun.tools.javac.parser=ALL-UNNAMED \
          --add-exports jdk.compiler/com.sun.tools.javac.tree=ALL-UNNAMED \
          --add-exports jdk.compiler/com.sun.tools.javac.util=ALL-UNNAMED
        org.gradle.workers.max=8
        org.gradle.vfs.watch=false
        EOF
    # NOTE(review): "[email protected]" in the `uses:` refs of this job looks like an e-mail-obfuscation
    # artifact of the page this file was copied from; restore the real "<action>@<version>" ref - TODO confirm.
    - name: Format
      uses: Wandalen/[email protected]
      with:
        command: SUB_BUILD=CDK ./gradlew format --scan --info --stacktrace
        attempt_limit: 3
        attempt_delay: 5000 # milliseconds
    - name: Commit Formatting Changes
      uses: stefanzweifel/git-auto-commit-action@v4
      with:
        commit_message: Automated Commit - Formatting Changes
        commit_user_name: Octavia Squidington III
        # NOTE(review): "[email protected]" looks like an obfuscated placeholder rather than a real
        # address; restore the original value - TODO confirm.
        commit_user_email: [email protected]
    - name: Build
      uses: Wandalen/[email protected]
      with:
        command: SUB_BUILD=CDK ./gradlew build --scan
        attempt_limit: 3
        attempt_delay: 5000 # milliseconds
# Connectors Base
# In case of self-hosted EC2 errors, remove this block.
start-connectors-base-build-runner:
  name: "Connectors Base: Start Build EC2 Runner"
  needs:
    - changes
  # Scheduled builds on master skip the changes job, so always() is used to force this job to run on master.
  if: >
    needs.changes.outputs.build == 'true' ||
    needs.changes.outputs.connectors_base == 'true' ||
    needs.changes.outputs.db == 'true' ||
    (always() && github.ref == 'refs/heads/master')
  runs-on: ubuntu-latest
  timeout-minutes: 10
  # Consumed by the build job (runner label) and the stop job (label and instance id).
  outputs:
    label: ${{ steps.start-ec2-runner.outputs.label }}
    ec2-instance-id: ${{ steps.start-ec2-runner.outputs.ec2-instance-id }}
  steps:
    - name: Checkout Airbyte
      uses: actions/checkout@v3
    - name: Check PAT rate limits
      run: |
        ./tools/bin/find_non_rate_limited_PAT \
          ${{ secrets.GH_PAT_BUILD_RUNNER_OSS }} \
          ${{ secrets.GH_PAT_BUILD_RUNNER_BACKUP }}
    - name: Start AWS Runner
      id: start-ec2-runner
      uses: ./.github/actions/start-aws-runner
      with:
        aws-access-key-id: ${{ secrets.SELF_RUNNER_AWS_ACCESS_KEY_ID }}
        aws-secret-access-key: ${{ secrets.SELF_RUNNER_AWS_SECRET_ACCESS_KEY }}
        # NOTE(review): env.PAT is presumably exported by the rate-limit check step above - confirm.
        github-token: ${{ env.PAT }}
# Builds the connectors base on the runner started above, commits generated and formatted
# files, and publishes the test results.
build-connectors-base:
  name: "Connectors Base: Build"
  # In case of self-hosted EC2 errors, remove the `needs` line and switch back to running on ubuntu-latest.
  # `needs` is required to start the main job only once the runner is ready.
  needs: start-connectors-base-build-runner
  # Run the job on the newly created runner.
  runs-on: ${{ needs.start-connectors-base-build-runner.outputs.label }}
  timeout-minutes: 90
  steps:
    # We check out master first so that mypy can be run on the CDK files that were modified in the PR.
    # An alternative would be to use a GH action to detect the files and pass them as arguments to the build command,
    # but the additional complexity of detecting the files and passing them to the script through the gradle command
    # doesn't seem worth it.
    - name: Checkout master
      uses: actions/checkout@v3
      with:
        ref: master
    - name: Checkout Airbyte
      uses: actions/checkout@v3
      with:
        ref: ${{ github.head_ref }}
    # - name: Cache Build Artifacts
    #   uses: ./.github/actions/cache-build-artifacts
    #   with:
    #     cache-key: ${{ secrets.CACHE_VERSION }}-connectors-gradle-build
    - uses: actions/setup-java@v3
      with:
        distribution: "zulu"
        java-version: "17"
    - uses: actions/setup-node@v3
      with:
        node-version: "lts/*"
    - uses: actions/setup-python@v4
      with:
        python-version: "3.9"
    - name: Install Pyenv
      run: python3 -m pip install virtualenv --user
    - name: Install automake
      run: apt-get update && apt-get install -y automake build-essential libtool libtool-bin autoconf
    # Writes the Gradle settings (JVM args, worker count, file watching) used by the steps below.
    - name: Set up CI Gradle Properties
      run: |
        mkdir -p ~/.gradle/
        cat > ~/.gradle/gradle.properties <<EOF
        org.gradle.jvmargs=-Xmx8g -Xss4m \
          --add-exports jdk.compiler/com.sun.tools.javac.api=ALL-UNNAMED \
          --add-exports jdk.compiler/com.sun.tools.javac.file=ALL-UNNAMED \
          --add-exports jdk.compiler/com.sun.tools.javac.parser=ALL-UNNAMED \
          --add-exports jdk.compiler/com.sun.tools.javac.tree=ALL-UNNAMED \
          --add-exports jdk.compiler/com.sun.tools.javac.util=ALL-UNNAMED
        org.gradle.workers.max=8
        org.gradle.vfs.watch=false
        EOF
    # NOTE(review): "[email protected]" in the `uses:` refs of this job looks like an e-mail-obfuscation
    # artifact of the page this file was copied from; restore the real "<action>@<version>" ref - TODO confirm.
    - name: Generate Template scaffold
      uses: Wandalen/[email protected]
      with:
        command: ./gradlew :airbyte-integrations:connector-templates:generator:testScaffoldTemplates --scan
        attempt_limit: 3
        attempt_delay: 5000 # milliseconds
    # Verify that the only committed file changes will be those made by the build, formatter or processResources.
    - name: Ensure no file change from code formatting
      run: git --no-pager diff && test -z "$(git --no-pager diff)"
    - name: Format
      uses: Wandalen/[email protected]
      with:
        command: SUB_BUILD=CONNECTORS_BASE ./gradlew format --scan --info --stacktrace
        attempt_limit: 3
        attempt_delay: 5000 # milliseconds
    - name: Build
      uses: Wandalen/[email protected]
      with:
        command: SUB_BUILD=CONNECTORS_BASE ./gradlew build --scan
        attempt_limit: 3
        attempt_delay: 5000 # milliseconds
    - name: Process Resources
      uses: Wandalen/[email protected]
      with:
        command: SUB_BUILD=CONNECTORS_BASE ./gradlew :airbyte-config-oss:init-oss:processResources --scan
        attempt_limit: 3
        attempt_delay: 5000 # milliseconds
    # This is helpful when a previously committed generated file is changed to be ignored by git.
    - name: Remove any files that have been gitignored
      run: git ls-files -i -c --exclude-from=.gitignore | xargs -r git rm --cached
    - name: Commit Changes
      uses: stefanzweifel/git-auto-commit-action@v4
      with:
        commit_message: Automated Commit - Format and Process Resources Changes
        commit_user_name: Octavia Squidington III
        # NOTE(review): "[email protected]" looks like an obfuscated placeholder rather than a real
        # address; restore the original value - TODO confirm.
        commit_user_email: [email protected]
    # The reporting steps below use always() so that they run even when an earlier step failed.
    - name: Publish Connectors Base Test Results
      id: connectors-test-results
      if: always()
      uses: EnricoMi/publish-unit-test-result-action@v2
      with:
        # Top-level and second-level module results, one glob per line.
        junit_files: |-
          /actions-runner/_work/airbyte/airbyte/*/build/test-results/*/*.xml
          /actions-runner/_work/airbyte/airbyte/*/*/build/test-results/*/*.xml
        comment_mode: failures
        json_file: connectors_base_results.json
        json_test_case_results: true
        check_name: "Connectors Base Test Results"
    - name: Setup Google Cloud SDK
      if: always()
      uses: google-github-actions/setup-gcloud@v0
      with:
        service_account_key: ${{ secrets.GKE_TEST_SA_KEY }}
        export_default_credentials: true
    - name: Prep Test Results For GCS
      if: always()
      run: |
        python tools/bin/prep_test_results_for_gcs.py --json connectors_base_results.json --jobid $GITHUB_JOB --runid $GITHUB_RUN_ID
    - name: Upload Test Results to GCS
      if: always()
      run: |
        gcs_bucket_name="dev-ab-ci-run-results"
        filename=$(echo "${{ fromJSON( steps.connectors-test-results.outputs.json ).check_url }}" | sed 's@.*/@@')
        echo "$filename"
        gsutil -h "Cache-Control:public" cp connectors_base_results.jsonl "gs://$gcs_bucket_name/oss/$filename.jsonl"
    - name: Generate Test Report
      if: always()
      uses: dorny/test-reporter@v1
      with:
        name: Connectors Base Test Report
        # Specify top-level and second-level modules. Note there cannot be a space after the comma.
        path: "/actions-runner/_work/airbyte/airbyte/*/build/test-results/*/*.xml,/actions-runner/_work/airbyte/airbyte/*/*/build/test-results/*/*.xml"
        reporter: java-junit
        fail-on-error: "false"
# Stops the EC2 runner once the connectors base build has finished.
# In case of self-hosted EC2 errors, remove this block.
stop-connectors-base-build-runner:
  name: "Connectors Base: Stop Build EC2 Runner"
  needs:
    # Required to get the outputs of the start-runner job.
    - start-connectors-base-build-runner
    # Required to wait until the main job is done.
    - build-connectors-base
  # always() is required to stop the runner even if the previous job has errors. However, always()
  # also runs when the previous job was skipped, so we check for skipped here.
  if: ${{ always() && needs.start-connectors-base-build-runner.result != 'skipped' }}
  runs-on: ubuntu-latest
  timeout-minutes: 10
  steps:
    - name: Configure AWS credentials
      uses: aws-actions/configure-aws-credentials@v1
      with:
        aws-access-key-id: ${{ secrets.SELF_RUNNER_AWS_ACCESS_KEY_ID }}
        aws-secret-access-key: ${{ secrets.SELF_RUNNER_AWS_SECRET_ACCESS_KEY }}
        aws-region: us-east-2
    - name: Checkout Airbyte
      uses: actions/checkout@v3
    - name: Check PAT rate limits
      run: |
        ./tools/bin/find_non_rate_limited_PAT \
          ${{ secrets.GH_PAT_BUILD_RUNNER_OSS }} \
          ${{ secrets.GH_PAT_BUILD_RUNNER_BACKUP }}
    # NOTE(review): "[email protected]" in the `uses:` ref below looks like an e-mail-obfuscation artifact
    # of the page this file was copied from; restore the real "<action>@<version>" ref - TODO confirm.
    - name: Stop EC2 runner
      uses: supertopher/[email protected]
      with:
        mode: stop
        # NOTE(review): env.PAT is presumably exported by the rate-limit check step above - confirm.
        github-token: ${{ env.PAT }}
        label: ${{ needs.start-connectors-base-build-runner.outputs.label }}
        ec2-instance-id: ${{ needs.start-connectors-base-build-runner.outputs.ec2-instance-id }}
# Posts a message to Slack when one of the jobs listed under `needs` fails on master.
notify-failure-slack-channel:
  name: "Notify Slack Channel on Build Failures"
  needs:
    - build-connectors-base
    - octavia-cli-build
  if: ${{ failure() && github.ref == 'refs/heads/master' }}
  runs-on: ubuntu-latest
  steps:
    - name: Checkout Airbyte
      uses: actions/checkout@v3
    # The Slack user ids produced by this step are mentioned in the message below.
    - name: Match GitHub User to Slack User
      id: match-github-to-slack-user
      uses: ./.github/actions/match-github-to-slack-user
      env:
        AIRBYTE_TEAM_BOT_SLACK_TOKEN: ${{ secrets.SLACK_AIRBYTE_TEAM_READ_USERS }}
        GITHUB_API_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    - name: Publish to OSS Build Failure Slack Channel
      uses: abinoda/slack-action@master
      env:
        SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN_AIRBYTE_TEAM }}
      with:
        # Folded scalar: the lines below are joined with spaces into a single JSON payload.
        args: >-
          {\"channel\":\"C03BEADRPNY\", \"blocks\":[
          {\"type\":\"divider\"},
          {\"type\":\"section\",\"text\":{\"type\":\"mrkdwn\",\"text\":\" Merge to OSS Master failed! :bangbang: \n\n\"}},
          {\"type\":\"section\",\"text\":{\"type\":\"mrkdwn\",\"text\":\"_merged by_: *${{ github.actor }}* \n\"}},
          {\"type\":\"section\",\"text\":{\"type\":\"mrkdwn\",\"text\":\"<@${{ steps.match-github-to-slack-user.outputs.slack_user_ids }}> \n\"}},
          {\"type\":\"section\",\"text\":{\"type\":\"mrkdwn\",\"text\":\" :octavia-shocked: <https://github.com/${{github.repository}}/actions/runs/${{github.run_id}}|View Action Run> :octavia-shocked: \n\"}},
          {\"type\":\"divider\"}]}
# Posts an "OSS Master Fixed!" message to Slack when the jobs listed under `needs` succeed
# on master and the previous run of this workflow had failed.
notify-failure-slack-channel-fixed-broken-build:
  name: "Notify Slack Channel on Build Fixes"
  needs:
    - build-connectors-base
    - octavia-cli-build
  # Restricted to master, mirroring the failure notification job: the message announces that
  # master was fixed, so a successful pull-request run must not trigger it.
  if: ${{ success() && github.ref == 'refs/heads/master' }}
  runs-on: ubuntu-latest
  steps:
    # NOTE(review): "[email protected]" in the `uses:` ref below looks like an e-mail-obfuscation artifact
    # of the page this file was copied from; restore the real "<action>@<version>" ref - TODO confirm.
    - name: Get Previous Workflow Status
      id: last_status
      uses: Mercymeilya/[email protected]
      with:
        github_token: ${{ secrets.GITHUB_TOKEN }}
    # To avoid clogging up the channel, only publish build success if the previous build was a
    # failure, since this means the build was fixed.
    - name: Publish Build Fixed Message to OSS Build Failure Slack Channel
      if: ${{ steps.last_status.outputs.last_status == 'failure' }}
      uses: abinoda/slack-action@master
      env:
        SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN_AIRBYTE_TEAM }}
      with:
        # Folded scalar: the lines below are joined with spaces into a single JSON payload.
        args: >-
          {\"channel\":\"C03BEADRPNY\", \"blocks\":[
          {\"type\":\"divider\"},
          {\"type\":\"section\",\"text\":{\"type\":\"mrkdwn\",\"text\":\" OSS Master Fixed! :white_check_mark: \n\n\"}},
          {\"type\":\"section\",\"text\":{\"type\":\"mrkdwn\",\"text\":\"_merged by_: *${{ github.actor }}* \n\"}},
          {\"type\":\"section\",\"text\":{\"type\":\"mrkdwn\",\"text\":\" :octavia-rocket: <https://github.com/${{github.repository}}/actions/runs/${{github.run_id}}|View Action Run> :octavia-rocket: \n\"}},
          {\"type\":\"divider\"}]}