Skip to content

✨Source S3 (v4): Set decimal_as_float to True for parquet files #99877

✨Source S3 (v4): Set decimal_as_float to True for parquet files

✨Source S3 (v4): Set decimal_as_float to True for parquet files #99877

Workflow file for this run

name: Airbyte Connectors & Octavia CI
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
env:
S3_BUILD_CACHE_ACCESS_KEY_ID: ${{ secrets.SELF_RUNNER_AWS_ACCESS_KEY_ID }}
S3_BUILD_CACHE_SECRET_KEY: ${{ secrets.SELF_RUNNER_AWS_SECRET_ACCESS_KEY }}
on:
#ability to start task manually in Web UI
workflow_dispatch:
inputs:
debug_mode:
description: "Enable or disable tmate session for debug during helm ac tests"
type: choice
default: "false"
options:
- "true"
- "false"
required: false
push:
branches:
- master
pull_request:
permissions: write-all
jobs:
# The output of this job is used to trigger the following builds.
changes:
name: "Detect Modified Files"
# The filtering action does not deal with well scheduled events so skip to avoid errors.
# See https://github.com/dorny/paths-filter/issues/100 for more info.
# This is okay this workflow is only scheduled on master, where we want to build everything
# so filtering is not required. Use always() in each start block to force the start task.
if: github.event_name != 'schedule'
runs-on: ubuntu-latest
outputs:
build: ${{ steps.filter.outputs.build }}
cdk: ${{ steps.filter.outputs.cdk }}
cli: ${{ steps.filter.outputs.cli }}
connectors_base: ${{ steps.filter.outputs.connectors_base }}
db: ${{ steps.filter.outputs.db }}
any_change: ${{ steps.filter.outputs.any_change }}
steps:
- name: Checkout Airbyte
uses: actions/checkout@v3
- uses: dorny/paths-filter@v2
id: filter
with:
# Note: The following glob expression within a filters are ORs.
# Note: If no filters match, the steps are all skipped WITHOUT reported their status check back to github.
# This can cause required checks to go unreported blocking PRs from being merged
# and this is why we have the any_change filter.
filters: |
build:
- '.github/**'
- 'buildSrc/**'
- 'tools/**'
- '*.gradle'
- 'deps.toml'
- 'airbyte-config-oss/**'
cdk:
- 'airbyte-cdk/**'
cli:
- 'airbyte-api/**'
- 'octavia-cli/**'
connectors_base:
- 'airbyte-integrations/bases/**'
- 'airbyte-integrations/connectors-templates/**'
- 'airbyte-connector-test-harnesses/acceptance-test-harness/**'
db:
- 'airbyte-db/**'
any_change:
- '**/*'
# Uncomment to debug.
# changes-output:
# name: "Debug Change Detection Logic"
# needs: changes
# runs-on: ubuntu-latest
# steps:
# - uses: actions/checkout@v3
# - run: |
# echo '${{ toJSON(needs) }}'
## BUILDS
octavia-cli-build:
needs: changes
runs-on: ubuntu-latest
# Because scheduled builds on master require us to skip the changes job. Use always() to force this to run on master.
if: needs.changes.outputs.cli == 'true' || needs.changes.outputs.build == 'true' || (always() && github.ref == 'refs/heads/master')
name: "Octavia CLI: Build"
timeout-minutes: 90
steps:
- name: Checkout Airbyte
uses: actions/checkout@v3
with:
ref: ${{ github.head_ref }}
# - name: Cache Build Artifacts
# uses: ./.github/actions/cache-build-artifacts
# with:
# cache-key: ${{ secrets.CACHE_VERSION }}
# cache-python: "false"
- uses: actions/setup-java@v3
with:
distribution: "zulu"
java-version: "17"
- uses: actions/setup-python@v4
with:
python-version: "3.9"
token: ${{ secrets.GH_PAT_MAINTENANCE_OCTAVIA }}
- name: Set up CI Gradle Properties
run: |
mkdir -p ~/.gradle/
cat > ~/.gradle/gradle.properties <<EOF
org.gradle.jvmargs=-Xmx8g -Xss4m \
--add-exports jdk.compiler/com.sun.tools.javac.api=ALL-UNNAMED \
--add-exports jdk.compiler/com.sun.tools.javac.file=ALL-UNNAMED \
--add-exports jdk.compiler/com.sun.tools.javac.parser=ALL-UNNAMED \
--add-exports jdk.compiler/com.sun.tools.javac.tree=ALL-UNNAMED \
--add-exports jdk.compiler/com.sun.tools.javac.util=ALL-UNNAMED
org.gradle.workers.max=8
org.gradle.vfs.watch=false
EOF
- name: Format
uses: Wandalen/[email protected]
with:
command: SUB_BUILD=OCTAVIA_CLI ./gradlew format --scan --info --stacktrace
attempt_limit: 3
attempt_delay: 5000 # in ms
- name: Ensure no file change
run: ./tools/bin/check_for_file_changes
- name: Build
uses: Wandalen/[email protected]
with:
command: SUB_BUILD=OCTAVIA_CLI ./gradlew :octavia-cli:build javadoc --scan
attempt_limit: 3
attempt_delay: 5000 # in ms
- name: Run integration tests
uses: Wandalen/[email protected]
with:
command: ./tools/bin/integration_tests_octavia.sh
attempt_limit: 3
attempt_delay: 5000 # in ms
cdk-build:
needs: changes
runs-on: ubuntu-latest
# Because scheduled builds on master require us to skip the changes job. Use always() to force this to run on master.
if: needs.changes.outputs.cdk == 'true' || needs.changes.outputs.build == 'true' || (always() && github.ref == 'refs/heads/master')
name: "Airbyte CDK: Build"
timeout-minutes: 90
steps:
- name: Checkout master
uses: actions/checkout@v3
# We checkout master to run mypy the CDK files that were modified in the PR.
# An alternative would be to use a GH action to detect the files and pass them as argument to the build command
# But the additional complexity of detecting the files and passing them to the script through the gradle command doesn't seem worth worth it.
with:
ref: master
- name: Checkout Airbyte
uses: actions/checkout@v3
- name: Cache Build Artifacts
uses: ./.github/actions/cache-build-artifacts
with:
cache-key: ${{ secrets.CACHE_VERSION }}-cdk
- uses: actions/setup-java@v3
with:
distribution: "zulu"
java-version: "17"
- uses: actions/setup-python@v4
with:
python-version: "3.9"
- name: Set up CI Gradle Properties
run: |
mkdir -p ~/.gradle/
cat > ~/.gradle/gradle.properties <<EOF
org.gradle.jvmargs=-Xmx8g -Xss4m \
--add-exports jdk.compiler/com.sun.tools.javac.api=ALL-UNNAMED \
--add-exports jdk.compiler/com.sun.tools.javac.file=ALL-UNNAMED \
--add-exports jdk.compiler/com.sun.tools.javac.parser=ALL-UNNAMED \
--add-exports jdk.compiler/com.sun.tools.javac.tree=ALL-UNNAMED \
--add-exports jdk.compiler/com.sun.tools.javac.util=ALL-UNNAMED
org.gradle.workers.max=8
org.gradle.vfs.watch=false
EOF
- name: Format
uses: Wandalen/[email protected]
with:
command: SUB_BUILD=CDK ./gradlew format --scan --info --stacktrace
attempt_limit: 3
attempt_delay: 5000 # in ms
- name: Commit Formatting Changes
uses: stefanzweifel/git-auto-commit-action@v4
with:
commit_message: Automated Commit - Formatting Changes
commit_user_name: Octavia Squidington III
commit_user_email: [email protected]
- name: Build
uses: Wandalen/[email protected]
with:
command: SUB_BUILD=CDK ./gradlew build --scan
attempt_limit: 3
attempt_delay: 5000 # in ms
# Connectors Base
# In case of self-hosted EC2 errors, remove this block.
start-connectors-base-build-runner:
name: "Connectors Base: Start Build EC2 Runner"
needs:
- changes
# Because scheduled builds on master require us to skip the changes job. Use always() to force this to run on master.
if: |
needs.changes.outputs.build == 'true' || needs.changes.outputs.connectors_base == 'true' || needs.changes.outputs.db == 'true' || (always() && github.ref == 'refs/heads/master')
timeout-minutes: 10
runs-on: ubuntu-latest
outputs:
label: ${{ steps.start-ec2-runner.outputs.label }}
ec2-instance-id: ${{ steps.start-ec2-runner.outputs.ec2-instance-id }}
steps:
- name: Checkout Airbyte
uses: actions/checkout@v3
- name: Check PAT rate limits
run: |
./tools/bin/find_non_rate_limited_PAT \
${{ secrets.GH_PAT_BUILD_RUNNER_OSS }} \
${{ secrets.GH_PAT_BUILD_RUNNER_BACKUP }}
- name: Start AWS Runner
id: start-ec2-runner
uses: ./.github/actions/start-aws-runner
with:
aws-access-key-id: ${{ secrets.SELF_RUNNER_AWS_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.SELF_RUNNER_AWS_SECRET_ACCESS_KEY }}
github-token: ${{ env.PAT }}
build-connectors-base:
# In case of self-hosted EC2 errors, removed the `needs` line and switch back to running on ubuntu-latest.
needs: start-connectors-base-build-runner # required to start the main job when the runner is ready
runs-on: ${{ needs.start-connectors-base-build-runner.outputs.label }} # run the job on the newly created runner
name: "Connectors Base: Build"
timeout-minutes: 90
steps:
- name: Checkout master
uses: actions/checkout@v3
# We checkout master to run mypy the CDK files that were modified in the PR.
# An alternative would be to use a GH action to detect the files and pass them as argument to the build command
# But the additional complexity of detecting the files and passing them to the script through the gradle command doesn't seem worth worth it.
with:
ref: master
- name: Checkout Airbyte
uses: actions/checkout@v3
with:
ref: ${{ github.head_ref }}
# - name: Cache Build Artifacts
# uses: ./.github/actions/cache-build-artifacts
# with:
# cache-key: ${{ secrets.CACHE_VERSION }}-connectors-gradle-build
- uses: actions/setup-java@v3
with:
distribution: "zulu"
java-version: "17"
- uses: actions/setup-node@v3
with:
node-version: "lts/*"
- uses: actions/setup-python@v4
with:
python-version: "3.9"
- name: Install Pyenv
run: python3 -m pip install virtualenv --user
- name: Install automake
run: apt-get update && apt-get install -y automake build-essential libtool libtool-bin autoconf
- name: Set up CI Gradle Properties
run: |
mkdir -p ~/.gradle/
cat > ~/.gradle/gradle.properties <<EOF
org.gradle.jvmargs=-Xmx8g -Xss4m \
--add-exports jdk.compiler/com.sun.tools.javac.api=ALL-UNNAMED \
--add-exports jdk.compiler/com.sun.tools.javac.file=ALL-UNNAMED \
--add-exports jdk.compiler/com.sun.tools.javac.parser=ALL-UNNAMED \
--add-exports jdk.compiler/com.sun.tools.javac.tree=ALL-UNNAMED \
--add-exports jdk.compiler/com.sun.tools.javac.util=ALL-UNNAMED
org.gradle.workers.max=8
org.gradle.vfs.watch=false
EOF
- name: Generate Template scaffold
uses: Wandalen/[email protected]
with:
command: ./gradlew :airbyte-integrations:connector-templates:generator:testScaffoldTemplates --scan
attempt_limit: 3
attempt_delay: 5000 # in ms
# Verify that the only committed file changes will be those made by the build, formatter or processResources
- name: Ensure no file change from code formatting
run: git --no-pager diff && test -z "$(git --no-pager diff)"
- name: Format
uses: Wandalen/[email protected]
with:
command: SUB_BUILD=CONNECTORS_BASE ./gradlew format --scan --info --stacktrace
attempt_limit: 3
attempt_delay: 5000 # in ms
- name: Build
uses: Wandalen/[email protected]
with:
command: SUB_BUILD=CONNECTORS_BASE ./gradlew build --scan
attempt_limit: 3
attempt_delay: 5000 # in ms
- name: Process Resources
uses: Wandalen/[email protected]
with:
command: SUB_BUILD=CONNECTORS_BASE ./gradlew :airbyte-config-oss:init-oss:processResources --scan
attempt_limit: 3
attempt_delay: 5000 # in ms
# This is helpful in the case that we change a previously commited generated file to be ignored by git.
- name: Remove any files that have been gitignored
run: git ls-files -i -c --exclude-from=.gitignore | xargs -r git rm --cached
- name: Commit Changes
uses: stefanzweifel/git-auto-commit-action@v4
with:
commit_message: Automated Commit - Format and Process Resources Changes
commit_user_name: Octavia Squidington III
commit_user_email: [email protected]
- name: Publish Connectors Base Test Results
uses: EnricoMi/publish-unit-test-result-action@v2
id: connectors-test-results
if: always()
with:
junit_files: "/actions-runner/_work/airbyte/airbyte/*/build/test-results/*/*.xml\n/actions-runner/_work/airbyte/airbyte/*/*/build/test-results/*/*.xml"
comment_mode: failures
json_file: connectors_base_results.json
json_test_case_results: true
check_name: "Connectors Base Test Results"
- name: Setup Google Cloud SDK
if: always()
uses: google-github-actions/setup-gcloud@v0
with:
service_account_key: ${{ secrets.GKE_TEST_SA_KEY }}
export_default_credentials: true
- name: Prep Test Results For GCS
if: always()
run: |
python tools/bin/prep_test_results_for_gcs.py --json connectors_base_results.json --jobid $GITHUB_JOB --runid $GITHUB_RUN_ID
- name: Upload Test Results to GCS
if: always()
run: |
gcs_bucket_name="dev-ab-ci-run-results"
filename=$(echo "${{ fromJSON( steps.connectors-test-results.outputs.json ).check_url }}" | sed 's@.*/@@')
echo "$filename"
gsutil -h "Cache-Control:public" cp connectors_base_results.jsonl "gs://$gcs_bucket_name/oss/$filename.jsonl"
- name: Generate Test Report
uses: dorny/test-reporter@v1
if: always()
with:
name: Connectors Base Test Report
# Specify top-level and second-level modules. Note there cannot be a space between the comma.
path: "/actions-runner/_work/airbyte/airbyte/*/build/test-results/*/*.xml,/actions-runner/_work/airbyte/airbyte/*/*/build/test-results/*/*.xml"
reporter: java-junit
fail-on-error: "false"
# In case of self-hosted EC2 errors, remove this block.
stop-connectors-base-build-runner:
name: "Connectors Base: Stop Build EC2 Runner"
timeout-minutes: 10
needs:
- start-connectors-base-build-runner # required to get output from the start-runner job
- build-connectors-base # required to wait when the main job is done
runs-on: ubuntu-latest
# Always is required to stop the runner even if the previous job has errors. However always() runs even if the previous step is skipped.
# Thus, we check for skipped here.
if: ${{ always() && needs.start-connectors-base-build-runner.result != 'skipped'}}
steps:
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@v1
with:
aws-access-key-id: ${{ secrets.SELF_RUNNER_AWS_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.SELF_RUNNER_AWS_SECRET_ACCESS_KEY }}
aws-region: us-east-2
- name: Checkout Airbyte
uses: actions/checkout@v3
- name: Check PAT rate limits
run: |
./tools/bin/find_non_rate_limited_PAT \
${{ secrets.GH_PAT_BUILD_RUNNER_OSS }} \
${{ secrets.GH_PAT_BUILD_RUNNER_BACKUP }}
- name: Stop EC2 runner
uses: supertopher/[email protected]
with:
mode: stop
github-token: ${{ env.PAT }}
label: ${{ needs.start-connectors-base-build-runner.outputs.label }}
ec2-instance-id: ${{ needs.start-connectors-base-build-runner.outputs.ec2-instance-id }}
notify-failure-slack-channel:
name: "Notify Slack Channel on Build Failures"
runs-on: ubuntu-latest
needs:
- build-connectors-base
- octavia-cli-build
if: ${{ failure() && github.ref == 'refs/heads/master' }}
steps:
- name: Checkout Airbyte
uses: actions/checkout@v3
- name: Match GitHub User to Slack User
id: match-github-to-slack-user
uses: ./.github/actions/match-github-to-slack-user
env:
AIRBYTE_TEAM_BOT_SLACK_TOKEN: ${{ secrets.SLACK_AIRBYTE_TEAM_READ_USERS }}
GITHUB_API_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Publish to OSS Build Failure Slack Channel
uses: abinoda/slack-action@master
env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN_AIRBYTE_TEAM }}
with:
args: >-
{\"channel\":\"C03BEADRPNY\", \"blocks\":[
{\"type\":\"divider\"},
{\"type\":\"section\",\"text\":{\"type\":\"mrkdwn\",\"text\":\" Merge to OSS Master failed! :bangbang: \n\n\"}},
{\"type\":\"section\",\"text\":{\"type\":\"mrkdwn\",\"text\":\"_merged by_: *${{ github.actor }}* \n\"}},
{\"type\":\"section\",\"text\":{\"type\":\"mrkdwn\",\"text\":\"<@${{ steps.match-github-to-slack-user.outputs.slack_user_ids }}> \n\"}},
{\"type\":\"section\",\"text\":{\"type\":\"mrkdwn\",\"text\":\" :octavia-shocked: <https://github.com/${{github.repository}}/actions/runs/${{github.run_id}}|View Action Run> :octavia-shocked: \n\"}},
{\"type\":\"divider\"}]}
notify-failure-slack-channel-fixed-broken-build:
name: "Notify Slack Channel on Build Fixes"
runs-on: ubuntu-latest
needs:
- build-connectors-base
- octavia-cli-build
if: success()
steps:
- name: Get Previous Workflow Status
uses: Mercymeilya/[email protected]
id: last_status
with:
github_token: ${{ secrets.GITHUB_TOKEN }}
# To avoid clogging up the channel, only publish build success if the previous build was a failure since this means the build was fixed.
- name: Publish Build Fixed Message to OSS Build Failure Slack Channel
if: ${{ steps.last_status.outputs.last_status == 'failure' }}
uses: abinoda/slack-action@master
env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN_AIRBYTE_TEAM }}
with:
args: >-
{\"channel\":\"C03BEADRPNY\", \"blocks\":[
{\"type\":\"divider\"},
{\"type\":\"section\",\"text\":{\"type\":\"mrkdwn\",\"text\":\" OSS Master Fixed! :white_check_mark: \n\n\"}},
{\"type\":\"section\",\"text\":{\"type\":\"mrkdwn\",\"text\":\"_merged by_: *${{ github.actor }}* \n\"}},
{\"type\":\"section\",\"text\":{\"type\":\"mrkdwn\",\"text\":\" :octavia-rocket: <https://github.com/${{github.repository}}/actions/runs/${{github.run_id}}|View Action Run> :octavia-rocket: \n\"}},
{\"type\":\"divider\"}]}