# adding data quality examples #86
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Workflow display name shown in the Actions tab.
name: Test Branch Deployments End to End

# Run on every pull-request lifecycle event listed below, including `closed`.
on:
  pull_request:
    types: [opened, synchronize, reopened, closed]
# One concurrency group per git ref: a newer run for the same ref cancels
# the run that is still in progress.
concurrency:
  # Cancel in-progress deploys to same branch
  group: ${{ github.ref }}/branch_deployments
  cancel-in-progress: true
# Workflow-level environment, visible to every job and step below.
# Literal values are quoted so they stay strings; everything else is read
# from the repository's `secrets` / `vars` contexts.
env:
  # Dagster Cloud connection and build settings.
  # https (not http) so the API token is never sent to a plaintext endpoint.
  DAGSTER_CLOUD_URL: "https://wisemuffin.dagster.cloud"
  DAGSTER_CLOUD_API_TOKEN: ${{ secrets.DAGSTER_CLOUD_API_TOKEN }}
  ENABLE_FAST_DEPLOYS: 'true'
  PYTHON_VERSION: '3.11'
  DAGSTER_CLOUD_FILE: 'dagster_cloud.yaml'
  DAGSTER_CLOUD_ORGANIZATION: "wisemuffin"
  DAGSTER_PROJECT_NAME: pipeline_nsw_doe

  # NSW DOE data-stack project settings.
  NSW_DOE_DATA_STACK_IN_A_BOX_DAGSTER_PROJECT_DIR: ${{ vars.NSW_DOE_DATA_STACK_IN_A_BOX_DAGSTER_PROJECT_DIR }}
  NSW_DOE_DATA_STACK_IN_A_BOX_DBT_PROJECT_DIR: ${{ vars.NSW_DOE_DATA_STACK_IN_A_BOX_DBT_PROJECT_DIR }}
  NSW_DOE_DATA_STACK_IN_A_BOX__ENV: ${{ vars.NSW_DOE_DATA_STACK_IN_A_BOX__ENV }}
  NSW_DOE_DATA_STACK_IN_A_BOX_DB_PATH_AND_DB: ${{ secrets.NSW_DOE_DATA_STACK_IN_A_BOX_DB_PATH_AND_DB }}
  NSW_DOE_DATA_STACK_IN_A_BOX_DB_NAME: ${{ vars.NSW_DOE_DATA_STACK_IN_A_BOX_DB_NAME }}

  # Credentials and settings for external services.
  MOTHERDUCK_TOKEN: ${{ secrets.MOTHERDUCK_TOKEN }}
  DATAFOLD_APIKEY: ${{ secrets.DATAFOLD_APIKEY }}
  DAGSTER_HOME: ${{ vars.DAGSTER_HOME }}
  AWS_ROLE_TO_ASSUME: ${{ secrets.AWS_ROLE_TO_ASSUME }}
  S3_BUCKET_METADATA: ${{ secrets.S3_BUCKET_METADATA }}
  DESTINATION__DUCKDB__CREDENTIALS: ${{ secrets.DESTINATION__DUCKDB__CREDENTIALS }}
  SOURCES__GITHUB__ACCESS_TOKEN: ${{ secrets.SOURCES__GITHUB__ACCESS_TOKEN }}
  SOURCES__GOOGLE_ANALYTICS__CREDENTIALS__PROJECT_ID: ${{ secrets.SOURCES__GOOGLE_ANALYTICS__CREDENTIALS__PROJECT_ID }}
  SOURCES__GOOGLE_ANALYTICS__CREDENTIALS__CLIENT_EMAIL: ${{ secrets.SOURCES__GOOGLE_ANALYTICS__CREDENTIALS__CLIENT_EMAIL }}
  SOURCES__GOOGLE_ANALYTICS__CREDENTIALS__PRIVATE_KEY: ${{ secrets.SOURCES__GOOGLE_ANALYTICS__CREDENTIALS__PRIVATE_KEY }}
  SOURCES__GOOGLE_ANALYTICS__PROPERTY_ID: ${{ secrets.SOURCES__GOOGLE_ANALYTICS__PROPERTY_ID }}

  # TPCH demo project settings.
  TPCH__ENV: ${{ vars.TPCH__ENV }}
  TPCH_DBT_PROJECT_DIR: ${{ vars.TPCH_DBT_PROJECT_DIR }}
  TPCH_DB_PATH_AND_DB: ${{ secrets.TPCH_DB_PATH_AND_DB }}
  TPCH_DB_NAME: ${{ vars.TPCH_DB_NAME }}
# NOTE(review): in the copy of this file under review every `name@version`
# action reference was replaced by an e-mail-obfuscation placeholder. The
# action names below are reconstructed from the step names/ids; the version
# tags (`@v0.1` for dagster-cloud-action, `@v5` for setup-python) are
# assumptions — TODO confirm against the repository before merging.
jobs:
  # Job 1: runs the prerun check, then either emits `build_info` for the
  # Docker job (docker-deploy) or builds and deploys a Python executable
  # in-place (pex-deploy).
  dagster_cloud_default_deploy:
    name: Dagster Serverless Deploy
    # GitHub has retired the ubuntu-20.04 hosted runner image, so jobs
    # pinned to it no longer start; use the next LTS image instead.
    runs-on: ubuntu-22.04
    environment: test
    outputs:
      # Only populated when the "Launch Docker Deploy" step runs.
      build_info: ${{ steps.parse-workspace.outputs.build_info }}
    steps:
      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.11.x"

      # Its `result` output ('docker-deploy' or 'pex-deploy') gates the
      # conditional steps below.
      - name: Prerun Checks
        id: prerun
        uses: dagster-io/dagster-cloud-action/actions/utils/prerun@v0.1

      # Derive a per-PR schema name: "pr_full_" + the PR head branch name,
      # lower-cased, with every non-alphanumeric character replaced by "_".
      - name: Set NSW_DOE_DATA_STACK_IN_A_BOX_TARGET_SCHEMA
        run: |
          NSW_DOE_DATA_STACK_IN_A_BOX_TARGET_SCHEMA="pr_full_$(echo "${GITHUB_HEAD_REF}" | tr '[:upper:]' '[:lower:]' | sed -e 's/[^a-zA-Z0-9]/_/g')"
          # Export for later steps. Values appended to $GITHUB_ENV are not
          # visible through the `env` context within this same step, so the
          # echo below uses the shell variable.
          echo "NSW_DOE_DATA_STACK_IN_A_BOX_TARGET_SCHEMA=${NSW_DOE_DATA_STACK_IN_A_BOX_TARGET_SCHEMA}" >> "$GITHUB_ENV"
          echo schema "$NSW_DOE_DATA_STACK_IN_A_BOX_TARGET_SCHEMA"

      # Confirms the exported value is visible to subsequent steps.
      - name: check schema
        run: echo schema ${{ env.NSW_DOE_DATA_STACK_IN_A_BOX_TARGET_SCHEMA }}

      # docker-deploy path: produce the `build_info` job output that drives
      # the matrix of the Docker Deploy job.
      - name: Launch Docker Deploy
        if: steps.prerun.outputs.result == 'docker-deploy'
        id: parse-workspace
        uses: dagster-io/dagster-cloud-action/actions/utils/parse_workspace@v0.1
        with:
          # Passed literally; presumably expanded inside the action — TODO confirm.
          dagster_cloud_file: $DAGSTER_CLOUD_FILE

      # pex-deploy path starts here.
      - name: Checkout for Python Executable Deploy
        if: steps.prerun.outputs.result == 'pex-deploy'
        uses: actions/checkout@v3
        with:
          ref: ${{ github.head_ref }}
          path: .

      - name: Validate configuration
        id: ci-validate
        if: steps.prerun.outputs.result == 'pex-deploy'
        uses: dagster-io/dagster-cloud-action/actions/utils/dagster-cloud-cli@v0.1
        with:
          command: "ci check --project-dir . --dagster-cloud-yaml-path ${{ env.DAGSTER_CLOUD_FILE }}"

      - name: Initialize build session
        id: ci-init
        if: steps.prerun.outputs.result == 'pex-deploy'
        uses: dagster-io/dagster-cloud-action/actions/utils/ci-init@v0.1
        with:
          project_dir: .
          dagster_cloud_yaml_path: ${{ env.DAGSTER_CLOUD_FILE }}

      # Installs dependencies into a uv-managed virtualenv, runs `dbt deps`
      # in both dbt projects, installs the Dagster project, then runs
      # `dagster-dbt project prepare-for-deployment` for each project.py.
      - name: Prepare DBT project for deployment
        if: steps.prerun.outputs.result == 'pex-deploy'
        # --upgrade-strategy eager picks up newer packages that are required for things to work
        # NOTE(review): that flag is not used by the uv commands below — comment may be stale.
        run: |
          # echo "PYTHONPATH="pwd >> $GITHUB_ENV
          python -m pip install uv
          uv venv
          source .venv/bin/activate
          # currently requirements.txt is stored at total project level one level above dagster project
          uv pip install -r requirements.txt
          cd ./transformation/demo_transformation_scaling_tpch
          dbt deps
          cd ../..
          cd ./transformation/transformation_nsw_doe
          dbt deps
          cd ../..
          cd ./${{ env.NSW_DOE_DATA_STACK_IN_A_BOX_DAGSTER_PROJECT_DIR }}
          uv pip install .
          dagster-dbt project prepare-for-deployment --file ./${{ env.DAGSTER_PROJECT_NAME }}/project.py
          dagster-dbt project prepare-for-deployment --file ./pipeline_nsw_doe_requires_secrets/project.py
          dagster-dbt project prepare-for-deployment --file ./demo_pipeline_scaling_tpch/project.py
          # The cli command below can be used to manage syncing the prod manifest to branches if state_path is set on the DbtProject
          # dagster-cloud ci dagster-dbt project manage-state --file ./${{ env.DAGSTER_PROJECT_NAME }}/project.py
        shell: bash

      - name: Python Executable Deploy
        if: steps.prerun.outputs.result == 'pex-deploy'
        uses: dagster-io/dagster-cloud-action/actions/build_deploy_python_executable@v0.1
        with:
          # The $VARS below are passed literally; presumably expanded inside
          # the action — TODO confirm.
          dagster_cloud_file: "$GITHUB_WORKSPACE/$DAGSTER_CLOUD_FILE"
          build_output_dir: "$GITHUB_WORKSPACE/build"
          python_version: "${{ env.PYTHON_VERSION }}"
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          NSW_DOE_DATA_STACK_IN_A_BOX_TARGET_SCHEMA: ${{ env.NSW_DOE_DATA_STACK_IN_A_BOX_TARGET_SCHEMA }}
          # cant seem to send env to dagster config, for now when doing end to end test will need to set this env in the UI

  # Job 2: runs only when job 1 produced `build_info`; fans out one matrix
  # entry per element of that JSON value and builds/deploys each one.
  dagster_cloud_docker_deploy:
    name: Docker Deploy
    # See the runner note on the first job: ubuntu-20.04 is retired.
    runs-on: ubuntu-22.04
    if: needs.dagster_cloud_default_deploy.outputs.build_info
    needs: dagster_cloud_default_deploy
    strategy:
      # Let the remaining matrix entries finish even if one fails.
      fail-fast: false
      matrix:
        location: ${{ fromJSON(needs.dagster_cloud_default_deploy.outputs.build_info) }}
    steps:
      - name: Checkout
        uses: actions/checkout@v3
        with:
          ref: ${{ github.head_ref }}

      - name: Build and deploy to Dagster Cloud serverless
        uses: dagster-io/dagster-cloud-action/actions/build_deploy_docker@v0.1
        with:
          dagster_cloud_api_token: ${{ secrets.DAGSTER_CLOUD_API_TOKEN }}
          location: ${{ toJson(matrix.location) }}
          base_image: "python:${{ env.PYTHON_VERSION }}-slim"
          # Uncomment to pass through Github Action secrets as a JSON string of key-value pairs
          # env_vars: ${{ toJson(secrets) }}
          organization_id: ${{ secrets.ORGANIZATION_ID }}
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}