From 52a6ce928955462c3e85aa3bf0dda3c46b67d477 Mon Sep 17 00:00:00 2001 From: Zach Burnett Date: Thu, 25 Jul 2024 15:51:04 +0000 Subject: [PATCH] use reusable workflow from WebbPSF repository to download and cache data (#1311) Co-authored-by: Eddie Schlafly --- .github/workflows/data.yml | 97 ++++++++-------------------- .github/workflows/roman_ci.yml | 18 +++--- .github/workflows/roman_ci_cron.yaml | 19 +++--- .github/workflows/tests_devdeps.yml | 19 +++--- 4 files changed, 59 insertions(+), 94 deletions(-) diff --git a/.github/workflows/data.yml b/.github/workflows/data.yml index 0eafbff2c..520b61c11 100644 --- a/.github/workflows/data.yml +++ b/.github/workflows/data.yml @@ -1,77 +1,36 @@ +name: download and cache data + on: - workflow_call: - outputs: - path: - value: ${{ jobs.path.outputs.path }} - webbpsf_path: - value: ${{ jobs.webbpsf_path.outputs.path }} - webbpsf_hash: - value: ${{ jobs.webbpsf_hash.outputs.hash }} - workflow_dispatch: schedule: - cron: "42 4 * * 3" - -env: - DATA_PATH: /tmp/data + workflow_dispatch: + inputs: + webbpsf_minimal: + description: minimal WebbPSF dataset + type: boolean + required: false + default: true jobs: - path: - runs-on: ubuntu-latest - outputs: - path: ${{ steps.path.outputs.path }} - steps: - - id: path - run: echo "path=${{ env.DATA_PATH }}" >> $GITHUB_OUTPUT - webbpsf_path: - needs: [ path ] + download_webbpsf_data: + uses: spacetelescope/webbpsf/.github/workflows/download_data.yml@develop + with: + minimal: ${{ github.event_name != 'workflow_dispatch' && true || inputs.webbpsf_minimal }} + move_data_cache_path: + needs: [ download_webbpsf_data ] runs-on: ubuntu-latest - outputs: - path: ${{ steps.path.outputs.path }} steps: - - id: path - run: echo "path=${{ needs.path.outputs.path }}/webbpsf-data" >> $GITHUB_OUTPUT - webbpsf_data: - if: (github.repository == 'spacetelescope/romancal' && (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' || contains(github.event.pull_request.labels.*.name, 'update webbpsf data'))) - needs: [ path, webbpsf_path ] - name: download and cache WebbPSF data - runs-on: ubuntu-latest - env: - WEBBPSF_DATA_URL: https://stsci.box.com/shared/static/qxpiaxsjwo15ml6m4pkhtk36c9jgj70k.gz - steps: - - run: mkdir -p tmp/data - - run: wget ${{ env.WEBBPSF_DATA_URL }} -O tmp/webbpsf-data.tar.gz - - id: data_hash - run: echo "hash=$( shasum tmp/webbpsf-data.tar.gz | cut -d ' ' -f 1 )" >> $GITHUB_OUTPUT - - id: cache_check - uses: actions/cache@v4 + - name: retrieve cached WebbPSF data + uses: actions/cache/restore@v4 with: - path: ${{ needs.path.outputs.path }} - key: webbpsf-${{ steps.data_hash.outputs.hash }} - - if: ${{ steps.cache_check.outputs.cache-hit != 'true' }} - run: mkdir -p ${{ needs.path.outputs.path }} - - if: ${{ steps.cache_check.outputs.cache-hit != 'true' }} - run: tar -xzvf tmp/webbpsf-data.tar.gz -C ${{ needs.path.outputs.path }} - webbpsf_hash: - needs: [ webbpsf_path, webbpsf_data ] - # run data job if webbpsf-data succeeds or is skipped. This allows - # this data job to always fetch the crds context even if the webbpsf data fetching - # was skipped (and an existing cache will be used for the webbpsf data). - if: always() && (needs.webbpsf_data.result == 'success' || needs.webbpsf_data.result == 'skipped') - name: retrieve latest data cache key - runs-on: ubuntu-latest - env: - GH_TOKEN: ${{ github.token }} - outputs: - hash: ${{ steps.hash.outputs.hash }} - steps: - - id: hash - run: | - # use actions/gh-actions-cache to allow filtering by key - gh extension install actions/gh-actions-cache - - RECENT=$(gh actions-cache list -R spacetelescope/romancal --key webbpsf- --sort created-at | cut -f 1 | head -n 1) - echo "RECENT=$RECENT" - HASH=$(echo $RECENT | cut -d '-' -f 2) - echo "HASH=$HASH" - echo "hash=$HASH" >> $GITHUB_OUTPUT - if [ "$HASH" == '' ]; then exit 1; fi + path: ${{ needs.download_webbpsf_data.outputs.cache_path }} + key: ${{ needs.download_webbpsf_data.outputs.cache_key }} + - run: mkdir -p /tmp/data/ + - run: mv ${{ needs.download_webbpsf_data.outputs.cache_path }}/webbpsf-data/ /tmp/data/ + - run: echo WEBBPSF_PATH=/tmp/data/webbpsf-data/ >> $GITHUB_ENV + # save a new cache to the generalized data directory + - name: save a single combined data cache + uses: actions/cache/save@v4 + with: + path: /tmp/data/ + key: ${{ needs.download_webbpsf_data.outputs.cache_key }} diff --git a/.github/workflows/roman_ci.yml b/.github/workflows/roman_ci.yml index 1cb2cbb3a..e4dba3b6a 100644 --- a/.github/workflows/roman_ci.yml +++ b/.github/workflows/roman_ci.yml @@ -25,19 +25,21 @@ jobs: with: envs: | - linux: check-dependencies - data: - uses: ./.github/workflows/data.yml + webbpsf_data_cache: + uses: spacetelescope/webbpsf/.github/workflows/retrieve_cache.yml@develop + with: + minimal: true crds_contexts: uses: spacetelescope/crds/.github/workflows/contexts.yml@master test: uses: OpenAstronomy/github-actions-workflows/.github/workflows/tox.yml@v1 - needs: [ data, crds_contexts ] + needs: [ webbpsf_data_cache, crds_contexts ] secrets: CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} with: setenv: | - WEBBPSF_PATH: ${{ needs.data.outputs.webbpsf_path }} - CRDS_PATH: ${{ needs.data.outputs.path }}/crds_cache + WEBBPSF_PATH: /tmp/data/webbpsf-data/ + CRDS_PATH: /tmp/data/crds_cache/ CRDS_SERVER_URL: https://roman-crds.stsci.edu CRDS_CLIENT_RETRY_COUNT: 3 CRDS_CLIENT_RETRY_DELAY_SECONDS: 20 @@ -46,9 +48,9 @@ jobs: DD_GIT_REPOSITORY_URL: ${{ github.repositoryUrl }} DD_GIT_COMMIT_SHA: ${{ github.sha }} DD_GIT_BRANCH: ${{ github.ref_name }} - cache-path: ${{ needs.data.outputs.path }} - cache-key: data-${{ needs.data.outputs.webbpsf_hash }}-${{ needs.crds_contexts.outputs.roman }} - cache-restore-keys: webbpsf-${{ needs.data.outputs.webbpsf_hash }} + cache-path: /tmp/data/ + cache-key: data-${{ needs.webbpsf_data_cache.outputs.cache_key }}-${{ needs.crds_contexts.outputs.roman }} + cache-restore-keys: ${{ needs.webbpsf_data_cache.outputs.cache_key }} envs: | - linux: py310-oldestdeps-webbpsf-cov pytest-results-summary: true diff --git a/.github/workflows/roman_ci_cron.yaml b/.github/workflows/roman_ci_cron.yaml index 9226a59fe..9f0be3b56 100644 --- a/.github/workflows/roman_ci_cron.yaml +++ b/.github/workflows/roman_ci_cron.yaml @@ -23,26 +23,29 @@ concurrency: cancel-in-progress: true jobs: - data: + webbpsf_data_cache: if: (github.repository == 'spacetelescope/romancal' && (github.event_name == 'schedule' || github.event_name == 'push' || github.event_name == 'workflow_dispatch' || contains(github.event.pull_request.labels.*.name, 'run scheduled tests'))) - uses: ./.github/workflows/data.yml + uses: spacetelescope/webbpsf/.github/workflows/retrieve_cache.yml@develop + with: + minimal: true crds_contexts: + if: (github.repository == 'spacetelescope/romancal' && (github.event_name == 'schedule' || github.event_name == 'push' || github.event_name == 'workflow_dispatch' || contains(github.event.pull_request.labels.*.name, 'run scheduled tests'))) uses: spacetelescope/crds/.github/workflows/contexts.yml@master test: uses: OpenAstronomy/github-actions-workflows/.github/workflows/tox.yml@v1 - needs: [ data, crds_contexts ] + needs: [ webbpsf_data_cache, crds_contexts ] secrets: CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} with: setenv: | - WEBBPSF_PATH: ${{ needs.data.outputs.webbpsf_path }} - CRDS_PATH: ${{ needs.data.outputs.path }}/crds_cache + WEBBPSF_PATH: /tmp/data/webbpsf-data/ + CRDS_PATH: /tmp/data/crds_cache/ CRDS_SERVER_URL: https://roman-crds.stsci.edu CRDS_CLIENT_RETRY_COUNT: 3 CRDS_CLIENT_RETRY_DELAY_SECONDS: 20 - cache-path: ${{ needs.data.outputs.path }} - cache-key: data-${{ needs.data.outputs.webbpsf_hash }}-${{ needs.crds_contexts.outputs.roman }} - cache-restore-keys: webbpsf-${{ needs.data.outputs.webbpsf_hash }} + cache-path: /tmp/data/ + cache-key: data-${{ needs.webbpsf_data_cache.outputs.cache_key }}-${{ needs.crds_contexts.outputs.roman }} + cache-restore-keys: ${{ needs.webbpsf_data_cache.outputs.cache_key }} envs: | - macos: py310-webbpsf pytest-results-summary: true diff --git a/.github/workflows/tests_devdeps.yml b/.github/workflows/tests_devdeps.yml index 2588e0989..50caabd1c 100644 --- a/.github/workflows/tests_devdeps.yml +++ b/.github/workflows/tests_devdeps.yml @@ -24,26 +24,27 @@ concurrency: cancel-in-progress: true jobs: - data: + webbpsf_data_cache: if: (github.repository == 'spacetelescope/romancal' && (github.event_name == 'schedule' || github.event_name == 'push' || github.event_name == 'workflow_dispatch' || contains(github.event.pull_request.labels.*.name, 'run devdeps tests'))) - uses: ./.github/workflows/data.yml + uses: spacetelescope/webbpsf/.github/workflows/retrieve_cache.yml@develop + with: + minimal: true crds_contexts: if: (github.repository == 'spacetelescope/romancal' && (github.event_name == 'schedule' || github.event_name == 'push' || github.event_name == 'workflow_dispatch' || contains(github.event.pull_request.labels.*.name, 'run devdeps tests'))) uses: spacetelescope/crds/.github/workflows/contexts.yml@master test: - if: (github.repository == 'spacetelescope/romancal' && (github.event_name == 'schedule' || github.event_name == 'push' || github.event_name == 'workflow_dispatch' || contains(github.event.pull_request.labels.*.name, 'run devdeps tests'))) uses: OpenAstronomy/github-actions-workflows/.github/workflows/tox.yml@v1 - needs: [ data, crds_contexts ] + needs: [ webbpsf_data_cache, crds_contexts ] with: setenv: | - WEBBPSF_PATH: ${{ needs.data.outputs.webbpsf_path }} - CRDS_PATH: ${{ needs.data.outputs.path }}/crds_cache + WEBBPSF_PATH: /tmp/data/webbpsf-data/ + CRDS_PATH: /tmp/data/crds_cache/ CRDS_SERVER_URL: https://roman-crds.stsci.edu CRDS_CLIENT_RETRY_COUNT: 3 CRDS_CLIENT_RETRY_DELAY_SECONDS: 20 - cache-path: ${{ needs.data.outputs.path }} - cache-key: data-${{ needs.data.outputs.webbpsf_hash }}-${{ needs.crds_contexts.outputs.roman }} - cache-restore-keys: webbpsf-${{ needs.data.outputs.webbpsf_hash }} + cache-path: /tmp/data/ + cache-key: data-${{ needs.webbpsf_data_cache.outputs.cache_key }}-${{ needs.crds_contexts.outputs.roman }} + cache-restore-keys: ${{ needs.webbpsf_data_cache.outputs.cache_key }} envs: | - linux: py310-stdevdeps-webbpsf - linux: py310-devdeps-webbpsf