Skip to content

improve WebbPSF data caching in GitHub CI #13

improve WebbPSF data caching in GitHub CI

improve WebbPSF data caching in GitHub CI #13

Workflow file for this run

name: check/update webbpsf cache
on:
workflow_call:
schedule:
- cron: "42 4 * * 3"
pull_request:
# We also want this workflow triggered if the `update webbpsf data` label is
# added or present when PR is updated
types:
- opened
- reopened
- labeled
- unlabeled
- synchronize
workflow_dispatch:
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
jobs:
webbpsf-data:
if: (github.repository == 'spacetelescope/romancal' && (github.event_name == 'schedule' || github.event_name == 'push' || github.event_name == 'workflow_dispatch' || contains(github.event.pull_request.labels.*.name, 'update webbpsf data')))
name: fetch, check, and possibly update webbpsf data cache
runs-on: ubuntu-latest
env:
# DATA_PATH here should match CRDS_PATH in roman_ci.yml and roman_ci_cron.yml
DATA_PATH: /tmp/data
WEBBPSF_DATA_URL: https://stsci.box.com/shared/static/qxpiaxsjwo15ml6m4pkhtk36c9jgj70k.gz
outputs:
path: ${{ steps.cache_path.outputs.path }}
hash: ${{ steps.data_hash.outputs.hash }}
steps:
- id: cache_path
run: |
echo "path=${{ env.DATA_PATH }}" >> $GITHUB_OUTPUT
- id: data_hash
run: |
mkdir -p tmp/data
wget ${{ env.WEBBPSF_DATA_URL }} -O tmp/webbpsf-data.tar.gz
echo "hash=$( shasum tmp/webbpsf-data.tar.gz | cut -d ' ' -f 1 )" >> $GITHUB_OUTPUT
- id: cache_check
uses: actions/cache@v3
with:
path: ${{ steps.cache_path.outputs.path }}
key: webbpsf-${{ steps.data_hash.outputs.hash }}
- if: ${{ steps.cache_check.outputs.cache-hit != 'true' }}
name: Initialize cache
run: |
mkdir -p ${{ steps.cache_path.outputs.path }}
tar -xzvf tmp/webbpsf-data.tar.gz -C ${{ steps.cache_path.outputs.path }}
data:
needs:
[webbpsf-data]
# run data job if webbpsf-data succeeds or is skipped
if: always() && (needs.webbpsf-data.result == 'success' || needs.webbpsf-data.result == 'skipped')
name: retrieve current CRDS context, and WebbPSF data
runs-on: ubuntu-latest
env:
OBSERVATORY: roman
CRDS_SERVER_URL: https://roman-crds.stsci.edu
# CRDS_PATH here should match DATA_PATH in webbpsf_data.yml
CRDS_PATH: /tmp/data
GH_TOKEN: ${{ github.token }}
outputs:
# TODO simplify data_path and crds_path as they are the same
data_path: ${{ steps.data.outputs.path }}
webbpsf_path: ${{ steps.webbpsf_path.outputs.path }}
data_hash: ${{ steps.data_hash.outputs.hash }}
crds_path: ${{ steps.crds_path.outputs.path }}
crds_context: ${{ steps.crds_context.outputs.pmap }}
crds_server: ${{ steps.crds_server.outputs.url }}
steps:
# crds:
- id: data
run: echo "path=/tmp/data" >> $GITHUB_OUTPUT
- id: crds_context
run: >
echo "pmap=$(
curl -s -X POST -d '{"jsonrpc": "1.0", "method": "get_default_context", "params": ["${{ env.OBSERVATORY }}"], "id": 1}' ${{ env.CRDS_SERVER_URL }}/json/ |
python -c "import sys, json; print(json.load(sys.stdin)['result'])"
)" >> $GITHUB_OUTPUT
# Get default CRDS_CONTEXT without installing crds client
# See https://hst-crds.stsci.edu/static/users_guide/web_services.html#generic-request
- id: crds_path
run: echo "path=${{ env.CRDS_PATH }}" >> $GITHUB_OUTPUT
- id: crds_server
run: echo "url=${{ env.CRDS_SERVER_URL }}" >> $GITHUB_OUTPUT
# webbpsf:
- id: data_hash
run: |
# use actions/gh-actions-cache to allow filtering by key
gh extension install actions/gh-actions-cache
RECENT=$(gh actions-cache list -R spacetelescope/romancal --key webbpsf- --sort created-at | cut -f 1 | head -n 1)
echo "RECENT=$RECENT"
HASH=$(echo $RECENT | cut -d '-' -f 2)
echo "HASH=$HASH"
echo "hash=$HASH" >> $GITHUB_OUTPUT
if [ "$HASH" == '' ]; then exit 1; fi
- id: webbpsf_path
run: echo "path=${{ steps.data.outputs.path }}/webbpsf-data" >> $GITHUB_OUTPUT