Skip to content

Commit

Permalink
improve WebbPSF data caching in GitHub CI (#923)
Browse files Browse the repository at this point in the history
  • Loading branch information
braingram authored Oct 16, 2023
2 parents 21f9e7c + 1ed817b commit c2671f0
Show file tree
Hide file tree
Showing 4 changed files with 108 additions and 152 deletions.
97 changes: 97 additions & 0 deletions .github/workflows/data.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
name: check and update webbpsf and crds cache

on:
  # Reusable-workflow entry point. Every output below is forwarded from the
  # `data` job so that a calling workflow can read it from the job that
  # `uses:` this file.
  workflow_call:
    outputs:
      crds_context:
        value: ${{ jobs.data.outputs.crds_context }}
      crds_path:
        value: ${{ jobs.data.outputs.crds_path }}
      crds_server:
        value: ${{ jobs.data.outputs.crds_server }}
      webbpsf_hash:
        value: ${{ jobs.data.outputs.webbpsf_hash }}
      webbpsf_path:
        value: ${{ jobs.data.outputs.webbpsf_path }}
  # Manual trigger.
  workflow_dispatch:
  # Weekly run: minute 42, hour 4, every Wednesday (day-of-week 3).
  schedule:
    - cron: '42 4 * * 3'

jobs:
  # Downloads the WebbPSF data tarball, checksums it, and populates the
  # `webbpsf-<checksum>` cache entry when no entry with that key exists yet.
  webbpsf-data:
    # Restricted to the upstream repository, and to scheduled runs, manual
    # runs, or pull requests labelled 'update webbpsf data'.
    if: >-
      (github.repository == 'spacetelescope/romancal' &&
      (github.event_name == 'schedule' ||
      github.event_name == 'workflow_dispatch' ||
      contains(github.event.pull_request.labels.*.name, 'update webbpsf data')))
    name: fetch, check, and possibly update webbpsf data cache
    runs-on: ubuntu-latest
    env:
      DATA_PATH: /tmp/data
      WEBBPSF_DATA_URL: https://stsci.box.com/shared/static/qxpiaxsjwo15ml6m4pkhtk36c9jgj70k.gz
    outputs:
      path: ${{ steps.cache_path.outputs.path }}
      hash: ${{ steps.data_hash.outputs.hash }}
    steps:
      - id: cache_path
        run: echo "path=$DATA_PATH" >> "$GITHUB_OUTPUT"
      - id: data_hash
        run: |
          # The tarball is downloaded to a workspace-relative tmp/ directory;
          # it is only unpacked later if the cache lookup misses.
          mkdir -p tmp
          wget "$WEBBPSF_DATA_URL" -O tmp/webbpsf-data.tar.gz
          digest=$(shasum tmp/webbpsf-data.tar.gz | cut -d ' ' -f 1)
          # Refuse to publish an empty checksum: it would yield the cache
          # key 'webbpsf-' and poison later lookups.
          if [ -z "$digest" ]; then
            echo "::error::could not compute a checksum for tmp/webbpsf-data.tar.gz"
            exit 1
          fi
          echo "hash=$digest" >> "$GITHUB_OUTPUT"
      - id: cache_check
        uses: actions/cache@v3
        with:
          path: ${{ steps.cache_path.outputs.path }}
          key: webbpsf-${{ steps.data_hash.outputs.hash }}
      # Only unpack when no cache entry with this key was restored.
      - if: ${{ steps.cache_check.outputs.cache-hit != 'true' }}
        name: Initialize cache
        env:
          CACHE_PATH: ${{ steps.cache_path.outputs.path }}
        run: |
          mkdir -p "$CACHE_PATH"
          tar -xzvf tmp/webbpsf-data.tar.gz -C "$CACHE_PATH"
  data:
    needs: [webbpsf-data]
    # run data job if webbpsf-data succeeds or is skipped. This allows
    # this data job to always fetch the crds context even if the webbpsf data fetching
    # was skipped (and an existing cache will be used for the webbpsf data).
    if: always() && (needs.webbpsf-data.result == 'success' || needs.webbpsf-data.result == 'skipped')
    name: retrieve current CRDS context, and WebbPSF data
    runs-on: ubuntu-latest
    env:
      OBSERVATORY: roman
      CRDS_SERVER_URL: https://roman-crds.stsci.edu
      CRDS_PATH: /tmp/data
      # Token read by the `gh` CLI in the webbpsf_hash step.
      GH_TOKEN: ${{ github.token }}
    outputs:
      crds_context: ${{ steps.crds_context.outputs.pmap }}
      crds_path: ${{ steps.crds_path.outputs.path }}
      crds_server: ${{ steps.crds_server.outputs.url }}
      webbpsf_hash: ${{ steps.webbpsf_hash.outputs.hash }}
      webbpsf_path: ${{ steps.webbpsf_path.outputs.path }}
    steps:
      # crds:
      # Get default CRDS_CONTEXT without installing crds client
      # See https://hst-crds.stsci.edu/static/users_guide/web_services.html#generic-request
      - id: crds_context
        run: |
          # Capture the value first: a failure inside a command substitution
          # that is merely an argument to `echo` would be ignored and an
          # empty context would be written to the step output.
          pmap=$(
            curl -sSf -X POST \
              -d '{"jsonrpc": "1.0", "method": "get_default_context", "params": ["${{ env.OBSERVATORY }}"], "id": 1}' \
              "$CRDS_SERVER_URL/json/" |
            python -c "import sys, json; print(json.load(sys.stdin)['result'])"
          )
          if [ -z "$pmap" ]; then
            echo "::error::CRDS server returned no default context for ${{ env.OBSERVATORY }}"
            exit 1
          fi
          echo "pmap=$pmap" >> "$GITHUB_OUTPUT"
      - id: crds_path
        run: echo "path=$CRDS_PATH" >> "$GITHUB_OUTPUT"
      - id: crds_server
        run: echo "url=$CRDS_SERVER_URL" >> "$GITHUB_OUTPUT"
      # webbpsf:
      - id: webbpsf_hash
        run: |
          # use actions/gh-actions-cache to allow filtering by key
          gh extension install actions/gh-actions-cache
          # First column of the first listed entry is the cache key,
          # i.e. 'webbpsf-<checksum>'.
          RECENT=$(gh actions-cache list -R spacetelescope/romancal --key webbpsf- --sort created-at | cut -f 1 | head -n 1)
          echo "RECENT=$RECENT"
          HASH=$(echo "$RECENT" | cut -d '-' -f 2)
          echo "HASH=$HASH"
          # Fail before publishing anything if no cache entry was found.
          if [ -z "$HASH" ]; then
            echo "::error::no 'webbpsf-' cache entry found; trigger this workflow manually to create one"
            exit 1
          fi
          echo "hash=$HASH" >> "$GITHUB_OUTPUT"
      - id: webbpsf_path
        run: echo "path=${{ steps.crds_path.outputs.path }}/webbpsf-data" >> "$GITHUB_OUTPUT"
55 changes: 4 additions & 51 deletions .github/workflows/roman_ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,55 +21,7 @@ concurrency:

jobs:
data:
name: retrieve current CRDS context, and WebbPSF data
runs-on: ubuntu-latest
env:
OBSERVATORY: roman
CRDS_SERVER_URL: https://roman-crds.stsci.edu
CRDS_PATH: /tmp/data
outputs:
data_path: ${{ steps.data.outputs.path }}
webbpsf_path: ${{ steps.webbpsf_path.outputs.path }}
data_hash: ${{ steps.data_hash.outputs.hash }}
crds_path: ${{ steps.crds_path.outputs.path }}
crds_context: ${{ steps.crds_context.outputs.pmap }}
crds_server: ${{ steps.crds_server.outputs.url }}
steps:
# crds:
- id: crds_context
run: >
echo "pmap=$(
curl -s -X POST -d '{"jsonrpc": "1.0", "method": "get_default_context", "params": ["${{ env.OBSERVATORY }}"], "id": 1}' ${{ env.CRDS_SERVER_URL }}/json/ |
python -c "import sys, json; print(json.load(sys.stdin)['result'])"
)" >> $GITHUB_OUTPUT
# Get default CRDS_CONTEXT without installing crds client
# See https://hst-crds.stsci.edu/static/users_guide/web_services.html#generic-request
- id: crds_path
run: echo "path=${{ env.CRDS_PATH }}" >> $GITHUB_OUTPUT
- id: crds_server
run: echo "url=${{ env.CRDS_SERVER_URL }}" >> $GITHUB_OUTPUT
# webbpsf:
- uses: actions/checkout@v3
with:
fetch-depth: 0
- id: data
run: |
echo "path=/tmp/data" >> $GITHUB_OUTPUT
echo "webbpsf_url=https://stsci.box.com/shared/static/qxpiaxsjwo15ml6m4pkhtk36c9jgj70k.gz" >> $GITHUB_OUTPUT
- run: |
mkdir -p tmp/data/
mkdir -p ${{ steps.data.outputs.path }}
- run: wget ${{ steps.data.outputs.webbpsf_url }} -O tmp/minimal-webbpsf-data.tar.gz
- run: tar -xzvf tmp/minimal-webbpsf-data.tar.gz -C tmp/data/
- id: data_hash
run: echo "hash=${{ hashFiles( 'tmp/data' ) }}" >> $GITHUB_OUTPUT
- run: mv tmp/data/* ${{ steps.data.outputs.path }}
- uses: actions/cache@v3
with:
path: ${{ steps.data.outputs.path }}
key: data-${{ steps.data_hash.outputs.hash }}-${{ steps.crds_context.outputs.pmap }}
- id: webbpsf_path
run: echo "path=${{ steps.data.outputs.path }}/webbpsf-data" >> $GITHUB_OUTPUT
uses: ./.github/workflows/data.yml
check:
uses: OpenAstronomy/github-actions-workflows/.github/workflows/tox.yml@v1
with:
Expand All @@ -91,8 +43,9 @@ jobs:
DD_GIT_REPOSITORY_URL: ${{ github.repositoryUrl }}
DD_GIT_COMMIT_SHA: ${{ github.sha }}
DD_GIT_BRANCH: ${{ github.ref_name }}
cache-path: ${{ needs.data.outputs.data_path }}
cache-key: data-${{ needs.data.outputs.data_hash }}-${{ needs.data.outputs.crds_context }}
cache-path: ${{ needs.data.outputs.crds_path }}
cache-key: data-${{ needs.data.outputs.webbpsf_hash }}-${{ needs.data.outputs.crds_context }}
cache-restore-keys: webbpsf-${{ needs.data.outputs.webbpsf_hash }}
envs: |
- linux: py39-oldestdeps-cov
pytest-results-summary: true
Expand Down
53 changes: 3 additions & 50 deletions .github/workflows/roman_ci_cron.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,55 +25,7 @@ concurrency:
jobs:
data:
if: (github.repository == 'spacetelescope/romancal' && (github.event_name == 'schedule' || github.event_name == 'push' || github.event_name == 'workflow_dispatch' || contains(github.event.pull_request.labels.*.name, 'run scheduled tests')))
name: retrieve current CRDS context, and WebbPSF data
runs-on: ubuntu-latest
env:
OBSERVATORY: roman
CRDS_SERVER_URL: https://roman-crds.stsci.edu
CRDS_PATH: /tmp/data
outputs:
data_path: ${{ steps.data.outputs.path }}
webbpsf_path: ${{ steps.webbpsf_path.outputs.path }}
data_hash: ${{ steps.data_hash.outputs.hash }}
crds_path: ${{ steps.crds_path.outputs.path }}
crds_context: ${{ steps.crds_context.outputs.pmap }}
crds_server: ${{ steps.crds_server.outputs.url }}
steps:
# crds:
- id: crds_context
run: >
echo "pmap=$(
curl -s -X POST -d '{"jsonrpc": "1.0", "method": "get_default_context", "params": ["${{ env.OBSERVATORY }}"], "id": 1}' ${{ env.CRDS_SERVER_URL }}/json/ |
python -c "import sys, json; print(json.load(sys.stdin)['result'])"
)" >> $GITHUB_OUTPUT
# Get default CRDS_CONTEXT without installing crds client
# See https://hst-crds.stsci.edu/static/users_guide/web_services.html#generic-request
- id: crds_path
run: echo "path=${{ env.CRDS_PATH }}" >> $GITHUB_OUTPUT
- id: crds_server
run: echo "url=${{ env.CRDS_SERVER_URL }}" >> $GITHUB_OUTPUT
# webbpsf:
- uses: actions/checkout@v3
with:
fetch-depth: 0
- id: data
run: |
echo "path=/tmp/data" >> $GITHUB_OUTPUT
echo "webbpsf_url=https://stsci.box.com/shared/static/n1fealx9q0m6sdnass6wnyfikvxtc0zz.gz" >> $GITHUB_OUTPUT
- run: |
mkdir -p tmp/data/
mkdir -p ${{ steps.data.outputs.path }}
- run: wget ${{ steps.data.outputs.webbpsf_url }} -O tmp/minimal-webbpsf-data.tar.gz
- run: tar -xzvf tmp/minimal-webbpsf-data.tar.gz -C tmp/data/
- id: data_hash
run: echo "hash=${{ hashFiles( 'tmp/data' ) }}" >> $GITHUB_OUTPUT
- run: mv tmp/data/* ${{ steps.data.outputs.path }}
- uses: actions/cache@v3
with:
path: ${{ steps.data.outputs.path }}
key: data-${{ steps.data_hash.outputs.hash }}-${{ steps.crds_context.outputs.pmap }}
- id: webbpsf_path
run: echo "path=${{ steps.data.outputs.path }}/webbpsf-data" >> $GITHUB_OUTPUT
uses: ./.github/workflows/data.yml
test:
uses: OpenAstronomy/github-actions-workflows/.github/workflows/tox.yml@main
needs: [ data ]
Expand All @@ -85,7 +37,8 @@ jobs:
CRDS_CLIENT_RETRY_COUNT: 3
CRDS_CLIENT_RETRY_DELAY_SECONDS: 20
cache-path: ${{ needs.data.outputs.crds_path }}
cache-key: data-${{ needs.data.outputs.data_hash }}-${{ needs.data.outputs.crds_context }}
cache-key: data-${{ needs.data.outputs.webbpsf_hash }}-${{ needs.data.outputs.crds_context }}
cache-restore-keys: webbpsf-${{ needs.data.outputs.webbpsf_hash }}
envs: |
- macos: py39
pytest-results-summary: true
Expand Down
55 changes: 4 additions & 51 deletions .github/workflows/tests_devdeps.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,55 +26,7 @@ concurrency:
jobs:
data:
if: (github.repository == 'spacetelescope/romancal' && (github.event_name == 'schedule' || github.event_name == 'push' || github.event_name == 'workflow_dispatch' || contains(github.event.pull_request.labels.*.name, 'run devdeps tests')))
name: retrieve current CRDS context, and WebbPSF data
runs-on: ubuntu-latest
env:
OBSERVATORY: roman
CRDS_SERVER_URL: https://roman-crds.stsci.edu
CRDS_PATH: /tmp/data
outputs:
data_path: ${{ steps.data.outputs.path }}
webbpsf_path: ${{ steps.webbpsf_path.outputs.path }}
data_hash: ${{ steps.data_hash.outputs.hash }}
crds_path: ${{ steps.crds_path.outputs.path }}
crds_context: ${{ steps.crds_context.outputs.pmap }}
crds_server: ${{ steps.crds_server.outputs.url }}
steps:
# crds:
- id: crds_context
run: >
echo "pmap=$(
curl -s -X POST -d '{"jsonrpc": "1.0", "method": "get_default_context", "params": ["${{ env.OBSERVATORY }}"], "id": 1}' ${{ env.CRDS_SERVER_URL }}/json/ |
python -c "import sys, json; print(json.load(sys.stdin)['result'])"
)" >> $GITHUB_OUTPUT
# Get default CRDS_CONTEXT without installing crds client
# See https://hst-crds.stsci.edu/static/users_guide/web_services.html#generic-request
- id: crds_path
run: echo "path=${{ env.CRDS_PATH }}" >> $GITHUB_OUTPUT
- id: crds_server
run: echo "url=${{ env.CRDS_SERVER_URL }}" >> $GITHUB_OUTPUT
# webbpsf:
- uses: actions/checkout@v3
with:
fetch-depth: 0
- id: data
run: |
echo "path=/tmp/data" >> $GITHUB_OUTPUT
echo "webbpsf_url=https://stsci.box.com/shared/static/n1fealx9q0m6sdnass6wnyfikvxtc0zz.gz" >> $GITHUB_OUTPUT
- run: |
mkdir -p tmp/data/
mkdir -p ${{ steps.data.outputs.path }}
- run: wget ${{ steps.data.outputs.webbpsf_url }} -O tmp/minimal-webbpsf-data.tar.gz
- run: tar -xzvf tmp/minimal-webbpsf-data.tar.gz -C tmp/data/
- id: data_hash
run: echo "hash=${{ hashFiles( 'tmp/data' ) }}" >> $GITHUB_OUTPUT
- run: mv tmp/data/* ${{ steps.data.outputs.path }}
- uses: actions/cache@v3
with:
path: ${{ steps.data.outputs.path }}
key: data-${{ steps.data_hash.outputs.hash }}-${{ steps.crds_context.outputs.pmap }}
- id: webbpsf_path
run: echo "path=${{ steps.data.outputs.path }}/webbpsf-data" >> $GITHUB_OUTPUT
uses: ./.github/workflows/data.yml
test:
if: (github.repository == 'spacetelescope/romancal' && (github.event_name == 'schedule' || github.event_name == 'push' || github.event_name == 'workflow_dispatch' || contains(github.event.pull_request.labels.*.name, 'run devdeps tests')))
uses: OpenAstronomy/github-actions-workflows/.github/workflows/tox.yml@main
Expand All @@ -86,8 +38,9 @@ jobs:
CRDS_SERVER_URL: ${{ needs.data.outputs.crds_server }}
CRDS_CLIENT_RETRY_COUNT: 3
CRDS_CLIENT_RETRY_DELAY_SECONDS: 20
cache-path: ${{ needs.data.outputs.data_path }}
cache-key: data-${{ needs.data.outputs.data_hash }}-${{ needs.data.outputs.crds_context }}
cache-path: ${{ needs.data.outputs.crds_path }}
cache-key: data-${{ needs.data.outputs.webbpsf_hash }}-${{ needs.data.outputs.crds_context }}
cache-restore-keys: webbpsf-${{ needs.data.outputs.webbpsf_hash }}
envs: |
- linux: py39-devdeps
- macos: py39-devdeps
Expand Down

0 comments on commit c2671f0

Please sign in to comment.