[WIP] Add e2e test for tune API with LLM hyperparameter optimization #235
Workflow file for this run
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# End-to-end test workflow for the tune API.
#
# Runs on every pull request except those that only touch the v1beta1 UI
# frontend. For each Kubernetes version in the matrix it sets up the test
# environment via the local composite actions, installs the Training Operator
# SDK, runs the e2e test with the tune API enabled, and finally collects
# disk-usage and pod diagnostics.
name: E2E Test with tune API

on:
  pull_request:
    paths-ignore:
      - "pkg/ui/v1beta1/frontend/**"

# Only one run per workflow + ref is kept alive; a newer run cancels the
# in-progress one.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  e2e:
    runs-on: ubuntu-22.04
    timeout-minutes: 120
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      # Relocates the docker data root to /mnt/docker and leaves a symlink at
      # the default location so docker keeps using the default path.
      # NOTE(review): presumably done because /mnt has more free space on the
      # runner - confirm.
      - name: Move docker data directory
        shell: bash
        run: |
          echo "Stopping docker service ..."
          sudo systemctl stop docker
          DOCKER_DEFAULT_ROOT_DIR=/var/lib/docker
          DOCKER_ROOT_DIR=/mnt/docker
          echo "Moving ${DOCKER_DEFAULT_ROOT_DIR} -> ${DOCKER_ROOT_DIR}"
          sudo mv "${DOCKER_DEFAULT_ROOT_DIR}" "${DOCKER_ROOT_DIR}"
          echo "Creating symlink ${DOCKER_DEFAULT_ROOT_DIR} -> ${DOCKER_ROOT_DIR}"
          sudo ln -s "${DOCKER_ROOT_DIR}" "${DOCKER_DEFAULT_ROOT_DIR}"
          echo "$(sudo ls -l "${DOCKER_DEFAULT_ROOT_DIR}")"
          echo "Starting docker service ..."
          sudo systemctl daemon-reload
          sudo systemctl start docker
          echo "Docker service status:"
          sudo systemctl --no-pager -l -o short status docker

      - name: Setup Test Env
        uses: ./.github/workflows/template-setup-e2e-test
        with:
          kubernetes-version: ${{ matrix.kubernetes-version }}

      - name: Install Training Operator SDK
        shell: bash
        run: |
          pip install "kubeflow-training[huggingface]==1.8.1"

      # Step to check disk space
      - name: Check Disk Space
        run: |
          echo "Checking disk space usage before e2e test..."
          df -h  # Run 'df' to check free disk space

      - name: Run e2e test with tune API
        uses: ./.github/workflows/template-e2e-test
        with:
          tune-api: true
          training-operator: true

      # Step to check disk space
      - name: Check Disk Space
        if: always()  # Run this step even if previous steps fail
        run: |
          echo "Checking disk space usage after e2e test..."
          df -h  # Run 'df' to check free disk space

      # Step to get logs of the relevant Experiment pod(s).
      # This is a best-effort diagnostic: it must not fail the job when no
      # matching pod exists, and it must cope with more than one match.
      - name: Fetch Experiment Pod Logs
        if: always()  # Run this step even if previous steps fail
        run: |
          # '|| true' stops 'bash -e' from aborting when grep matches nothing.
          POD_NAMES=$(kubectl get pods -n default --no-headers -o custom-columns=":metadata.name" | grep tune-example-2 || true)
          if [ -z "${POD_NAMES}" ]; then
            echo "No pod matching 'tune-example-2' found in namespace default"
            exit 0
          fi
          # Pod names contain no whitespace, so word-splitting is safe here.
          for POD_NAME in ${POD_NAMES}; do
            echo "Fetching logs for pod: ${POD_NAME}"
            kubectl describe pod "${POD_NAME}" -n default || echo "Failed to describe pod ${POD_NAME}"
            kubectl logs "${POD_NAME}" -n default || echo "Failed to fetch logs for pod ${POD_NAME}"
          done

    strategy:
      fail-fast: false
      matrix:
        # Detail: https://hub.docker.com/r/kindest/node
        kubernetes-version: ["v1.27.11", "v1.28.7", "v1.29.2"]