Skip to content

Optimize path and link validity check. #1616

Optimize path and link validity check.

Optimize path and link validity check. #1616

# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
name: File Change Warning
on:
pull_request:
branches: [main]
types: [opened, reopened, ready_for_review, synchronize]
# If there is a new commit, the previous jobs will be canceled
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
jobs:
Dockerfile-path-change-detection-in-GenAIComps:
runs-on: ubuntu-latest
steps:
- name: Clean Up Working Directory
run: sudo rm -rf ${{github.workspace}}/*
- name: Checkout Code
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Clone GenAIExamples
run: |
cd ..
git clone https://github.com/opea-project/GenAIExamples
- name: Check Dockerfile Paths in Readme
if: always()
run: |
set -e
shopt -s globstar
cd ${{github.workspace}}
is_use="FALSE"
used_files=""
merged_commit=$(git log -1 --format='%H')
changed_files="$(git diff --name-status --diff-filter=DR ${{ github.event.pull_request.base.sha }} ${merged_commit} -- '**/Dockerfile**' | cut -f2)"
if [ -n "$changed_files" ]; then
for file in $changed_files; do
if grep -q "$file" ./**/*.md; then
is_use="TRUE"
used_files+="$file "
fi
done
fi
if [[ "$is_use" == "TRUE" ]]; then
echo "Warning: Changed Dockerfile paths:"
echo "$used_files"
echo "Please modify the corresponding README in GenAIComps."
exit 1
fi
- name: Check Dockerfile path included in image build yaml
if: always()
run: |
set -e
shopt -s globstar
no_add="FALSE"
cd ${{github.workspace}}
Dockerfiles=$(find ./comps -name '*Dockerfile*'|sed 's/^\.\///')
if [ -n "$Dockerfiles" ]; then
for Dockerfile in $Dockerfiles; do
service=$(echo "$Dockerfile" | awk -F '/' '{print $2}')
if grep -q "$Dockerfile" ../GenAIExamples/**/*build.yaml*; then
mode="" #CI
else
mode="-cd" #CD
fi
yaml_file=${{github.workspace}}/.github/workflows/docker/compose/"$service"-compose
if ! grep -q "$Dockerfile" "$yaml_file"*yaml; then
echo "AR: Update $Dockerfile to .github/workflows/docker/compose/"$service"-compose"$mode".yaml."
no_add="TRUE"
fi
done
fi
if [[ "$no_add" == "TRUE" ]]; then
exit 1
fi
- name: Check Dockerfile inside image build yaml exist in code
if: always()
run: |
shopt -s globstar
no_exist="FALSE"
cd ${{github.workspace}}
yamls=$(find .github/workflows/docker/compose/ -name '*.yaml')
if [ -n "$yamls" ]; then
for yaml in $yamls; do
dockerfiles=$(grep 'dockerfile:' "$yaml"|sed 's/dockerfile: //')
for dockerfile in $dockerfiles; do
if [[ "$dockerfile" == *"comps/"* ]]; then
if ! [ -e "$dockerfile" ]; then
# cat "$yaml"
echo "AR: The dockerfile path "$dockerfile" in "$yaml" does not exist, remove or update it."
no_exist="TRUE"
fi
fi
done
done
fi
if [[ "$no_exist" == "TRUE" ]]; then
exit 1
fi
Dockerfile-path-change-detection-in-GenAIExamples:
runs-on: ubuntu-latest
steps:
- name: Clean Up Working Directory
run: sudo rm -rf ${{github.workspace}}/*
- name: Checkout code
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Clone repo GenAIExamples
run: |
cd ..
git clone https://github.com/opea-project/GenAIExamples
- name: Check for changed Dockerfile paths
run: |
set -e
shopt -s globstar
cd ${{github.workspace}}
is_use="FALSE"
used_files=""
merged_commit=$(git log -1 --format='%H')
changed_files="$(git diff --name-status --diff-filter=DR ${{ github.event.pull_request.base.sha }} ${merged_commit} -- '**/Dockerfile**' | cut -f2)"
if [ -n "$changed_files" ]; then
for file in $changed_files; do
matching_files=$(grep -rl "$file" ../GenAIExamples/**/*.md) || true
if [ -n "$matching_files" ]; then
is_use="TRUE"
used_files+="$file "
echo "Modified Dockerfile '$file' is referenced in:"
echo "$matching_files"
else
echo "Modified Dockerfile '$file' is not referenced"
fi
done
fi
if [[ "$is_use" == "TRUE" ]]; then
echo "Warning: Changed Dockerfile paths:"
echo "$used_files"
echo "Please modify the corresponding README in GenAIExamples repo and ask [email protected] for final confirmation."
exit 1
fi
check-the-validity-of-hyperlinks-in-README:
runs-on: ubuntu-latest
steps:
- name: Clean Up Working Directory
run: sudo rm -rf ${{github.workspace}}/*
- name: Checkout Repo GenAIComps
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Check the Validity of Hyperlinks
# ignore_links=("https://platform.openai.com/docs/api-reference/fine-tuning"
# "https://platform.openai.com/docs/api-reference/"
# "https://openai.com/index/whisper/"
# "https://platform.openai.com/docs/api-reference/chat/create")
run: |
cd ${{github.workspace}}
fail="FALSE"
merged_commit=$(git log -1 --format='%H')
changed_files="$(git diff --name-status --diff-filter=ARM ${{ github.event.pull_request.base.sha }} ${merged_commit} | awk '/\.md$/ {print $NF}')"
if [ -n "changed_files" ]; then
for changed_file in $changed_files; do
echo $changed_file
url_lines=$(grep -H -Eo '\]\(http[s]?://[^)]+\)' "$changed_file" | grep -Ev 'GenAIComps/blob/main')
if [ -n "$url_lines" ]; then
for url_line in $url_lines; do
url=$(echo "$url_line"|cut -d '(' -f2 | cut -d ')' -f1|sed 's/\.git$//')
path=$(echo "$url_line"|cut -d':' -f1 | cut -d'/' -f2-)
if [[ "https://platform.openai.com/docs/api-reference/fine-tuning" == "$url" || "https://platform.openai.com/docs/api-reference/" == "$url" || "https://openai.com/index/whisper/" == "$url" || "https://platform.openai.com/docs/api-reference/chat/create" == "$url" ]]; then
echo "Link "$url" from ${{github.workspace}}/$path need to be verified by a real person."
else
response=$(curl -L -s -o /dev/null -w "%{http_code}" "$url")
if [ "$response" -ne 200 ]; then
echo "**********Validation failed, try again**********"
response_retry=$(curl -s -o /dev/null -w "%{http_code}" "$url")
if [ "$response_retry" -eq 200 ]; then
echo "*****Retry successfully*****"
else
echo "Invalid link from ${{github.workspace}}/$path: $url"
fail="TRUE"
fi
fi
fi
done
fi
done
else
echo "No changed .md file."
fi
if [[ "$fail" == "TRUE" ]]; then
exit 1
else
echo "All hyperlinks are valid."
fi
shell: bash
check-the-validity-of-relative-path:
runs-on: ubuntu-latest
steps:
- name: Clean up Working Directory
run: sudo rm -rf ${{github.workspace}}/*
- name: Checkout Repo GenAIComps
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Checking Relative Path Validity
run: |
cd ${{github.workspace}}
fail="FALSE"
repo_name=${{ github.event.pull_request.head.repo.full_name }}
if [ "$(echo "$repo_name"|cut -d'/' -f1)" != "opea-project" ]; then
owner=$(echo "${{ github.event.pull_request.head.repo.full_name }}" |cut -d'/' -f1)
branch="https://github.com/$owner/GenAIComps/tree/${{ github.event.pull_request.head.ref }}"
else
branch="https://github.com/opea-project/GenAIComps/blob/${{ github.event.pull_request.head.ref }}"
fi
link_head="https://github.com/opea-project/GenAIComps/blob/main"
merged_commit=$(git log -1 --format='%H')
changed_files="$(git diff --name-status --diff-filter=ARM ${{ github.event.pull_request.base.sha }} ${merged_commit} | awk '/\.md$/ {print $NF}')"
png_lines=$(grep -Eo '\]\([^)]+\)' --include='*.md' -r .|grep -Ev 'http')
if [ -n "$png_lines" ]; then
for png_line in $png_lines; do
refer_path=$(echo "$png_line"|cut -d':' -f1 | cut -d'/' -f2-)
png_path=$(echo "$png_line"|cut -d '(' -f2 | cut -d ')' -f1)
if [[ "${png_path:0:1}" == "/" ]]; then
check_path=$png_path
elif [[ "$png_path" == *#* ]]; then
relative_path=$(echo "$png_path" | cut -d '#' -f1)
if [ -n "$relative_path" ]; then
check_path=$(dirname "$refer_path")/$relative_path
png_path=$(echo "$png_path" | awk -F'#' '{print "#" $2}')
else
check_path=$refer_path
fi
else
check_path=$(dirname "$refer_path")/$png_path
fi
if [ -e "$check_path" ]; then
real_path=$(realpath $check_path)
if [[ "$png_line" == *#* ]]; then
if [ -n "changed_files" ] && echo "$changed_files" | grep -q "^${refer_path}$"; then
url_dev=$branch$(echo "$real_path" | sed 's|.*/GenAIComps||')$png_path
response=$(curl -I -L -s -o /dev/null -w "%{http_code}" "$url_dev")
if [ "$response" -ne 200 ]; then
echo "**********Validation failed, try again**********"
response_retry=$(curl -s -o /dev/null -w "%{http_code}" "$url_dev")
if [ "$response_retry" -eq 200 ]; then
echo "*****Retry successfully*****"
else
echo "Invalid path from ${{github.workspace}}/$refer_path: $png_path, link: $url_dev"
fail="TRUE"
fi
else
echo "Validation succeed $png_line"
fi
fi
fi
else
echo "$check_path does not exist"
fail="TRUE"
fi
done
fi
if [[ "$fail" == "TRUE" ]]; then
exit 1
else
echo "All hyperlinks are valid."
fi
shell: bash