Introduce MinIO Object Store event based dataprep and retriever using Milvus and LanceDB #423
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Copyright (C) 2024 Intel Corporation | |
# SPDX-License-Identifier: Apache-2.0 | |
name: Check hyperlinks and relative path validity | |
on: | |
pull_request: | |
branches: [main] | |
types: [opened, reopened, ready_for_review, synchronize] | |
# If there is a new commit, the previous jobs will be canceled | |
concurrency: | |
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} | |
cancel-in-progress: true | |
jobs: | |
check-the-validity-of-hyperlinks-in-README: | |
runs-on: ubuntu-latest | |
steps: | |
- name: Clean Up Working Directory | |
run: sudo rm -rf ${{github.workspace}}/* | |
- name: Checkout Repo GenAIComps | |
uses: actions/checkout@v4 | |
with: | |
fetch-depth: 0 | |
- name: Check the Validity of Hyperlinks | |
# ignore_links=("https://platform.openai.com/docs/api-reference/fine-tuning" | |
# "https://platform.openai.com/docs/api-reference/" | |
# "https://openai.com/index/whisper/" | |
# "https://platform.openai.com/docs/api-reference/chat/create") | |
run: | | |
cd ${{github.workspace}} | |
fail="FALSE" | |
merged_commit=$(git log -1 --format='%H') | |
changed_files="$(git diff --name-status --diff-filter=ARM ${{ github.event.pull_request.base.sha }} ${merged_commit} | awk '/\.md$/ {print $NF}')" | |
if [ -n "$changed_files" ]; then | |
for changed_file in $changed_files; do | |
echo $changed_file | |
url_lines=$(grep -H -Eo '\]\(http[s]?://[^)]+\)' "$changed_file" | grep -Ev 'GenAIComps/blob/main') || true | |
if [ -n "$url_lines" ]; then | |
for url_line in $url_lines; do | |
url=$(echo "$url_line"|cut -d '(' -f2 | cut -d ')' -f1|sed 's/\.git$//') | |
path=$(echo "$url_line"|cut -d':' -f1 | cut -d'/' -f2-) | |
if [[ "$url" == "https://platform.openai.com/docs/api-reference/"* || "https://www.docker.com/get-started" == "$url" || "https://openai.com/index/whisper/" == "$url" ]]; then | |
echo "Link "$url" from ${{github.workspace}}/$path need to be verified by a real person." | |
else | |
response=$(curl -L -s -o /dev/null -w "%{http_code}" "$url") | |
if [ "$response" -ne 200 ]; then | |
echo "**********Validation failed, try again**********" | |
response_retry=$(curl -s -o /dev/null -w "%{http_code}" "$url") | |
if [ "$response_retry" -eq 200 ]; then | |
echo "*****Retry successfully*****" | |
else | |
echo "Invalid link from ${{github.workspace}}/$path: $url" | |
fail="TRUE" | |
fi | |
fi | |
fi | |
done | |
fi | |
done | |
else | |
echo "No changed .md file." | |
fi | |
if [[ "$fail" == "TRUE" ]]; then | |
exit 1 | |
else | |
echo "All hyperlinks are valid." | |
fi | |
shell: bash | |
check-the-validity-of-relative-path: | |
runs-on: ubuntu-latest | |
steps: | |
- name: Clean up Working Directory | |
run: sudo rm -rf ${{github.workspace}}/* | |
- name: Checkout Repo GenAIComps | |
uses: actions/checkout@v4 | |
with: | |
fetch-depth: 0 | |
- name: Checking Relative Path Validity | |
run: | | |
cd ${{github.workspace}} | |
fail="FALSE" | |
repo_name=${{ github.event.pull_request.head.repo.full_name }} | |
if [ "$(echo "$repo_name"|cut -d'/' -f1)" != "opea-project" ]; then | |
owner=$(echo "${{ github.event.pull_request.head.repo.full_name }}" |cut -d'/' -f1) | |
branch="https://github.com/$owner/GenAIComps/tree/${{ github.event.pull_request.head.ref }}" | |
else | |
branch="https://github.com/opea-project/GenAIComps/blob/${{ github.event.pull_request.head.ref }}" | |
fi | |
link_head="https://github.com/opea-project/GenAIComps/blob/main" | |
merged_commit=$(git log -1 --format='%H') | |
changed_files="$(git diff --name-status --diff-filter=ARM ${{ github.event.pull_request.base.sha }} ${merged_commit} | awk '/\.md$/ {print $NF}')" | |
png_lines=$(grep -Eo '\]\([^)]+\)' --include='*.md' -r .|grep -Ev 'http') | |
if [ -n "$png_lines" ]; then | |
for png_line in $png_lines; do | |
refer_path=$(echo "$png_line"|cut -d':' -f1 | cut -d'/' -f2-) | |
png_path=$(echo "$png_line"|cut -d '(' -f2 | cut -d ')' -f1) | |
if [[ "${png_path:0:1}" == "/" ]]; then | |
check_path=$png_path | |
elif [[ "$png_path" == *#* ]]; then | |
relative_path=$(echo "$png_path" | cut -d '#' -f1) | |
if [ -n "$relative_path" ]; then | |
check_path=$(dirname "$refer_path")/$relative_path | |
png_path=$(echo "$png_path" | awk -F'#' '{print "#" $2}') | |
else | |
check_path=$refer_path | |
fi | |
else | |
check_path=$(dirname "$refer_path")/$png_path | |
fi | |
if [ -e "$check_path" ]; then | |
real_path=$(realpath $check_path) | |
if [[ "$png_line" == *#* ]]; then | |
if [ -n "changed_files" ] && echo "$changed_files" | grep -q "^${refer_path}$"; then | |
url_dev=$branch$(echo "$real_path" | sed 's|.*/GenAIComps||')$png_path | |
response=$(curl -I -L -s -o /dev/null -w "%{http_code}" "$url_dev") | |
if [ "$response" -ne 200 ]; then | |
echo "**********Validation failed, try again**********" | |
response_retry=$(curl -s -o /dev/null -w "%{http_code}" "$url_dev") | |
if [ "$response_retry" -eq 200 ]; then | |
echo "*****Retry successfully*****" | |
else | |
echo "Invalid path from ${{github.workspace}}/$refer_path: $png_path, link: $url_dev" | |
fail="TRUE" | |
fi | |
else | |
echo "Validation succeed $png_line" | |
fi | |
fi | |
fi | |
else | |
echo "$check_path does not exist" | |
fail="TRUE" | |
fi | |
done | |
fi | |
if [[ "$fail" == "TRUE" ]]; then | |
exit 1 | |
else | |
echo "All hyperlinks are valid." | |
fi | |
shell: bash |