.github/workflows/manual-accuary-test.yml

# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

name: Run accuracy test on Gaudi

# Manually dispatched workflow. Both inputs are comma-separated lists that the
# get-test-matrix job turns into the job matrix.
on:
  workflow_dispatch:
    inputs:
      examples:
        description: 'List of examples to test [AudioQnA,ChatQnA,CodeGen,FaqGen]'
        default: "ChatQnA"
        required: true
        type: string
      datasets:
        description: "List of datasets to test [en, zh]"
        default: "en"
        required: true
        type: string
  # for debug test
  # pull_request:
  #   branches: [main]
  #   types: [opened, reopened, ready_for_review, synchronize]

# A newer run of this workflow on the same ref cancels the one in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}-on-push
  cancel-in-progress: true

jobs:
  # Converts the comma-separated workflow inputs into JSON arrays that the
  # downstream jobs expand into their matrices with fromJson().
  get-test-matrix:
    runs-on: ubuntu-latest
    outputs:
      datasets: ${{ steps.get-matrix.outputs.datasets }}
      examples: ${{ steps.get-matrix.outputs.examples }}
    steps:
      - name: Create Matrix
        id: get-matrix
        # The inputs reach the script as environment variables instead of
        # being expanded into the script text, so a crafted input value cannot
        # inject shell commands.
        env:
          RAW_DATASETS: ${{ inputs.datasets }}
          RAW_EXAMPLES: ${{ inputs.examples }}
        run: |
          # Usage: emit_json_list <output-name> <comma/space separated list>
          # Writes <output-name>=<compact JSON array of strings> as a step output.
          emit_json_list() {
            local -a items
            # Commas become spaces; read splits on whitespace without globbing.
            read -r -a items <<< "${2//,/ }"
            echo "$1=$(printf '%s\n' "${items[@]}" | jq -R '.' | jq -sc '.')" >> "$GITHUB_OUTPUT"
          }
          emit_json_list datasets "$RAW_DATASETS"
          emit_json_list examples "$RAW_EXAMPLES"

  # Runs the accuracy pipeline once per (dataset, example) combination on the
  # aise-acc runner.
  setup-acc:
    needs: [get-test-matrix]
    runs-on: aise-acc
    strategy:
      # One failing combination does not cancel the others.
      fail-fast: false
      matrix:
        dataset: ${{ fromJson(needs.get-test-matrix.outputs.datasets) }}
        example: ${{ fromJson(needs.get-test-matrix.outputs.examples) }}
        # AudioQnA, CodeGen and FaqGen are never run against the zh dataset.
        exclude:
          - dataset: zh
            example: AudioQnA
          - dataset: zh
            example: CodeGen
          - dataset: zh
            example: FaqGen
    steps:
      # Empties the workspace, prunes unused docker data and removes the local
      # :latest images that match the 100.83.111.229:5000/opea/ reference.
      # Both the file removal and the image removal are best-effort.
      - name: Clean up Working Directory
        run: |
          sudo rm -rf ${{ github.workspace }}/* || true
          docker system prune -f
          docker images --filter reference="100.83.111.229:5000/opea/*:latest" -q | xargs docker rmi || true

      # The three source trees are checked out side by side in the workspace:
      #   Validation/    - the repository this workflow runs in
      #   GenAIEval/     - opea-project/GenAIEval, with full history
      #   GenAIExamples/ - opea-project/GenAIExamples
      - name: Checkout out Validation
        uses: actions/checkout@v4
        with:
          path: Validation

      - name: Checkout out GenAIEval
        uses: actions/checkout@v4
        with:
          repository: opea-project/GenAIEval
          path: GenAIEval
          fetch-depth: 0

      - name: Checkout out GenAIExamples
        uses: actions/checkout@v4
        with:
          repository: opea-project/GenAIExamples
          path: GenAIExamples

      # Recreates the OPEA_acc conda environment (python 3.12) and installs the
      # Python dependencies for GenAIEval plus the extras of the selected
      # example. Matrix values are read from environment variables so they are
      # never expanded into the script text.
      - name: Set up environment
        env:
          example: ${{ matrix.example }}
          dataset: ${{ matrix.dataset }}
        run: |
          set -x
          conda_env_name="OPEA_acc"
          export PATH=${HOME}/miniforge3/bin/:$PATH
          # Always start from a fresh environment. -y keeps the removal
          # non-interactive on conda releases that ask for confirmation.
          if conda info --envs | grep -q "$conda_env_name"; then
            echo "$conda_env_name exist! Recreating..."
            conda env remove --name "$conda_env_name" -y
          fi
          conda create -n "${conda_env_name}" python=3.12 -y
          echo "Created!"
          # The activation method is chosen from the second dotted field of
          # the conda version string.
          conda_version=$(conda --version 2>&1)
          version=$(echo $conda_version | grep -oP 'conda \K[0-9]+\.[0-9]+\.[0-9]+' | cut -d. -f2)
          if [[ $version -ge 6 ]]; then
            source activate ${conda_env_name}
          else
            source ${HOME}/miniforge3/etc/profile.d/conda.sh
            conda activate ${conda_env_name}
          fi
          echo "Dataset: ${dataset}"
          echo "Example: ${example}"
          cd "${{ github.workspace }}/GenAIEval"
          pip install -r requirements.txt
          if [[ "${example}" == "AudioQnA" ]]; then
            cd "${{ github.workspace }}/GenAIExamples/AudioQnA/benchmark/accuracy"
            pip install -r requirements.txt
          elif [[ "${example}" == "CodeGen" ]]; then
            # Still inside GenAIEval: installs that checkout in editable mode.
            pip install -e .
          elif [[ "${example}" == "FaqGen" ]]; then
            pip install ragas==0.1.19
          fi

      # Runs acc_test.sh in --eval_prepare mode for ChatQnA on the en dataset
      # and stores its output under acc-log/.
      - name: Evaluation Prepare
        shell: bash
        # The whole condition has to live inside ONE expression. Literal text
        # mixed with several expression fragments is a non-empty string and
        # therefore always true. The second operand must test the example,
        # not the dataset.
        if: ${{ matrix.dataset == 'en' && matrix.example == 'ChatQnA' }}
        env:
          HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
          HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
          example: ${{ matrix.example }}
          dataset: ${{ matrix.dataset }}
        run: |
          export PATH=${HOME}/miniforge3/bin/:$PATH
          # The activation method is chosen from the second dotted field of
          # the conda version string.
          conda_version=$(conda --version 2>&1)
          version=$(echo $conda_version | grep -oP 'conda \K[0-9]+\.[0-9]+\.[0-9]+' | cut -d. -f2)
          if [[ $version -ge 6 ]]; then
            source activate OPEA_acc
          else
            source ${HOME}/miniforge3/etc/profile.d/conda.sh
            conda activate OPEA_acc
          fi
          mkdir -p acc-log
          # The driver script is copied next to the example's accuracy code
          # and run from there.
          cp ${{ github.workspace }}/Validation/.github/scripts/acc_test.sh ${{ github.workspace }}/GenAIExamples/${example}/benchmark/accuracy/
          bash ${{ github.workspace }}/GenAIExamples/${example}/benchmark/accuracy/acc_test.sh --eval_prepare "${example}" "${dataset}"  | tee ${{ github.workspace }}/acc-log/${example}-${dataset}-eval_prepare.txt

      # Runs acc_test.sh in --launch_service mode and stores its output under
      # acc-log/.
      - name: Launch Service
        # Explicit bash enables "-eo pipefail", so a failing acc_test.sh fails
        # the step even though its output is piped through tee. The default
        # shell would report only tee's exit status.
        shell: bash
        env:
          HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
          HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
          example: ${{ matrix.example }}
          dataset: ${{ matrix.dataset }}
          IMAGE_REPO: 100.83.111.229:5000/opea
        run: |
          export PATH=${HOME}/miniforge3/bin/:$PATH
          # The activation method is chosen from the second dotted field of
          # the conda version string.
          conda_version=$(conda --version 2>&1)
          version=$(echo $conda_version | grep -oP 'conda \K[0-9]+\.[0-9]+\.[0-9]+' | cut -d. -f2)
          if [[ $version -ge 6 ]]; then
            source activate OPEA_acc
          else
            source ${HOME}/miniforge3/etc/profile.d/conda.sh
            conda activate OPEA_acc
          fi
          mkdir -p acc-log
          # The driver script is copied next to the example's accuracy code
          # and run from there.
          cp ${{ github.workspace }}/Validation/.github/scripts/acc_test.sh ${{ github.workspace }}/GenAIExamples/${example}/benchmark/accuracy/
          bash ${{ github.workspace }}/GenAIExamples/${example}/benchmark/accuracy/acc_test.sh --launch_service "${example}" "${dataset}"  | tee ${{ github.workspace }}/acc-log/${example}-${dataset}-launch_service.txt

      # Runs acc_test.sh in --launch_acc mode with GenAIEval on PYTHONPATH and
      # stores its output under acc-log/ as <example>-<dataset>-acc_test.txt.
      - name: Run Evaluation
        # Explicit bash enables "-eo pipefail", so a failing acc_test.sh fails
        # the step even though its output is piped through tee. The default
        # shell would report only tee's exit status.
        shell: bash
        env:
          HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
          example: ${{ matrix.example }}
          dataset: ${{ matrix.dataset }}
          port: 9001
        run: |
          set -x
          export PATH=${HOME}/miniforge3/bin/:$PATH
          # The activation method is chosen from the second dotted field of
          # the conda version string.
          conda_version=$(conda --version 2>&1)
          version=$(echo $conda_version | grep -oP 'conda \K[0-9]+\.[0-9]+\.[0-9]+' | cut -d. -f2)
          if [[ $version -ge 6 ]]; then
            source activate OPEA_acc
          else
            source ${HOME}/miniforge3/etc/profile.d/conda.sh
            conda activate OPEA_acc
          fi
          DPATH=${{ github.workspace }}/GenAIEval/
          export PYTHONPATH=$PYTHONPATH:$DPATH
          export PATH=$PATH:/bin:/usr/bin
          mkdir -p acc-log
          # The driver script is copied next to the example's accuracy code
          # and run from there.
          cp ${{ github.workspace }}/Validation/.github/scripts/acc_test.sh ${{ github.workspace }}/GenAIExamples/${example}/benchmark/accuracy/
          bash ${{ github.workspace }}/GenAIExamples/${example}/benchmark/accuracy/acc_test.sh --launch_acc "${example}" "${dataset}"  | tee ${{ github.workspace }}/acc-log/${example}-${dataset}-acc_test.txt

      # Runs regardless of the outcome of the earlier steps: prunes unused
      # docker data, stops and removes the example's compose containers, then
      # removes the local :latest images matching 100.83.111.229:5000/opea/.
      - name: Clean up container
        if: always()
        env:
          example: ${{ matrix.example }}
        run: |
          docker system prune -f
          cd ${{ github.workspace }}/GenAIExamples/${example}/docker_compose/intel/hpu/gaudi
          # Containers are removed only when stopping them succeeded.
          if docker compose stop; then
            docker compose rm -f
          fi
          docker images --filter reference="100.83.111.229:5000/opea/*:latest" -q | xargs docker rmi || true

      # upload-artifact@v4 refuses a second upload under an existing name, so
      # the dataset is part of the artifact name: ChatQnA may run for both en
      # and zh within one dispatch.
      - name: Publish pipeline artifact
        if: ${{ !cancelled() }}
        uses: actions/upload-artifact@v4
        with:
          name: ${{ matrix.example }}-${{ matrix.dataset }}
          path: ${{ github.workspace }}/acc-log/*.txt

  # For every (example, dataset) combination: downloads the logs published by
  # setup-acc and republishes the evaluation log as a summary artifact.
  Process-test-log:
    runs-on: aise-acc
    needs: [setup-acc, get-test-matrix]
    strategy:
      matrix:
        example: ${{ fromJson(needs.get-test-matrix.outputs.examples) }}
        dataset: ${{ fromJson(needs.get-test-matrix.outputs.datasets) }}
        # Same exclusions as the setup-acc matrix.
        exclude:
          - example: AudioQnA
            dataset: zh
          - example: CodeGen
            dataset: zh
          - example: FaqGen
            dataset: zh
      fail-fast: false
    steps:
      - name: Download artifact
        uses: actions/download-artifact@v4
        with:
          # Must match the name used by the upload step in setup-acc.
          name: ${{ matrix.example }}-${{ matrix.dataset }}
          path: ${{ github.workspace }}/acc-log/
        continue-on-error: false

      - name: Use the downloaded artifact
        env:
          example: ${{ matrix.example }}
          dataset: ${{ matrix.dataset }}
        # Only this combination's log is read. A wildcard could also pick up
        # logs that earlier jobs left in acc-log/ on the same runner.
        run: |
          cat "${{ github.workspace }}/acc-log/${example}-${dataset}-acc_test.txt" | tee acc-log/summary.txt

      - name: Publish pipeline artifact
        if: ${{ !cancelled() }}
        uses: actions/upload-artifact@v4
        with:
          # One summary artifact per combination; a shared name would collide
          # between matrix jobs.
          name: acc-summary-${{ matrix.example }}-${{ matrix.dataset }}
          path: ${{ github.workspace }}/acc-log/summary.txt