diff --git a/.github/workflows/llm_performance_tests.yml b/.github/workflows/llm_performance_tests.yml
index b4ec534eb19..f74c91b66da 100644
--- a/.github/workflows/llm_performance_tests.yml
+++ b/.github/workflows/llm_performance_tests.yml
@@ -855,7 +855,7 @@ jobs:
         shell: bash
         run: |
           # temporarily remove gemma2 for Windows dGPU tests
-          if [ ${{ matrix.platform }} == "perf-igpu" ]; then
+          if [ "${{ matrix.platform }}" == "perf-dgpu" ]; then
             sed -i "s/- 'google\/gemma-2-2b-it'/# - 'google\/gemma-2-2b-it'/" python/llm/test/benchmark/igpu-perf/32-32_int4_fp16_443.yaml
             sed -i "s/- 'google\/gemma-2-9b-it'/# - 'google\/gemma-2-9b-it'/" python/llm/test/benchmark/igpu-perf/32-32_int4_fp16_443.yaml
           fi
@@ -1043,7 +1043,7 @@ jobs:
         shell: bash
         run: |
           # temporarily remove gemma2 for Windows dGPU tests
-          if [ ${{ matrix.platform }} == "perf-igpu" ]; then
+          if [ "${{ matrix.platform }}" == "perf-dgpu" ]; then
             sed -i "s/- 'google\/gemma-2-2b-it'/# - 'google\/gemma-2-2b-it'/" python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16_443.yaml
             sed -i "s/- 'google\/gemma-2-9b-it'/# - 'google\/gemma-2-9b-it'/" python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16_443.yaml
           fi
@@ -1230,7 +1230,7 @@ jobs:
         shell: bash
         run: |
           # temporarily remove gemma2 for Windows dGPU tests
-          if [ ${{ matrix.platform }} == "perf-igpu" ]; then
+          if [ "${{ matrix.platform }}" == "perf-dgpu" ]; then
             sed -i "s/- 'google\/gemma-2-2b-it'/# - 'google\/gemma-2-2b-it'/" python/llm/test/benchmark/igpu-perf/2048-256_int4_fp16_443.yaml
             sed -i "s/- 'google\/gemma-2-9b-it'/# - 'google\/gemma-2-9b-it'/" python/llm/test/benchmark/igpu-perf/2048-256_int4_fp16_443.yaml
           fi
@@ -1430,7 +1430,7 @@ jobs:
         shell: bash
         run: |
           # temporarily remove gemma2 for Windows dGPU tests
-          if [ ${{ matrix.platform }} == "perf-igpu" ]; then
+          if [ "${{ matrix.platform }}" == "perf-dgpu" ]; then
             sed -i "s/- 'google\/gemma-2-2b-it'/# - 'google\/gemma-2-2b-it'/" python/llm/test/benchmark/igpu-perf/3072-384_int4_fp16_443.yaml
           fi
 
@@ -1611,7 +1611,7 @@ jobs:
         shell: bash
         run: |
           # temporarily remove gemma2 for Windows dGPU tests
-          if [ ${{ matrix.platform }} == "perf-igpu" ]; then
+          if [ "${{ matrix.platform }}" == "perf-dgpu" ]; then
             sed -i "s/- 'google\/gemma-2-2b-it'/# - 'google\/gemma-2-2b-it'/" python/llm/test/benchmark/igpu-perf/4096-512_int4_fp16_443.yaml
           fi
           sed -i 's/{today}_test2/{today}_test3/g' python/llm/dev/benchmark/all-in-one/run.py
diff --git a/.github/workflows/temp.yml b/.github/workflows/temp.yml
new file mode 100644
index 00000000000..5d829bc639e
--- /dev/null
+++ b/.github/workflows/temp.yml
@@ -0,0 +1,849 @@
+# Windows, currently only manually triggered
+  # TODO: merge igpu & dgpu perf test into one step for Windows GPU
+  llm-performance-test-on-dgpu:
+    if: ${{  github.event_name == 'workflow_dispatch' && inputs.dgpu  }}
+    needs: llm-cpp-build
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - os: windows
+            python-version: "3.11"
+    runs-on: [self-hosted, "${{ matrix.os }}", llm, perf-dgpu]
+    env:
+      ANALYTICS_ZOO_ROOT: ${{ github.workspace }}
+    steps:
+      - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # actions/checkout@v3
+        with:
+          repository: 'intel-analytics/ipex-llm'
+          ref: ${{ inputs.checkout-ref }}  # ref to test, taken from the workflow input
+
+      # Runs only for a manual dispatch whose checkout-ref is not 'main' (i.e. a test on a specific ref)
+      - name: Download llm binary
+        if: ${{ github.event_name == 'workflow_dispatch' && (inputs.checkout-ref != 'main') }}
+        uses: ./.github/actions/llm/download-llm-binary
+
+      - name: Prepare for install ipex-llm from source
+        if: ${{ github.event_name == 'workflow_dispatch' && (inputs.checkout-ref != 'main') }}
+        shell: bash
+        run: |  # strip the '==" + CORE_XE_VERSION' pin from the three bigdl-core-xe* requirements in setup.py
+          sed -i 's/"bigdl-core-xe-23==" + CORE_XE_VERSION/"bigdl-core-xe-23"/g' python/llm/setup.py
+          sed -i 's/"bigdl-core-xe-batch-23==" + CORE_XE_VERSION/"bigdl-core-xe-batch-23"/g' python/llm/setup.py
+          sed -i 's/"bigdl-core-xe-addons-23==" + CORE_XE_VERSION/"bigdl-core-xe-addons-23"/g' python/llm/setup.py
+
+      - name: Install ipex-llm and other related packages (install from source)
+        if: ${{ github.event_name == 'workflow_dispatch' && (inputs.checkout-ref != 'main') }}
+        shell: cmd  # Windows batch script; runs only when checkout-ref is not 'main'
+        run: |  # create the dgpu-perf conda env, build the ipex_llm wheel in python\llm (fail if none is found), install it with the xpu_lnl extra
+          call conda create -n dgpu-perf python=${{ matrix.python-version }} libuv -y
+          call conda activate dgpu-perf
+
+          pip install --upgrade pip
+          pip install --upgrade wheel
+          pip install --upgrade omegaconf pandas
+          pip install --upgrade tiktoken einops transformers_stream_generator matplotlib
+
+          cd python\llm
+          python setup.py clean --all bdist_wheel --win
+          if not exist dist\ipex_llm*.whl (exit /b 1)
+          for %%i in (dist\ipex_llm*.whl) do set whl_name=%%i
+
+          pip install --pre --upgrade %whl_name%[xpu_lnl] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/lnl/cn/
+
+          if %ERRORLEVEL% neq 0 (exit /b 1)
+          pip list
+
+          call conda deactivate
+
+      - name: Determine desired ipex-llm version
+        if: ${{ github.event.schedule || (github.event_name == 'workflow_dispatch' && (inputs.checkout-ref == 'main')) }}
+        shell: bash
+        run: |
+          # Version under test is 2.2.0b<yesterday as YYYYMMDD>; it is exported as TEST_VERSION for later steps
+          pinned_version="2.2.0b$(date -d 'yesterday' '+%Y%m%d')"
+          echo "TEST_VERSION=${pinned_version}" >> "$GITHUB_ENV"
+
+      - name: Install ipex-llm and other related packages (install from pypi)
+        if: ${{ github.event.schedule || (github.event_name == 'workflow_dispatch' && (inputs.checkout-ref == 'main')) }}
+        shell: cmd  # Windows batch script; runs for scheduled runs or a manual dispatch on 'main'
+        run: |  # create the dgpu-perf conda env, install ipex-llm[xpu_lnl]==%TEST_VERSION%, fail unless pip show reports that version
+          call conda create -n dgpu-perf python=${{ matrix.python-version }} libuv -y
+          call conda activate dgpu-perf
+
+          pip install --upgrade pip
+          pip install --upgrade wheel
+          pip install --upgrade omegaconf pandas
+          pip install --upgrade tiktoken einops transformers_stream_generator matplotlib
+
+          pip install --pre --upgrade ipex-llm[xpu_lnl]==%TEST_VERSION% --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/lnl/cn/
+
+          pip show ipex-llm | findstr %TEST_VERSION%
+          if %ERRORLEVEL% neq 0 (
+            echo "Did not install ipex-llm with expected version %TEST_VERSION%"
+            exit /b 1
+          )
+          pip list
+
+          call conda deactivate
+
+      - name: Create env for html generation
+        shell: cmd
+        run: |  # separate conda env (html-gen) with pinned pandas/numpy; activated by the "Concat csv and generate html" steps
+          call conda create -n html-gen python=3.11 -y
+          call conda activate html-gen
+
+          pip install pandas==1.5.3
+          pip install Jinja2
+          pip install "numpy<2.0.0"
+
+          call conda deactivate
+
+      - name: Set directory envs & fix generated csv date name
+        shell: bash
+        env:
+          CHECKOUT_REF: ${{ inputs.checkout-ref }}  # passed via env so the user-supplied ref is never spliced into the script text
+        run: |
+          if [[ "$GITHUB_EVENT_NAME" == "schedule" || ( "$GITHUB_EVENT_NAME" == "workflow_dispatch" && "$CHECKOUT_REF" == "main" ) ]]; then
+            echo "CSV_SAVE_PATH=${CSV_NIGHTLY_PATH}" >> "$GITHUB_ENV"  # scheduled runs and manual runs on main share the nightly results directory
+          else
+            echo "CSV_SAVE_PATH=${CSV_PR_PATH}" >> "$GITHUB_ENV"
+          fi
+          date_for_test_version=$(date -d yesterday +%Y-%m-%d)
+          echo "LOG_FILE=${date_for_test_version}_output.txt" >> "$GITHUB_ENV"
+
+          sed -i "s/date.today()/\"$date_for_test_version\"/g" python/llm/dev/benchmark/all-in-one/run.py
+
+      # 32-32 int4+fp16
+      - name: Prepare igpu perf test (32-32 int4+fp16)
+        shell: bash
+        run: |  # rename run.py's CSV output to 32-32-{api}-results-{today}_test1.csv and write $MODEL_HUB_DIR into the 32-32 yaml
+          # hide time info
+          # sed -i 's/str(end - st)/"xxxxxx"/g' python/llm/dev/benchmark/all-in-one/run.py
+          sed -i 's/{api}-results-{today}.csv/32-32-{api}-results-{today}_test1.csv/g' python/llm/dev/benchmark/all-in-one/run.py
+          sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/32-32_int4_fp16.yaml
+
+      - name: Test on igpu (32-32 int4+fp16)
+        shell: cmd  # NOTE(review): step names say "igpu" but this job runs on perf-dgpu runners in the dgpu-perf env — confirm naming
+        run: |  # use the 32-32 yaml as config.yaml, run run.py (output appended to the dated log), then check_csv_results.py with suffix test1
+          call conda activate dgpu-perf
+          set SYCL_CACHE_PERSISTENT=1
+          set SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
+          REM for llava
+          set TRANSFORMERS_OFFLINE=1
+
+          cd python\llm\dev\benchmark\all-in-one
+          move ..\..\..\test\benchmark\igpu-perf\32-32_int4_fp16.yaml config.yaml
+          set PYTHONIOENCODING=utf-8
+          python run.py >> %CSV_SAVE_PATH%\32-32_int4_fp16\log\%LOG_FILE% 2>&1
+          if %ERRORLEVEL% neq 0 (exit /b 1)
+          python ..\..\..\test\benchmark\igpu-perf\check_csv_results.py --yaml-file config.yaml --suffix test1
+          if %ERRORLEVEL% neq 0 (exit /b 1)
+
+          call conda deactivate
+
+      - name: Prepare igpu perf test for transformers 4.36 (32-32 int4+fp16)
+        shell: bash
+        run: |  # bump run.py's CSV suffix test1 -> test2 and write $MODEL_HUB_DIR into the _436 yaml
+          sed -i 's/{today}_test1/{today}_test2/g' python/llm/dev/benchmark/all-in-one/run.py
+          sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/32-32_int4_fp16_436.yaml
+
+      - name: Test on igpu for transformers 4.36 (32-32 int4+fp16)
+        shell: cmd
+        run: |  # pin transformers 4.36.2 in dgpu-perf, run run.py with the _436 config, then check_csv_results.py with suffix test2
+          call conda activate dgpu-perf
+          pip install transformers==4.36.2
+
+          set SYCL_CACHE_PERSISTENT=1
+          set SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
+
+          cd python\llm\dev\benchmark\all-in-one
+          move ..\..\..\test\benchmark\igpu-perf\32-32_int4_fp16_436.yaml config.yaml
+          set PYTHONIOENCODING=utf-8
+          python run.py >> %CSV_SAVE_PATH%\32-32_int4_fp16\log\%LOG_FILE% 2>&1
+          if %ERRORLEVEL% neq 0 (exit /b 1)
+          python ..\..\..\test\benchmark\igpu-perf\check_csv_results.py --yaml-file config.yaml --suffix test2
+          if %ERRORLEVEL% neq 0 (exit /b 1)
+
+          call conda deactivate
+
+      - name: Prepare igpu perf test for transformers 4.38 (32-32 int4+fp16)
+        shell: bash
+        run: |  # bump run.py's CSV suffix test2 -> test3 and write $MODEL_HUB_DIR into the _438 yaml
+          sed -i 's/{today}_test2/{today}_test3/g' python/llm/dev/benchmark/all-in-one/run.py
+          sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/32-32_int4_fp16_438.yaml
+
+      - name: Test on igpu for transformers 4.38 (32-32 int4+fp16)
+        shell: cmd
+        run: |  # a run.py exit code of -1073740791 (0xC0000409, Windows STATUS_STACK_BUFFER_OVERRUN) is tolerated; check_csv_results.py then decides pass/fail
+          call conda activate dgpu-perf
+          pip install transformers==4.38.2
+
+          set SYCL_CACHE_PERSISTENT=1
+          set SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
+
+          cd python\llm\dev\benchmark\all-in-one
+          move ..\..\..\test\benchmark\igpu-perf\32-32_int4_fp16_438.yaml config.yaml
+          set PYTHONIOENCODING=utf-8
+          python run.py >> %CSV_SAVE_PATH%\32-32_int4_fp16\log\%LOG_FILE% 2>&1
+          if %ERRORLEVEL% neq 0 if %ERRORLEVEL% neq -1073740791 (exit /b 1)
+          python ..\..\..\test\benchmark\igpu-perf\check_csv_results.py --yaml-file config.yaml --suffix test3
+          if %ERRORLEVEL% neq 0 (exit /b 1)
+
+          call conda deactivate
+
+      - name: Prepare igpu perf test for transformers 4.43 (32-32 int4+fp16)
+        shell: bash
+        run: |  # bump run.py's CSV suffix test3 -> test4 and write $MODEL_HUB_DIR into the _443 yaml
+          sed -i 's/{today}_test3/{today}_test4/g' python/llm/dev/benchmark/all-in-one/run.py
+          sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/32-32_int4_fp16_443.yaml
+
+      - name: Test on igpu for transformers 4.43 (32-32 int4+fp16)
+        shell: cmd
+        run: |  # installs trl for this run only (uninstalled at the end); run.py exit code -1073740791 (0xC0000409) is tolerated, check_csv_results.py decides pass/fail
+          call conda activate dgpu-perf
+          pip install transformers==4.43.1
+          pip install trl
+
+          set SYCL_CACHE_PERSISTENT=1
+          set SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
+
+          cd python\llm\dev\benchmark\all-in-one
+          move ..\..\..\test\benchmark\igpu-perf\32-32_int4_fp16_443.yaml config.yaml
+          set PYTHONIOENCODING=utf-8
+          python run.py >> %CSV_SAVE_PATH%\32-32_int4_fp16\log\%LOG_FILE% 2>&1
+          if %ERRORLEVEL% neq 0 if %ERRORLEVEL% neq -1073740791 (exit /b 1)
+          python ..\..\..\test\benchmark\igpu-perf\check_csv_results.py --yaml-file config.yaml --suffix test4
+          if %ERRORLEVEL% neq 0 (exit /b 1)
+
+          pip uninstall trl -y
+          call conda deactivate
+
+      - name: Prepare igpu perf test for transformers 4.45 (32-32 int4+fp16)
+        shell: bash
+        run: |  # bump run.py's CSV suffix test4 -> test5 and write $MODEL_HUB_DIR into the _445 yaml
+          sed -i 's/{today}_test4/{today}_test5/g' python/llm/dev/benchmark/all-in-one/run.py
+          sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/32-32_int4_fp16_445.yaml
+
+      - name: Test on igpu for transformers 4.45 (32-32 int4+fp16)
+        shell: cmd
+        run: |  # temporarily uses accelerate 0.33.0 and trl; at the end trl is removed and accelerate is set back to 0.23.0. run.py exit code -1073740791 (0xC0000409) is tolerated
+          call conda activate dgpu-perf
+          pip install transformers==4.45.0
+          pip install accelerate==0.33.0
+          pip install trl
+
+          set SYCL_CACHE_PERSISTENT=1
+          set SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
+
+          cd python\llm\dev\benchmark\all-in-one
+          move ..\..\..\test\benchmark\igpu-perf\32-32_int4_fp16_445.yaml config.yaml
+          set PYTHONIOENCODING=utf-8
+          python run.py >> %CSV_SAVE_PATH%\32-32_int4_fp16\log\%LOG_FILE% 2>&1
+          if %ERRORLEVEL% neq 0 if %ERRORLEVEL% neq -1073740791 (exit /b 1)
+          python ..\..\..\test\benchmark\igpu-perf\check_csv_results.py --yaml-file config.yaml --suffix test5
+          if %ERRORLEVEL% neq 0 (exit /b 1)
+
+          pip uninstall trl -y
+          pip install accelerate==0.23.0
+          call conda deactivate
+
+      - name: Concat csv and generate html (32-32 int4+fp16)
+        shell: cmd
+        run: |  # run concat_csv.py, delete the *test*.csv files, move the remaining CSVs to %CSV_SAVE_PATH%\32-32_int4_fp16\, render HTML there and move it up to %CSV_SAVE_PATH%
+          call conda activate html-gen
+
+          cd python\llm\dev\benchmark\all-in-one
+          python ..\..\..\test\benchmark\concat_csv.py
+          if %ERRORLEVEL% neq 0 (exit /b 1)
+          del /q *test*.csv
+          move *.csv %CSV_SAVE_PATH%\32-32_int4_fp16\
+          cd ..\..\..\test\benchmark
+          python csv_to_html.py -f %CSV_SAVE_PATH%\32-32_int4_fp16\
+          if %ERRORLEVEL% neq 0 (exit /b 1)
+          move %CSV_SAVE_PATH%\32-32_int4_fp16\*.html %CSV_SAVE_PATH%
+
+          call conda deactivate
+
+      # TODO: create an action function here for different input
+      # 1024-128 int4+fp16
+      - name: Prepare igpu perf test (1024-128 int4+fp16)
+        shell: bash
+        run: |  # rewrite every "32-32" in run.py to "1024-128", reset the CSV suffix test5 -> test1, write $MODEL_HUB_DIR into the 1024-128 yaml
+          sed -i 's/32-32/1024-128/g' python/llm/dev/benchmark/all-in-one/run.py
+          sed -i 's/{today}_test5/{today}_test1/g' python/llm/dev/benchmark/all-in-one/run.py
+          sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16.yaml
+
+      - name: Test on igpu (1024-128 int4+fp16)
+        shell: cmd
+        run: |  # re-pins transformers to 4.37.0 (the preceding 4.45 step left 4.45.0 in dgpu-perf), then runs run.py and check_csv_results.py with suffix test1
+          call conda activate dgpu-perf
+          pip install transformers==4.37.0
+
+          set SYCL_CACHE_PERSISTENT=1
+          set SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
+          REM for llava
+          set TRANSFORMERS_OFFLINE=1
+
+          cd python\llm\dev\benchmark\all-in-one
+          move ..\..\..\test\benchmark\igpu-perf\1024-128_int4_fp16.yaml config.yaml
+          set PYTHONIOENCODING=utf-8
+          python run.py >> %CSV_SAVE_PATH%\1024-128_int4_fp16\log\%LOG_FILE% 2>&1
+          if %ERRORLEVEL% neq 0 (exit /b 1)
+          python ..\..\..\test\benchmark\igpu-perf\check_csv_results.py --yaml-file config.yaml --suffix test1
+          if %ERRORLEVEL% neq 0 (exit /b 1)
+
+          call conda deactivate
+
+      - name: Prepare igpu perf test for transformers 4.36 (1024-128 int4+fp16)
+        shell: bash
+        run: |
+          sed -i 's/{today}_test1/{today}_test2/g' python/llm/dev/benchmark/all-in-one/run.py
+          sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16_436.yaml
+
+      - name: Test on igpu for transformers 4.36 (1024-128 int4+fp16)
+        shell: cmd
+        run: |
+          call conda activate dgpu-perf
+          pip install transformers==4.36.2
+
+          set SYCL_CACHE_PERSISTENT=1
+          set SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
+
+          cd python\llm\dev\benchmark\all-in-one
+          move ..\..\..\test\benchmark\igpu-perf\1024-128_int4_fp16_436.yaml config.yaml
+          set PYTHONIOENCODING=utf-8
+          python run.py >> %CSV_SAVE_PATH%\1024-128_int4_fp16\log\%LOG_FILE% 2>&1
+          if %ERRORLEVEL% neq 0 (exit /b 1)
+          python ..\..\..\test\benchmark\igpu-perf\check_csv_results.py --yaml-file config.yaml --suffix test2
+          if %ERRORLEVEL% neq 0 (exit /b 1)
+
+          call conda deactivate
+
+      - name: Prepare igpu perf test for transformers 4.38 (1024-128 int4+fp16)
+        shell: bash
+        run: |
+          sed -i 's/{today}_test2/{today}_test3/g' python/llm/dev/benchmark/all-in-one/run.py
+          sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16_438.yaml
+
+      - name: Test on igpu for transformers 4.38 (1024-128 int4+fp16)
+        shell: cmd
+        run: |
+          call conda activate dgpu-perf
+          pip install transformers==4.38.2
+
+          set SYCL_CACHE_PERSISTENT=1
+          set SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
+
+          cd python\llm\dev\benchmark\all-in-one
+          move ..\..\..\test\benchmark\igpu-perf\1024-128_int4_fp16_438.yaml config.yaml
+          set PYTHONIOENCODING=utf-8
+          python run.py >> %CSV_SAVE_PATH%\1024-128_int4_fp16\log\%LOG_FILE% 2>&1
+          if %ERRORLEVEL% neq 0 (exit /b 1)
+          python ..\..\..\test\benchmark\igpu-perf\check_csv_results.py --yaml-file config.yaml --suffix test3
+          if %ERRORLEVEL% neq 0 (exit /b 1)
+
+          call conda deactivate
+
+      - name: Prepare igpu perf test for transformers 4.43 (1024-128 int4+fp16)
+        shell: bash
+        run: |
+          sed -i 's/{today}_test3/{today}_test4/g' python/llm/dev/benchmark/all-in-one/run.py
+          sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16_443.yaml
+
+      - name: Test on igpu for transformers 4.43 (1024-128 int4+fp16)
+        shell: cmd
+        run: |
+          call conda activate dgpu-perf
+          pip install transformers==4.43.1
+          pip install trl
+
+          set SYCL_CACHE_PERSISTENT=1
+          set SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
+
+          cd python\llm\dev\benchmark\all-in-one
+          move ..\..\..\test\benchmark\igpu-perf\1024-128_int4_fp16_443.yaml config.yaml
+          set PYTHONIOENCODING=utf-8
+          python run.py >> %CSV_SAVE_PATH%\1024-128_int4_fp16\log\%LOG_FILE% 2>&1
+          if %ERRORLEVEL% neq 0 (exit /b 1)
+          python ..\..\..\test\benchmark\igpu-perf\check_csv_results.py --yaml-file config.yaml --suffix test4
+          if %ERRORLEVEL% neq 0 (exit /b 1)
+
+          pip uninstall trl -y
+          call conda deactivate
+
+      - name: Prepare igpu perf test for transformers 4.45 (1024-128 int4+fp16)
+        shell: bash
+        run: |
+          sed -i 's/{today}_test4/{today}_test5/g' python/llm/dev/benchmark/all-in-one/run.py
+          sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16_445.yaml
+
+      - name: Test on igpu for transformers 4.45 (1024-128 int4+fp16)
+        shell: cmd
+        run: |
+          call conda activate dgpu-perf
+          pip install transformers==4.45.0
+          pip install accelerate==0.33.0
+          pip install trl
+
+          set SYCL_CACHE_PERSISTENT=1
+          set SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
+
+          cd python\llm\dev\benchmark\all-in-one
+          move ..\..\..\test\benchmark\igpu-perf\1024-128_int4_fp16_445.yaml config.yaml
+          set PYTHONIOENCODING=utf-8
+          python run.py >> %CSV_SAVE_PATH%\1024-128_int4_fp16\log\%LOG_FILE% 2>&1
+          if %ERRORLEVEL% neq 0 (exit /b 1)
+          python ..\..\..\test\benchmark\igpu-perf\check_csv_results.py --yaml-file config.yaml --suffix test5
+          if %ERRORLEVEL% neq 0 (exit /b 1)
+
+          pip uninstall trl -y
+          pip install accelerate==0.23.0
+          call conda deactivate
+
+      - name: Concat csv and generate html (1024-128 int4+fp16)
+        shell: cmd
+        run: |
+          call conda activate html-gen
+
+          cd python\llm\dev\benchmark\all-in-one
+          python ..\..\..\test\benchmark\concat_csv.py
+          if %ERRORLEVEL% neq 0 (exit /b 1)
+          del /q *test*.csv
+          move *.csv %CSV_SAVE_PATH%\1024-128_int4_fp16\
+          cd ..\..\..\test\benchmark
+          python csv_to_html.py -f %CSV_SAVE_PATH%\1024-128_int4_fp16\
+          if %ERRORLEVEL% neq 0 (exit /b 1)
+          move %CSV_SAVE_PATH%\1024-128_int4_fp16\*.html %CSV_SAVE_PATH%
+
+          call conda deactivate
+
+      # 2048-256 int4+fp16
+      - name: Prepare igpu perf test (2048-256 int4+fp16)
+        shell: bash
+        run: |
+          sed -i 's/1024-128/2048-256/g' python/llm/dev/benchmark/all-in-one/run.py
+          sed -i 's/{today}_test5/{today}_test1/g' python/llm/dev/benchmark/all-in-one/run.py
+          sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/2048-256_int4_fp16.yaml
+
+      - name: Test on igpu (2048-256 int4+fp16)
+        shell: cmd
+        run: |
+          call conda activate dgpu-perf
+          pip install transformers==4.37.0
+
+          set SYCL_CACHE_PERSISTENT=1
+          set SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
+          REM for llava
+          set TRANSFORMERS_OFFLINE=1
+
+          cd python\llm\dev\benchmark\all-in-one
+          move ..\..\..\test\benchmark\igpu-perf\2048-256_int4_fp16.yaml config.yaml
+          set PYTHONIOENCODING=utf-8
+          python run.py >> %CSV_SAVE_PATH%\2048-256_int4_fp16\log\%LOG_FILE% 2>&1
+          if %ERRORLEVEL% neq 0 (exit /b 1)
+          python ..\..\..\test\benchmark\igpu-perf\check_csv_results.py --yaml-file config.yaml --suffix test1
+          if %ERRORLEVEL% neq 0 (exit /b 1)
+
+          call conda deactivate
+
+      - name: Prepare igpu perf test for transformers 4.36 (2048-256 int4+fp16)
+        shell: bash
+        run: |
+          sed -i 's/{today}_test1/{today}_test2/g' python/llm/dev/benchmark/all-in-one/run.py
+          sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/2048-256_int4_fp16_436.yaml
+
+      - name: Test on igpu for transformers 4.36 (2048-256 int4+fp16)
+        shell: cmd
+        run: |
+          call conda activate dgpu-perf
+          pip install transformers==4.36.2
+
+          set SYCL_CACHE_PERSISTENT=1
+          set SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
+
+          cd python\llm\dev\benchmark\all-in-one
+          move ..\..\..\test\benchmark\igpu-perf\2048-256_int4_fp16_436.yaml config.yaml
+          set PYTHONIOENCODING=utf-8
+          python run.py >> %CSV_SAVE_PATH%\2048-256_int4_fp16\log\%LOG_FILE% 2>&1
+          if %ERRORLEVEL% neq 0 (exit /b 1)
+          python ..\..\..\test\benchmark\igpu-perf\check_csv_results.py --yaml-file config.yaml --suffix test2
+          if %ERRORLEVEL% neq 0 (exit /b 1)
+
+          call conda deactivate
+
+      - name: Prepare igpu perf test for transformers 4.38 (2048-256 int4+fp16)
+        shell: bash
+        run: |
+          sed -i 's/{today}_test2/{today}_test3/g' python/llm/dev/benchmark/all-in-one/run.py
+          sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/2048-256_int4_fp16_438.yaml
+
+      - name: Test on igpu for transformers 4.38 (2048-256 int4+fp16)
+        shell: cmd
+        run: |
+          call conda activate dgpu-perf
+          pip install transformers==4.38.2
+
+          set SYCL_CACHE_PERSISTENT=1
+          set SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
+
+          cd python\llm\dev\benchmark\all-in-one
+          move ..\..\..\test\benchmark\igpu-perf\2048-256_int4_fp16_438.yaml config.yaml
+          set PYTHONIOENCODING=utf-8
+          python run.py >> %CSV_SAVE_PATH%\2048-256_int4_fp16\log\%LOG_FILE% 2>&1
+          if %ERRORLEVEL% neq 0 (exit /b 1)
+          python ..\..\..\test\benchmark\igpu-perf\check_csv_results.py --yaml-file config.yaml --suffix test3
+          if %ERRORLEVEL% neq 0 (exit /b 1)
+
+          call conda deactivate
+
+      - name: Prepare igpu perf test for transformers 4.43 (2048-256 int4+fp16)
+        shell: bash
+        run: |
+          sed -i 's/{today}_test3/{today}_test4/g' python/llm/dev/benchmark/all-in-one/run.py
+          sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/2048-256_int4_fp16_443.yaml
+
+      - name: Test on igpu for transformers 4.43 (2048-256 int4+fp16)
+        shell: cmd
+        run: |
+          call conda activate dgpu-perf
+          pip install transformers==4.43.1
+          pip install trl
+
+          set SYCL_CACHE_PERSISTENT=1
+          set SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
+
+          cd python\llm\dev\benchmark\all-in-one
+          move ..\..\..\test\benchmark\igpu-perf\2048-256_int4_fp16_443.yaml config.yaml
+          set PYTHONIOENCODING=utf-8
+          python run.py >> %CSV_SAVE_PATH%\2048-256_int4_fp16\log\%LOG_FILE% 2>&1
+          if %ERRORLEVEL% neq 0 (exit /b 1)
+          python ..\..\..\test\benchmark\igpu-perf\check_csv_results.py --yaml-file config.yaml --suffix test4
+          if %ERRORLEVEL% neq 0 (exit /b 1)
+
+          pip uninstall trl -y
+          call conda deactivate
+
+      - name: Prepare igpu perf test for transformers 4.45 (2048-256 int4+fp16)
+        shell: bash
+        run: |
+          sed -i 's/{today}_test4/{today}_test5/g' python/llm/dev/benchmark/all-in-one/run.py
+          sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/2048-256_int4_fp16_445.yaml
+
+      - name: Test on igpu for transformers 4.45 (2048-256 int4+fp16)
+        shell: cmd
+        run: |
+          call conda activate dgpu-perf
+          pip install transformers==4.45.0
+          pip install accelerate==0.33.0
+          pip install trl
+
+          set SYCL_CACHE_PERSISTENT=1
+          set SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
+
+          cd python\llm\dev\benchmark\all-in-one
+          move ..\..\..\test\benchmark\igpu-perf\2048-256_int4_fp16_445.yaml config.yaml
+          set PYTHONIOENCODING=utf-8
+          python run.py >> %CSV_SAVE_PATH%\2048-256_int4_fp16\log\%LOG_FILE% 2>&1
+          if %ERRORLEVEL% neq 0 (exit /b 1)
+          python ..\..\..\test\benchmark\igpu-perf\check_csv_results.py --yaml-file config.yaml --suffix test5
+          if %ERRORLEVEL% neq 0 (exit /b 1)
+
+          pip uninstall trl -y
+          pip install accelerate==0.23.0
+          call conda deactivate
+
+      - name: Concat csv and generate html (2048-256 int4+fp16)
+        shell: cmd
+        run: |
+          call conda activate html-gen
+
+          cd python\llm\dev\benchmark\all-in-one
+          python ..\..\..\test\benchmark\concat_csv.py
+          if %ERRORLEVEL% neq 0 (exit /b 1)
+          del /q *test*.csv
+          move *.csv %CSV_SAVE_PATH%\2048-256_int4_fp16\
+          cd ..\..\..\test\benchmark
+          python csv_to_html.py -f %CSV_SAVE_PATH%\2048-256_int4_fp16\
+          if %ERRORLEVEL% neq 0 (exit /b 1)
+          move %CSV_SAVE_PATH%\2048-256_int4_fp16\*.html %CSV_SAVE_PATH%
+
+          call conda deactivate
+
+      # 3072-384 int4+fp16
+      - name: Prepare igpu perf test (3072-384 int4+fp16)
+        shell: bash
+        run: |
+          sed -i 's/2048-256/3072-384/g' python/llm/dev/benchmark/all-in-one/run.py
+          sed -i 's/{today}_test5/{today}_test1/g' python/llm/dev/benchmark/all-in-one/run.py
+          sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/3072-384_int4_fp16.yaml
+
+      - name: Test on igpu (3072-384 int4+fp16)
+        shell: cmd
+        run: |
+          call conda activate dgpu-perf
+          pip install transformers==4.37.0
+
+          set SYCL_CACHE_PERSISTENT=1
+          set SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
+          REM for llava
+          set TRANSFORMERS_OFFLINE=1
+
+          cd python\llm\dev\benchmark\all-in-one
+          move ..\..\..\test\benchmark\igpu-perf\3072-384_int4_fp16.yaml config.yaml
+          set PYTHONIOENCODING=utf-8
+          python run.py >> %CSV_SAVE_PATH%\3072-384_int4_fp16\log\%LOG_FILE% 2>&1
+          if %ERRORLEVEL% neq 0 (exit /b 1)
+          python ..\..\..\test\benchmark\igpu-perf\check_csv_results.py --yaml-file config.yaml --suffix test1
+          if %ERRORLEVEL% neq 0 (exit /b 1)
+
+          call conda deactivate
+
+      - name: Prepare igpu perf test for transformers 4.36 (3072-384 int4+fp16)
+        shell: bash
+        run: |
+          sed -i 's/{today}_test1/{today}_test2/g' python/llm/dev/benchmark/all-in-one/run.py
+          sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/3072-384_int4_fp16_436.yaml
+
+      - name: Test on igpu for transformers 4.36 (3072-384 int4+fp16)
+        shell: cmd
+        run: |
+          call conda activate dgpu-perf
+          pip install transformers==4.36.2
+
+          set SYCL_CACHE_PERSISTENT=1
+          set SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
+
+          cd python\llm\dev\benchmark\all-in-one
+          move ..\..\..\test\benchmark\igpu-perf\3072-384_int4_fp16_436.yaml config.yaml
+          set PYTHONIOENCODING=utf-8
+          python run.py >> %CSV_SAVE_PATH%\3072-384_int4_fp16\log\%LOG_FILE% 2>&1
+          if %ERRORLEVEL% neq 0 (exit /b 1)
+          python ..\..\..\test\benchmark\igpu-perf\check_csv_results.py --yaml-file config.yaml --suffix test2
+          if %ERRORLEVEL% neq 0 (exit /b 1)
+
+          call conda deactivate
+
+      - name: Prepare igpu perf test for transformers 4.38 (3072-384 int4+fp16)
+        shell: bash
+        run: |
+          sed -i 's/{today}_test2/{today}_test3/g' python/llm/dev/benchmark/all-in-one/run.py
+          sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/3072-384_int4_fp16_438.yaml
+
+      - name: Test on igpu for transformers 4.38 (3072-384 int4+fp16)
+        shell: cmd
+        run: |
+          call conda activate dgpu-perf
+          pip install transformers==4.38.2
+
+          set SYCL_CACHE_PERSISTENT=1
+          set SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
+
+          cd python\llm\dev\benchmark\all-in-one
+          move ..\..\..\test\benchmark\igpu-perf\3072-384_int4_fp16_438.yaml config.yaml
+          set PYTHONIOENCODING=utf-8
+          python run.py >> %CSV_SAVE_PATH%\3072-384_int4_fp16\log\%LOG_FILE% 2>&1
+          if %ERRORLEVEL% neq 0 (exit /b 1)
+          python ..\..\..\test\benchmark\igpu-perf\check_csv_results.py --yaml-file config.yaml --suffix test3
+          if %ERRORLEVEL% neq 0 (exit /b 1)
+
+          call conda deactivate
+
+      - name: Prepare igpu perf test for transformers 4.43 (3072-384 int4+fp16)
+        shell: bash
+        run: |
+          sed -i 's/{today}_test3/{today}_test4/g' python/llm/dev/benchmark/all-in-one/run.py
+          sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/3072-384_int4_fp16_443.yaml
+
+      # Runs the all-in-one benchmark with transformers 4.43.1 plus trl inside the
+      # `dgpu-perf` conda env, then validates the CSV with `--suffix test4`.
+      # trl is uninstalled again at the end of the step. Failures are recorded in
+      # STEP_FAILED instead of exiting immediately, so that this cleanup also runs
+      # when run.py or the CSV check fails; the step still exits with code 1 then.
+      - name: Test on igpu for transformers 4.43 (3072-384 int4+fp16)
+        shell: cmd
+        run: |
+          call conda activate dgpu-perf
+          pip install transformers==4.43.1
+          pip install trl
+
+          set SYCL_CACHE_PERSISTENT=1
+          set SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
+
+          cd python\llm\dev\benchmark\all-in-one
+          move ..\..\..\test\benchmark\igpu-perf\3072-384_int4_fp16_443.yaml config.yaml
+          set PYTHONIOENCODING=utf-8
+          set STEP_FAILED=0
+          python run.py >> %CSV_SAVE_PATH%\3072-384_int4_fp16\log\%LOG_FILE% 2>&1
+          if %ERRORLEVEL% neq 0 (set STEP_FAILED=1)
+          REM only validate the CSV when the benchmark itself succeeded
+          if %STEP_FAILED% equ 0 (python ..\..\..\test\benchmark\igpu-perf\check_csv_results.py --yaml-file config.yaml --suffix test4)
+          if %ERRORLEVEL% neq 0 (set STEP_FAILED=1)
+
+          REM always undo the extra install, even after a failure
+          pip uninstall trl -y
+          call conda deactivate
+          if %STEP_FAILED% neq 0 (exit /b 1)
+
+      # Re-targets the benchmark for the transformers 4.45 pass of the 3072-384 run:
+      # run.py's `{today}_test4` tag becomes `{today}_test5` (matching the
+      # `--suffix test5` check in the following test step) and the model-hub
+      # placeholder in the `_445.yaml` config is replaced with $MODEL_HUB_DIR.
+      - name: Prepare igpu perf test for transformers 4.45 (3072-384 int4+fp16)
+        shell: bash
+        run: |
+          sed -i 's/{today}_test4/{today}_test5/g' python/llm/dev/benchmark/all-in-one/run.py
+          sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/3072-384_int4_fp16_445.yaml
+
+      # Runs the all-in-one benchmark with transformers 4.45.0, accelerate 0.33.0 and
+      # trl inside the `dgpu-perf` conda env, then validates the CSV with
+      # `--suffix test5`. At the end trl is uninstalled and accelerate is put back to
+      # 0.23.0. Failures are recorded in STEP_FAILED instead of exiting immediately,
+      # so that this restore also runs when run.py or the CSV check fails; the step
+      # still exits with code 1 in that case.
+      - name: Test on igpu for transformers 4.45 (3072-384 int4+fp16)
+        shell: cmd
+        run: |
+          call conda activate dgpu-perf
+          pip install transformers==4.45.0
+          pip install accelerate==0.33.0
+          pip install trl
+
+          set SYCL_CACHE_PERSISTENT=1
+          set SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
+
+          cd python\llm\dev\benchmark\all-in-one
+          move ..\..\..\test\benchmark\igpu-perf\3072-384_int4_fp16_445.yaml config.yaml
+          set PYTHONIOENCODING=utf-8
+          set STEP_FAILED=0
+          python run.py >> %CSV_SAVE_PATH%\3072-384_int4_fp16\log\%LOG_FILE% 2>&1
+          if %ERRORLEVEL% neq 0 (set STEP_FAILED=1)
+          REM only validate the CSV when the benchmark itself succeeded
+          if %STEP_FAILED% equ 0 (python ..\..\..\test\benchmark\igpu-perf\check_csv_results.py --yaml-file config.yaml --suffix test5)
+          if %ERRORLEVEL% neq 0 (set STEP_FAILED=1)
+
+          REM always restore the env's packages, even after a failure
+          pip uninstall trl -y
+          pip install accelerate==0.23.0
+          call conda deactivate
+          if %STEP_FAILED% neq 0 (exit /b 1)
+
+      # Post-processes the 3072-384 results inside the `html-gen` conda env:
+      # concat_csv.py runs in the all-in-one directory (presumably merging the
+      # per-test CSVs -- confirm), every `*test*.csv` file is then deleted and the
+      # remaining CSVs are moved to %CSV_SAVE_PATH%\3072-384_int4_fp16\.
+      # csv_to_html.py is pointed at that folder and the resulting HTML files are
+      # moved up into %CSV_SAVE_PATH%. A non-zero exit from either script fails the step.
+      - name: Concat csv and generate html (3072-384 int4+fp16)
+        shell: cmd
+        run: |
+          call conda activate html-gen
+
+          cd python\llm\dev\benchmark\all-in-one
+          python ..\..\..\test\benchmark\concat_csv.py
+          if %ERRORLEVEL% neq 0 (exit /b 1)
+          del /q *test*.csv
+          move *.csv %CSV_SAVE_PATH%\3072-384_int4_fp16\
+          cd ..\..\..\test\benchmark
+          python csv_to_html.py -f %CSV_SAVE_PATH%\3072-384_int4_fp16\
+          if %ERRORLEVEL% neq 0 (exit /b 1)
+          move %CSV_SAVE_PATH%\3072-384_int4_fp16\*.html %CSV_SAVE_PATH%
+
+          call conda deactivate
+
+      # 4096-512 int4+fp16
+      # Starts the 4096-512 series: every `3072-384` in run.py is rewritten to
+      # `4096-512`, the `{today}_test5` tag is reset to `{today}_test1` (matching
+      # the `--suffix test1` check in the following test step), and the model-hub
+      # placeholder in the base 4096-512 config is replaced with $MODEL_HUB_DIR.
+      - name: Prepare igpu perf test (4096-512 int4+fp16)
+        shell: bash
+        run: |
+          sed -i 's/3072-384/4096-512/g' python/llm/dev/benchmark/all-in-one/run.py
+          sed -i 's/{today}_test5/{today}_test1/g' python/llm/dev/benchmark/all-in-one/run.py
+          sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/4096-512_int4_fp16.yaml
+
+      # First pass of the 4096-512 series: runs the all-in-one benchmark with
+      # transformers pinned to 4.37.0 inside the `dgpu-perf` conda env, with
+      # TRANSFORMERS_OFFLINE=1 set (per the inline REM, for llava). The base
+      # 4096-512 config is moved into place as config.yaml, run.py's stdout and
+      # stderr are appended to the 4096-512 log file, and check_csv_results.py is
+      # run with `--suffix test1`. A non-zero exit from either script fails the step.
+      - name: Test on igpu (4096-512 int4+fp16)
+        shell: cmd
+        run: |
+          call conda activate dgpu-perf
+          pip install transformers==4.37.0
+
+          set SYCL_CACHE_PERSISTENT=1
+          set SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
+          REM for llava
+          set TRANSFORMERS_OFFLINE=1
+
+          cd python\llm\dev\benchmark\all-in-one
+          move ..\..\..\test\benchmark\igpu-perf\4096-512_int4_fp16.yaml config.yaml
+          set PYTHONIOENCODING=utf-8
+          python run.py >> %CSV_SAVE_PATH%\4096-512_int4_fp16\log\%LOG_FILE% 2>&1
+          if %ERRORLEVEL% neq 0 (exit /b 1)
+          python ..\..\..\test\benchmark\igpu-perf\check_csv_results.py --yaml-file config.yaml --suffix test1
+          if %ERRORLEVEL% neq 0 (exit /b 1)
+
+          call conda deactivate
+
+      # Re-targets the benchmark for the transformers 4.38 pass of the 4096-512 run:
+      # run.py's `{today}_test1` tag becomes `{today}_test2` (matching the
+      # `--suffix test2` check in the following test step) and the model-hub
+      # placeholder in the `_438.yaml` config is replaced with $MODEL_HUB_DIR.
+      - name: Prepare igpu perf test for transformers 4.38 (4096-512 int4+fp16)
+        shell: bash
+        run: |
+          sed -i 's/{today}_test1/{today}_test2/g' python/llm/dev/benchmark/all-in-one/run.py
+          sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/4096-512_int4_fp16_438.yaml
+
+      # Runs the all-in-one benchmark with transformers pinned to 4.38.2 inside the
+      # `dgpu-perf` conda env. The `_438.yaml` config is moved into place as
+      # config.yaml, run.py's stdout and stderr are appended to the 4096-512 log file,
+      # and check_csv_results.py is then run with `--suffix test2`. A non-zero exit
+      # code from either Python script fails the step.
+      - name: Test on igpu for transformers 4.38 (4096-512 int4+fp16)
+        shell: cmd
+        run: |
+          call conda activate dgpu-perf
+          pip install transformers==4.38.2
+
+          set SYCL_CACHE_PERSISTENT=1
+          set SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
+
+          cd python\llm\dev\benchmark\all-in-one
+          move ..\..\..\test\benchmark\igpu-perf\4096-512_int4_fp16_438.yaml config.yaml
+          set PYTHONIOENCODING=utf-8
+          python run.py >> %CSV_SAVE_PATH%\4096-512_int4_fp16\log\%LOG_FILE% 2>&1
+          if %ERRORLEVEL% neq 0 (exit /b 1)
+          python ..\..\..\test\benchmark\igpu-perf\check_csv_results.py --yaml-file config.yaml --suffix test2
+          if %ERRORLEVEL% neq 0 (exit /b 1)
+
+          call conda deactivate
+
+      # Re-targets the benchmark for the transformers 4.43 pass of the 4096-512 run:
+      # run.py's `{today}_test2` tag becomes `{today}_test3` (matching the
+      # `--suffix test3` check in the following test step) and the model-hub
+      # placeholder in the `_443.yaml` config is replaced with $MODEL_HUB_DIR.
+      - name: Prepare igpu perf test for transformers 4.43 (4096-512 int4+fp16)
+        shell: bash
+        run: |
+          sed -i 's/{today}_test2/{today}_test3/g' python/llm/dev/benchmark/all-in-one/run.py
+          sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/4096-512_int4_fp16_443.yaml
+
+      # Runs the all-in-one benchmark with transformers 4.43.1 plus trl inside the
+      # `dgpu-perf` conda env, then validates the CSV with `--suffix test3`.
+      # trl is uninstalled again at the end of the step. Failures are recorded in
+      # STEP_FAILED instead of exiting immediately, so that this cleanup also runs
+      # when run.py or the CSV check fails; the step still exits with code 1 then.
+      - name: Test on igpu for transformers 4.43 (4096-512 int4+fp16)
+        shell: cmd
+        run: |
+          call conda activate dgpu-perf
+          pip install transformers==4.43.1
+          pip install trl
+
+          set SYCL_CACHE_PERSISTENT=1
+          set SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
+
+          cd python\llm\dev\benchmark\all-in-one
+          move ..\..\..\test\benchmark\igpu-perf\4096-512_int4_fp16_443.yaml config.yaml
+          set PYTHONIOENCODING=utf-8
+          set STEP_FAILED=0
+          python run.py >> %CSV_SAVE_PATH%\4096-512_int4_fp16\log\%LOG_FILE% 2>&1
+          if %ERRORLEVEL% neq 0 (set STEP_FAILED=1)
+          REM only validate the CSV when the benchmark itself succeeded
+          if %STEP_FAILED% equ 0 (python ..\..\..\test\benchmark\igpu-perf\check_csv_results.py --yaml-file config.yaml --suffix test3)
+          if %ERRORLEVEL% neq 0 (set STEP_FAILED=1)
+
+          REM always undo the extra install, even after a failure
+          pip uninstall trl -y
+          call conda deactivate
+          if %STEP_FAILED% neq 0 (exit /b 1)
+
+      # Re-targets the benchmark for the transformers 4.45 pass of the 4096-512 run:
+      # run.py's `{today}_test3` tag becomes `{today}_test4` (matching the
+      # `--suffix test4` check in the following test step) and the model-hub
+      # placeholder in the `_445.yaml` config is replaced with $MODEL_HUB_DIR.
+      - name: Prepare igpu perf test for transformers 4.45 (4096-512 int4+fp16)
+        shell: bash
+        run: |
+          sed -i 's/{today}_test3/{today}_test4/g' python/llm/dev/benchmark/all-in-one/run.py
+          sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/4096-512_int4_fp16_445.yaml
+
+      # Runs the all-in-one benchmark with transformers 4.45.0, accelerate 0.33.0 and
+      # trl inside the `dgpu-perf` conda env, then validates the CSV with
+      # `--suffix test4`. At the end trl is uninstalled and accelerate is put back to
+      # 0.23.0. Failures are recorded in STEP_FAILED instead of exiting immediately,
+      # so that this restore also runs when run.py or the CSV check fails; the step
+      # still exits with code 1 in that case.
+      - name: Test on igpu for transformers 4.45 (4096-512 int4+fp16)
+        shell: cmd
+        run: |
+          call conda activate dgpu-perf
+          pip install transformers==4.45.0
+          pip install accelerate==0.33.0
+          pip install trl
+
+          set SYCL_CACHE_PERSISTENT=1
+          set SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
+
+          cd python\llm\dev\benchmark\all-in-one
+          move ..\..\..\test\benchmark\igpu-perf\4096-512_int4_fp16_445.yaml config.yaml
+          set PYTHONIOENCODING=utf-8
+          set STEP_FAILED=0
+          python run.py >> %CSV_SAVE_PATH%\4096-512_int4_fp16\log\%LOG_FILE% 2>&1
+          if %ERRORLEVEL% neq 0 (set STEP_FAILED=1)
+          REM only validate the CSV when the benchmark itself succeeded
+          if %STEP_FAILED% equ 0 (python ..\..\..\test\benchmark\igpu-perf\check_csv_results.py --yaml-file config.yaml --suffix test4)
+          if %ERRORLEVEL% neq 0 (set STEP_FAILED=1)
+
+          REM always restore the env's packages, even after a failure
+          pip uninstall trl -y
+          pip install accelerate==0.23.0
+          call conda deactivate
+          if %STEP_FAILED% neq 0 (exit /b 1)
+
+      # Post-processes the 4096-512 results inside the `html-gen` conda env:
+      # concat_csv.py runs in the all-in-one directory (presumably merging the
+      # per-test CSVs -- confirm), every `*test*.csv` file is then deleted and the
+      # remaining CSVs are moved to %CSV_SAVE_PATH%\4096-512_int4_fp16\.
+      # csv_to_html.py is pointed at that folder and the resulting HTML files are
+      # moved up into %CSV_SAVE_PATH%. A non-zero exit from either script fails the step.
+      - name: Concat csv and generate html (4096-512 int4+fp16)
+        shell: cmd
+        run: |
+          call conda activate html-gen
+
+          cd python\llm\dev\benchmark\all-in-one
+          python ..\..\..\test\benchmark\concat_csv.py
+          if %ERRORLEVEL% neq 0 (exit /b 1)
+          del /q *test*.csv
+          move *.csv %CSV_SAVE_PATH%\4096-512_int4_fp16\
+          cd ..\..\..\test\benchmark
+          python csv_to_html.py -f %CSV_SAVE_PATH%\4096-512_int4_fp16\
+          if %ERRORLEVEL% neq 0 (exit /b 1)
+          move %CSV_SAVE_PATH%\4096-512_int4_fp16\*.html %CSV_SAVE_PATH%
+
+          call conda deactivate