Update CONTRIBUTING.md #12
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Workflow name as shown in the GitHub Actions UI.
name: 'LLM Evaluation'

# Both events are declared without filters, so the workflow is triggered by
# every push and every pull request, whatever the branch or the changed paths.
on:
  pull_request:
  push:

# Alternative trigger, currently disabled: run only for pull requests that
# touch the ML or evaluation sources.
# NOTE(review): `paths` entries are glob patterns; the `/**` suffix is needed
# for a pattern to match the files below a directory.
# on:
#   pull_request:
#     paths:
#       - 'src/ml/**'
#       - 'src/evaluation/**'
# Workflow-level environment variables, shared by all jobs and steps.
# As configured, both the application LLM and the judge LLM talk to a local
# Ollama server through its OpenAI-compatible endpoint.
env:
  # Models referenced by name for the local Ollama server.
  OLLAMA_MODEL_NAME: "phi3:3.8b-mini-4k-instruct-q4_K_M"
  OLLAMA_EMBEDDING_MODEL_NAME: "all-minilm:l6-v2"

  # Application LLM.
  LLM_PROVIDER: "openai"
  # Or gpt-4o-mini if you use OpenAI.
  OPENAI_DEPLOYMENT_NAME: "phi3:3.8b-mini-4k-instruct-q4_K_M"
  # Ollama endpoint, or https://api.openai.com/v1 if you use OpenAI.
  OPENAI_BASE_URL: "http://localhost:11434/v1"
  # Placeholder, not a real secret.
  # NOTE(review): presumably the local endpoint does not validate it - confirm.
  OPENAI_API_KEY: "t"

  # If "true", the LLMAAJ_* variables below must be set.
  # Quoted so the value is an explicit string, like every other variable here.
  ENABLE_EVALUATION: "true"

  # LLMAAJ stands for "LLM as a judge".
  # Provider: "openai" or "azure_openai".
  LLMAAJ_PROVIDER: "openai"
  # Settings used when LLMAAJ_PROVIDER is "openai":
  # Or gpt-4o-mini if you use OpenAI.
  LLMAAJ_OPENAI_DEPLOYMENT_NAME: "phi3:3.8b-mini-4k-instruct-q4_K_M"
  # Ollama endpoint, or https://api.openai.com/v1 if you use OpenAI.
  LLMAAJ_OPENAI_BASE_URL: "http://localhost:11434/v1"
  LLMAAJ_OPENAI_API_KEY: "t"
  # NOTE(review): this is an OpenAI model name while the base URL above points
  # at the local Ollama endpoint - confirm the judge can resolve it there.
  LLMAAJ_OPENAI_EMBEDDING_DEPLOYMENT_NAME: "text-embedding-ada-002"
jobs:
  # Single job: install dependencies, download the local models, then run the
  # test suite with an Ollama server running in the background.
  evaluate:
    runs-on: ubuntu-latest
    # Steps execute inside this container image.
    # NOTE(review): `latest` is a moving tag; pin a version or digest if
    # reproducible runs matter.
    container:
      image: aminedjeghri/python-uv-node:latest
    steps:
      - uses: actions/checkout@v4
        with:
          # 0 fetches the full history instead of a shallow clone.
          fetch-depth: 0

      # Disabled step.
      # - name: Install NVM
      #   shell: bash
      #   run: make install_nvm_node

      - name: Install Python dependencies
        shell: bash
        run: |
          make install-dev
          make clean-uv-cache

      - name: Install NPM dependencies
        shell: bash
        run: |
          make install-npm-dependencies

      - name: Download local models
        shell: bash
        run: |
          make download-ollama-model

      - name: Run tests
        shell: bash
        run: |
          # Start the Ollama server in the background for the test run.
          ollama serve &
          # Give the server up to ~30 s to accept requests before testing;
          # `ollama list` only succeeds once the server answers. If it never
          # comes up, fall through and let the tests report the failure.
          for ((attempt = 0; attempt < 30; attempt++)); do
            if ollama list > /dev/null 2>&1; then
              break
            fi
            sleep 1
          done
          make test

      # Disabled step.
      # NOTE(review): VENV_PATH is not defined in this workflow's `env`;
      # define it before re-enabling.
      # - name: Run promptfoo evaluation
      #   shell: bash
      #   run: |
      #     source ${{ env.VENV_PATH }}/bin/activate
      #     make eval