From 6e0925bf31638c80ef1b951264a4ab770bbee859 Mon Sep 17 00:00:00 2001 From: Alex Cheema Date: Fri, 6 Dec 2024 13:56:44 +0000 Subject: [PATCH 01/42] migrate from circleci to github actions --- .circleci/config.yml | 346 --------------------------- .github/workflows/build_and_test.yml | 204 ++++++++++++++++ 2 files changed, 204 insertions(+), 346 deletions(-) delete mode 100644 .circleci/config.yml create mode 100644 .github/workflows/build_and_test.yml diff --git a/.circleci/config.yml b/.circleci/config.yml deleted file mode 100644 index 07e685b8f..000000000 --- a/.circleci/config.yml +++ /dev/null @@ -1,346 +0,0 @@ -version: 2.1 - -orbs: - python: circleci/python@2 - -commands: - run_chatgpt_api_test: - parameters: - inference_engine: - type: string - model_id: - type: string - expected_output: - type: string - prompt: - type: string - steps: - - run: - name: Run chatgpt api integration test (<>, <>) - command: | - source env/bin/activate - - # Set CLANG=1 for tinygrad only - if [ "<>" = "tinygrad" ]; then - pip install llvmlite - export TOKENIZERS_PARALLELISM=true SUPPORT_BF16=0 CLANG=1 - fi - - # Start first instance - HF_HOME="$(pwd)/.hf_cache_node1" DEBUG_DISCOVERY=7 DEBUG=7 exo --inference-engine <> \ - --node-id "node1" --listen-port 5678 --broadcast-port 5679 --chatgpt-api-port 8000 \ - --chatgpt-api-response-timeout 900 --disable-tui > output1.log & - PID1=$! - tail -f output1.log & - TAIL1=$! - - # Start second instance - HF_HOME="$(pwd)/.hf_cache_node2" DEBUG_DISCOVERY=7 DEBUG=7 exo --inference-engine <> \ - --node-id "node2" --listen-port 5679 --broadcast-port 5678 --chatgpt-api-port 8001 \ - --chatgpt-api-response-timeout 900 --disable-tui > output2.log & - PID2=$! - tail -f output2.log & - TAIL2=$! - - # Remember to kill the tail processes at the end - trap 'kill $TAIL1 $TAIL2' EXIT - - # Wait for discovery - sleep 10 - - # Function to check if processes are still running - check_processes() { - if ! kill -0 $PID1 2>/dev/null; then - echo "First instance (PID $PID1) died unexpectedly. Log output:" - cat output1.log - exit 1 - fi - if ! kill -0 $PID2 2>/dev/null; then - echo "Second instance (PID $PID2) died unexpectedly. Log output:" - cat output2.log - exit 1 - fi - } - - # Check processes before proceeding - check_processes - - echo "Sending request to first instance..." - response_1=$(curl -s http://localhost:8000/v1/chat/completions \ - -H "Content-Type: application/json" \ - -d '{ - "model": "<>", - "messages": [{"role": "user", "content": "<>"}], - "temperature": 0.7 - }') - echo "Response 1: $response_1" - - # Check processes after first response - check_processes - - echo "Sending request to second instance..." 
- response_2=$(curl -s http://localhost:8001/v1/chat/completions \ - -H "Content-Type: application/json" \ - -d '{ - "model": "<>", - "messages": [{"role": "user", "content": "<>"}], - "temperature": 0.7 - }') - echo "Response 2: $response_2" - - # Check processes after second response - check_processes - - # Stop both instances - kill $PID1 $PID2 - - echo "" - # Extract content using jq and check if it contains expected output - content1=$(echo "$response_1" | jq -r '.choices[0].message.content') - content2=$(echo "$response_2" | jq -r '.choices[0].message.content') - - if [[ "$content1" != *"<>"* ]] || [[ "$content2" != *"<>"* ]]; then - echo "Test failed: Response does not match '<>'" - echo "Response 1 content: $content1" - echo "" - echo "Response 2 content: $content2" - echo "Output of first instance:" - cat output1.log - echo "Output of second instance:" - cat output2.log - exit 1 - else - echo "Test passed: Response from both nodes matches '<>'" - fi - -jobs: - unit_test: - macos: - xcode: "16.0.0" - resource_class: m2pro.large - steps: - - checkout - - run: - name: Set up Python - command: | - brew install python@3.12 - python3.12 -m venv env - source env/bin/activate - - run: - name: Install dependencies - command: | - source env/bin/activate - pip install --upgrade pip - pip install . - - run: - name: Run tests - command: | - source env/bin/activate - # set TEMPERATURE to 0 for deterministic sampling - echo "Running inference engine tests..." - METAL_DEVICE_WRAPPER_TYPE=1 METAL_DEBUG_ERROR_MODE=0 METAL_XCODE=1 TEMPERATURE=0 python3 -m exo.inference.test_inference_engine - echo "Running tokenizer tests..." - python3 ./test/test_tokenizers.py - python3 ./test/test_model_helpers.py - - discovery_integration_test: - macos: - xcode: "16.0.0" - steps: - - checkout - - run: - name: Set up Python - command: | - brew install python@3.12 - python3.12 -m venv env - source env/bin/activate - - run: - name: Install dependencies - command: | - source env/bin/activate - pip install --upgrade pip - pip install . - - run: - name: Run discovery integration test - command: | - source env/bin/activate - DEBUG_DISCOVERY=7 DEBUG=7 exo --node-id "node1" --listen-port 5678 --broadcast-port 5679 --chatgpt-api-port 8000 --disable-tui > output1.log 2>&1 & - PID1=$! - DEBUG_DISCOVERY=7 DEBUG=7 exo --node-id "node2" --listen-port 5679 --broadcast-port 5678 --chatgpt-api-port 8001 --disable-tui > output2.log 2>&1 & - PID2=$! - sleep 10 - kill $PID1 $PID2 - if grep -q "Peer statuses: {\\'node2\\': \\'is_connected=True, health_check=True" output1.log && ! grep -q "Failed to connect peers:" output1.log && grep -q "Peer statuses: {\\'node1\\': \\'is_connected=True, health_check=True" output2.log && ! grep -q "Failed to connect peers:" output2.log; then - echo "Test passed: Both instances discovered each other" - exit 0 - else - echo "Test failed: Devices did not discover each other" - echo "Output of first instance:" - cat output1.log - echo "Output of second instance:" - cat output2.log - exit 1 - fi - - chatgpt_api_integration_test_mlx: - macos: - xcode: "16.0.0" - resource_class: m2pro.large - steps: - - checkout - - run: - name: Set up Python - command: | - brew install python@3.12 - python3.12 -m venv env - source env/bin/activate - - run: - name: Install dependencies - command: | - source env/bin/activate - pip install --upgrade pip - pip install . - - run_chatgpt_api_test: - inference_engine: mlx - model_id: llama-3.2-1b - prompt: "Keep responses concise. Who was the king of pop?" 
- expected_output: "Michael Jackson" - - chatgpt_api_integration_test_dummy: - macos: - xcode: "16.0.0" - resource_class: m2pro.large - steps: - - checkout - - run: - name: Set up Python - command: | - brew install python@3.12 - python3.12 -m venv env - source env/bin/activate - - run: - name: Install dependencies - command: | - source env/bin/activate - pip install --upgrade pip - pip install . - - run_chatgpt_api_test: - inference_engine: dummy - model_id: dummy - prompt: "Dummy prompt." - expected_output: "dummy" - - chatgpt_api_integration_test_tinygrad: - macos: - xcode: "16.0.0" - resource_class: m2pro.large - steps: - - checkout - - run: - name: Set up Python - command: | - brew install python@3.12 - python3.12 -m venv env - source env/bin/activate - - run: - name: Install dependencies - command: | - source env/bin/activate - pip install --upgrade pip - pip install . - - run_chatgpt_api_test: - inference_engine: tinygrad - model_id: llama-3.2-1b - prompt: "Keep responses concise. Who was the king of pop?" - expected_output: "Michael Jackson" - - measure_pip_sizes: - macos: - xcode: "16.0.0" - steps: - - checkout - - run: - name: Set up Python - command: | - brew install python@3.12 - python3.12 -m venv env - source env/bin/activate - - run: - name: Install dependencies and measure sizes - command: | - source env/bin/activate - pip install --upgrade pip - pip install . - python ./extra/pipsize.py --json ./pipsize.json - - store_artifacts: - path: ./pipsize.json - destination: pip-sizes.json - - check_line_count: - docker: - - image: cimg/python:3.10 - steps: - - checkout - - - run: - name: Setup git for PR comparison - command: | - if [[ -n "$CIRCLE_PULL_REQUEST" ]]; then - PR_NUMBER=$(echo $CIRCLE_PULL_REQUEST | rev | cut -d'/' -f1 | rev) - BASE_BRANCH=$(curl -s -H "Circle-Token: $CIRCLE_TOKEN" \ - "https://circleci.com/api/v2/project/github/$CIRCLE_PROJECT_USERNAME/$CIRCLE_PROJECT_REPONAME/pipeline/$CIRCLE_WORKFLOW_ID" \ - | jq -r '.target_branch') - - git clone -b $BASE_BRANCH --single-branch \ - https://github.com/$CIRCLE_PROJECT_USERNAME/$CIRCLE_PROJECT_REPONAME.git \ - base_branch - fi - - - run: - name: Install dependencies - command: | - python -m pip install --upgrade pip - pip install tabulate - - - run: - name: Run line count check - command: | - if [[ -n "$CIRCLE_PULL_REQUEST" ]]; then - python extra/line_counter.py base_branch . - else - python extra/line_counter.py . 
- fi - - - store_artifacts: - path: line-count-snapshot.json - destination: line-count-snapshot.json - - - store_artifacts: - path: line-count-diff.json - destination: line-count-diff.json - - - run: - name: Create test results directory - command: | - mkdir -p test-results/line-count - cp line-count-*.json test-results/line-count/ - - - store_test_results: - path: test-results - -workflows: - version: 2 - build_and_test: - jobs: - - check_line_count: - filters: - branches: - only: /.*/ - tags: - only: /.*/ - - unit_test - - discovery_integration_test - - chatgpt_api_integration_test_mlx - - chatgpt_api_integration_test_tinygrad - - chatgpt_api_integration_test_dummy - - measure_pip_sizes diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml new file mode 100644 index 000000000..02289150a --- /dev/null +++ b/.github/workflows/build_and_test.yml @@ -0,0 +1,204 @@ +name: Build and Test + +on: + push: + branches: [ '*' ] + tags: [ '*' ] + pull_request: + branches: [ '*' ] + +env: + PYTHON_VERSION: "3.12" + +jobs: + check_line_count: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: ${{ env.PYTHON_VERSION }} + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install tabulate + + - name: Run line count check + run: | + if [[ -n "${{ github.event.pull_request }}" ]]; then + git fetch origin ${{ github.base_ref }} + git clone -b ${{ github.base_ref }} --single-branch \ + https://github.com/${{ github.repository }}.git base_branch + python extra/line_counter.py base_branch . + else + python extra/line_counter.py . + fi + + - name: Upload artifacts + uses: actions/upload-artifact@v4 + with: + name: line-count-results + path: | + line-count-snapshot.json + line-count-diff.json + + unit_test: + runs-on: macos-14 + steps: + - uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: ${{ env.PYTHON_VERSION }} + + - name: Install dependencies + run: | + python -m venv env + source env/bin/activate + pip install --upgrade pip + pip install . + + - name: Run tests + run: | + source env/bin/activate + # set TEMPERATURE to 0 for deterministic sampling + echo "Running inference engine tests..." + METAL_DEVICE_WRAPPER_TYPE=1 METAL_DEBUG_ERROR_MODE=0 METAL_XCODE=1 TEMPERATURE=0 python3 -m exo.inference.test_inference_engine + echo "Running tokenizer tests..." + python3 ./test/test_tokenizers.py + python3 ./test/test_model_helpers.py + + discovery_integration_test: + runs-on: macos-14 + steps: + - uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: ${{ env.PYTHON_VERSION }} + + - name: Install dependencies + run: | + python -m venv env + source env/bin/activate + pip install --upgrade pip + pip install . + + - name: Run discovery integration test + run: | + source env/bin/activate + DEBUG_DISCOVERY=7 DEBUG=7 exo --node-id "node1" --listen-port 5678 --broadcast-port 5679 --chatgpt-api-port 8000 --disable-tui > output1.log 2>&1 & + PID1=$! + DEBUG_DISCOVERY=7 DEBUG=7 exo --node-id "node2" --listen-port 5679 --broadcast-port 5678 --chatgpt-api-port 8001 --disable-tui > output2.log 2>&1 & + PID2=$! + sleep 10 + kill $PID1 $PID2 + if grep -q "Peer statuses: {\\'node2\\': \\'is_connected=True, health_check=True" output1.log && ! 
grep -q "Failed to connect peers:" output1.log && grep -q "Peer statuses: {\\'node1\\': \\'is_connected=True, health_check=True" output2.log && ! grep -q "Failed to connect peers:" output2.log; then + echo "Test passed: Both instances discovered each other" + exit 0 + else + echo "Test failed: Devices did not discover each other" + echo "Output of first instance:" + cat output1.log + echo "Output of second instance:" + cat output2.log + exit 1 + fi + + chatgpt_api_tests: + runs-on: macos-14 + strategy: + matrix: + inference_engine: [mlx, tinygrad, dummy] + include: + - inference_engine: mlx + model_id: llama-3.2-1b + prompt: "Keep responses concise. Who was the king of pop?" + expected_output: "Michael Jackson" + - inference_engine: tinygrad + model_id: llama-3.2-1b + prompt: "Keep responses concise. Who was the king of pop?" + expected_output: "Michael Jackson" + - inference_engine: dummy + model_id: dummy + prompt: "Dummy prompt." + expected_output: "dummy" + + steps: + - uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: ${{ env.PYTHON_VERSION }} + + - name: Install dependencies + run: | + python -m venv env + source env/bin/activate + pip install --upgrade pip + pip install . + if [ "${{ matrix.inference_engine }}" = "tinygrad" ]; then + pip install llvmlite + fi + + - name: Run ChatGPT API test + env: + TOKENIZERS_PARALLELISM: ${{ matrix.inference_engine == 'tinygrad' && 'true' || 'false' }} + SUPPORT_BF16: ${{ matrix.inference_engine == 'tinygrad' && '0' || '' }} + CLANG: ${{ matrix.inference_engine == 'tinygrad' && '1' || '' }} + run: | + source env/bin/activate + + # Start first instance + HF_HOME="$(pwd)/.hf_cache_node1" DEBUG_DISCOVERY=7 DEBUG=7 exo --inference-engine ${{ matrix.inference_engine }} \ + --node-id "node1" --listen-port 5678 --broadcast-port 5679 --chatgpt-api-port 8000 \ + --chatgpt-api-response-timeout 900 --disable-tui > output1.log & + PID1=$! + tail -f output1.log & + TAIL1=$! + + # Start second instance + HF_HOME="$(pwd)/.hf_cache_node2" DEBUG_DISCOVERY=7 DEBUG=7 exo --inference-engine ${{ matrix.inference_engine }} \ + --node-id "node2" --listen-port 5679 --broadcast-port 5678 --chatgpt-api-port 8001 \ + --chatgpt-api-response-timeout 900 --disable-tui > output2.log & + PID2=$! + tail -f output2.log & + TAIL2=$! + + # Remember to kill the tail processes at the end + trap 'kill $TAIL1 $TAIL2' EXIT + + # Rest of the test script remains the same as in your CircleCI config + # ... (Copy the remaining test logic from the CircleCI config) + + measure_pip_sizes: + runs-on: macos-14 + steps: + - uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: ${{ env.PYTHON_VERSION }} + + - name: Install dependencies and measure sizes + run: | + python -m venv env + source env/bin/activate + pip install --upgrade pip + pip install . 
+ python ./extra/pipsize.py --json ./pipsize.json + + - name: Upload pip sizes artifact + uses: actions/upload-artifact@v4 + with: + name: pip-sizes + path: ./pipsize.json From df832e20df02ad93a9ea46a88a82c7ab6214ecba Mon Sep 17 00:00:00 2001 From: Alex Cheema Date: Fri, 6 Dec 2024 14:00:37 +0000 Subject: [PATCH 02/42] macos 15 --- .github/workflows/build_and_test.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 02289150a..9bcba4070 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -48,7 +48,7 @@ jobs: line-count-diff.json unit_test: - runs-on: macos-14 + runs-on: macos-15 steps: - uses: actions/checkout@v4 @@ -75,7 +75,7 @@ jobs: python3 ./test/test_model_helpers.py discovery_integration_test: - runs-on: macos-14 + runs-on: macos-15 steps: - uses: actions/checkout@v4 @@ -113,7 +113,7 @@ jobs: fi chatgpt_api_tests: - runs-on: macos-14 + runs-on: macos-15 strategy: matrix: inference_engine: [mlx, tinygrad, dummy] @@ -180,7 +180,7 @@ jobs: # ... (Copy the remaining test logic from the CircleCI config) measure_pip_sizes: - runs-on: macos-14 + runs-on: macos-15 steps: - uses: actions/checkout@v4 From 62c9ec96962512bf2766364602549c839f02dfb2 Mon Sep 17 00:00:00 2001 From: Alex Cheema Date: Fri, 6 Dec 2024 14:02:56 +0000 Subject: [PATCH 03/42] github env vars --- extra/line_counter.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/extra/line_counter.py b/extra/line_counter.py index e5bf04289..01e7ee20e 100644 --- a/extra/line_counter.py +++ b/extra/line_counter.py @@ -74,9 +74,9 @@ def gen_diff(table_old, table_new): def create_json_report(table, is_diff=False): timestamp = datetime.now(timezone.utc).isoformat() - commit_sha = os.environ.get('CIRCLE_SHA1', 'unknown') - branch = os.environ.get('CIRCLE_BRANCH', 'unknown') - pr_number = os.environ.get('CIRCLE_PR_NUMBER', '') + commit_sha = os.environ.get('GITHUB_SHA', 'unknown') + branch = os.environ.get('GITHUB_REF_NAME', 'unknown') + pr_number = os.environ.get('GITHUB_EVENT_NUMBER', '') if is_diff: files = [{ From 550f70b1864d0ef08147b11a1955880a252d4dbe Mon Sep 17 00:00:00 2001 From: Alex Cheema Date: Fri, 6 Dec 2024 14:06:12 +0000 Subject: [PATCH 04/42] job --- .github/workflows/build_and_test.yml | 69 +++++++++++++++++++++++++++- 1 file changed, 67 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 9bcba4070..04eba6d22 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -176,8 +176,73 @@ jobs: # Remember to kill the tail processes at the end trap 'kill $TAIL1 $TAIL2' EXIT - # Rest of the test script remains the same as in your CircleCI config - # ... (Copy the remaining test logic from the CircleCI config) + # Wait for discovery + sleep 10 + + # Function to check if processes are still running + check_processes() { + if ! kill -0 $PID1 2>/dev/null; then + echo "First instance (PID $PID1) died unexpectedly. Log output:" + cat output1.log + exit 1 + fi + if ! kill -0 $PID2 2>/dev/null; then + echo "Second instance (PID $PID2) died unexpectedly. Log output:" + cat output2.log + exit 1 + fi + } + + # Check processes before proceeding + check_processes + + echo "Sending request to first instance..." 
+ response_1=$(curl -s http://localhost:8000/v1/chat/completions \ + -H "Content-Type: application/json" \ + -d '{ + "model": "${{ matrix.model_id }}", + "messages": [{"role": "user", "content": "${{ matrix.prompt }}"}], + "temperature": 0.7 + }') + echo "Response 1: $response_1" + + # Check processes after first response + check_processes + + echo "Sending request to second instance..." + response_2=$(curl -s http://localhost:8001/v1/chat/completions \ + -H "Content-Type: application/json" \ + -d '{ + "model": "${{ matrix.model_id }}", + "messages": [{"role": "user", "content": "${{ matrix.prompt }}"}], + "temperature": 0.7 + }') + echo "Response 2: $response_2" + + # Check processes after second response + check_processes + + # Stop both instances + kill $PID1 $PID2 + + echo "" + # Extract content using jq and check if it contains expected output + content1=$(echo "$response_1" | jq -r '.choices[0].message.content') + content2=$(echo "$response_2" | jq -r '.choices[0].message.content') + + if [[ "$content1" != *"${{ matrix.expected_output }}"* ]] || [[ "$content2" != *"${{ matrix.expected_output }}"* ]]; then + echo "Test failed: Response does not match '${{ matrix.expected_output }}'" + echo "Response 1 content: $content1" + echo "" + echo "Response 2 content: $content2" + echo "Output of first instance:" + cat output1.log + echo "Output of second instance:" + cat output2.log + exit 1 + else + echo "Test passed: Response from both nodes matches '${{ matrix.expected_output }}'" + fi measure_pip_sizes: runs-on: macos-15 From 66c7c3386926ae2541a09b4e325328dc4d4a1fae Mon Sep 17 00:00:00 2001 From: Alex Cheema Date: Fri, 6 Dec 2024 14:46:56 +0000 Subject: [PATCH 05/42] run tinygrad and discovery integratrion tests on linux --- .github/workflows/build_and_test.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 04eba6d22..ab8bd2052 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -75,7 +75,7 @@ jobs: python3 ./test/test_model_helpers.py discovery_integration_test: - runs-on: macos-15 + runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 @@ -113,7 +113,7 @@ jobs: fi chatgpt_api_tests: - runs-on: macos-15 + runs-on: ${{ matrix.inference_engine == 'tinygrad' && 'ubuntu-latest' || 'macos-15' }} strategy: matrix: inference_engine: [mlx, tinygrad, dummy] From 0a0c058b8024f7f738194fe2430e0980a32ee23a Mon Sep 17 00:00:00 2001 From: Alex Cheema Date: Fri, 6 Dec 2024 14:52:07 +0000 Subject: [PATCH 06/42] more robust discovery log check --- .github/workflows/build_and_test.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index ab8bd2052..705027aae 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -100,7 +100,10 @@ jobs: PID2=$! sleep 10 kill $PID1 $PID2 - if grep -q "Peer statuses: {\\'node2\\': \\'is_connected=True, health_check=True" output1.log && ! grep -q "Failed to connect peers:" output1.log && grep -q "Peer statuses: {\\'node1\\': \\'is_connected=True, health_check=True" output2.log && ! grep -q "Failed to connect peers:" output2.log; then + if grep -q "Peer statuses: {.*'node2': 'is_connected=True, health_check=True" output1.log && \ + ! grep -q "Failed to connect peers:" output1.log && \ + grep -q "Peer statuses: {.*'node1': 'is_connected=True, health_check=True" output2.log && \ + ! 
grep -q "Failed to connect peers:" output2.log; then echo "Test passed: Both instances discovered each other" exit 0 else From 4940f5269cf275e8ac8b9135a23dc0f71da3090e Mon Sep 17 00:00:00 2001 From: Alex Cheema Date: Fri, 6 Dec 2024 15:00:26 +0000 Subject: [PATCH 07/42] check discovery on integration tests too --- .github/workflows/build_and_test.yml | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 705027aae..8ea096751 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -116,7 +116,7 @@ jobs: fi chatgpt_api_tests: - runs-on: ${{ matrix.inference_engine == 'tinygrad' && 'ubuntu-latest' || 'macos-15' }} + runs-on: ${{ matrix.inference_engine == 'tinygrad' || matrix.inference_engine == 'dummy' ? 'ubuntu-latest' : 'macos-15' }} strategy: matrix: inference_engine: [mlx, tinygrad, dummy] @@ -179,8 +179,20 @@ jobs: # Remember to kill the tail processes at the end trap 'kill $TAIL1 $TAIL2' EXIT - # Wait for discovery + # Wait for discovery and verify peer connections sleep 10 + if ! grep -q "Peer statuses: {.*'node2': 'is_connected=True, health_check=True" output1.log || \ + grep -q "Failed to connect peers:" output1.log || \ + ! grep -q "Peer statuses: {.*'node1': 'is_connected=True, health_check=True" output2.log || \ + grep -q "Failed to connect peers:" output2.log; then + echo "Test failed: Nodes did not discover each other properly" + echo "Output of first instance:" + cat output1.log + echo "Output of second instance:" + cat output2.log + exit 1 + fi + echo "Peer discovery successful" # Function to check if processes are still running check_processes() { From 4feaf73142a850cc104cbb6c0ef27c89b72c77e0 Mon Sep 17 00:00:00 2001 From: Alex Cheema Date: Fri, 6 Dec 2024 16:25:31 +0000 Subject: [PATCH 08/42] cond --- .github/workflows/build_and_test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 8ea096751..e6152c401 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -116,7 +116,7 @@ jobs: fi chatgpt_api_tests: - runs-on: ${{ matrix.inference_engine == 'tinygrad' || matrix.inference_engine == 'dummy' ? 'ubuntu-latest' : 'macos-15' }} + runs-on: ${{ (matrix.inference_engine == 'tinygrad' || matrix.inference_engine == 'dummy') && 'ubuntu-latest' || 'macos-15' }} strategy: matrix: inference_engine: [mlx, tinygrad, dummy] From 3bbca5723e206e0ca0daa99ac75a2939cc4e8675 Mon Sep 17 00:00:00 2001 From: Alex Cheema Date: Fri, 6 Dec 2024 16:39:39 +0000 Subject: [PATCH 09/42] give this a goh --- .github/workflows/build_and_test.yml | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index e6152c401..e7068f0b0 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -9,6 +9,8 @@ on: env: PYTHON_VERSION: "3.12" + TOKENIZERS_PARALLELISM: "false" + PYTHONPATH: "." 
jobs: check_line_count: @@ -49,6 +51,7 @@ jobs: unit_test: runs-on: macos-15 + timeout-minutes: 20 steps: - uses: actions/checkout@v4 @@ -57,20 +60,29 @@ jobs: with: python-version: ${{ env.PYTHON_VERSION }} + # - name: Cache python packages + # uses: actions/cache@v4 + # with: + # path: ${{ env.Python3_ROOT_DIR }}/lib/python3.12/site-packages + # key: testing-packages-${{ hashFiles('**/setup.py') }} + - name: Install dependencies run: | python -m venv env source env/bin/activate pip install --upgrade pip + pip install llvmlite pip install . + - name: Basic import test + run: | + source env/bin/activate + python -c "from tinygrad.tensor import Tensor; print(Tensor([1,2,3,4,5]))" + - name: Run tests run: | source env/bin/activate - # set TEMPERATURE to 0 for deterministic sampling - echo "Running inference engine tests..." - METAL_DEVICE_WRAPPER_TYPE=1 METAL_DEBUG_ERROR_MODE=0 METAL_XCODE=1 TEMPERATURE=0 python3 -m exo.inference.test_inference_engine - echo "Running tokenizer tests..." + METAL_DEVICE_WRAPPER_TYPE=1 METAL_DEBUG_ERROR_MODE=1 METAL_XCODE=1 TEMPERATURE=0 python3 -m exo.inference.test_inference_engine python3 ./test/test_tokenizers.py python3 ./test/test_model_helpers.py From a52ac61835b2baa2c6b1703ae33d06e829c9f40f Mon Sep 17 00:00:00 2001 From: Alex Cheema Date: Fri, 6 Dec 2024 16:42:22 +0000 Subject: [PATCH 10/42] tooonygrad --- .github/workflows/build_and_test.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index e7068f0b0..17d9b5cad 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -167,8 +167,6 @@ jobs: - name: Run ChatGPT API test env: TOKENIZERS_PARALLELISM: ${{ matrix.inference_engine == 'tinygrad' && 'true' || 'false' }} - SUPPORT_BF16: ${{ matrix.inference_engine == 'tinygrad' && '0' || '' }} - CLANG: ${{ matrix.inference_engine == 'tinygrad' && '1' || '' }} run: | source env/bin/activate From be18c96cef00fae061fcde3529c1aecfcbd14f5b Mon Sep 17 00:00:00 2001 From: Alex Cheema Date: Fri, 6 Dec 2024 16:51:54 +0000 Subject: [PATCH 11/42] disable mlx test for now..plan to run this on a self-hosted runner --- .github/workflows/build_and_test.yml | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 17d9b5cad..31eeba249 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -131,12 +131,13 @@ jobs: runs-on: ${{ (matrix.inference_engine == 'tinygrad' || matrix.inference_engine == 'dummy') && 'ubuntu-latest' || 'macos-15' }} strategy: matrix: - inference_engine: [mlx, tinygrad, dummy] + # inference_engine: [mlx, tinygrad, dummy] + inference_engine: [tinygrad, dummy] include: - - inference_engine: mlx - model_id: llama-3.2-1b - prompt: "Keep responses concise. Who was the king of pop?" - expected_output: "Michael Jackson" + # - inference_engine: mlx + # model_id: llama-3.2-1b + # prompt: "Keep responses concise. Who was the king of pop?" + # expected_output: "Michael Jackson" - inference_engine: tinygrad model_id: llama-3.2-1b prompt: "Keep responses concise. Who was the king of pop?" 
From b6529c204770f06d7a6b2ed706b668eedaab88af Mon Sep 17 00:00:00 2001 From: Alex Cheema Date: Fri, 6 Dec 2024 16:59:18 +0000 Subject: [PATCH 12/42] clang for tinygrad --- .github/workflows/build_and_test.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 31eeba249..8ef7cd244 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -168,6 +168,8 @@ jobs: - name: Run ChatGPT API test env: TOKENIZERS_PARALLELISM: ${{ matrix.inference_engine == 'tinygrad' && 'true' || 'false' }} + SUPPORT_BF16: ${{ matrix.inference_engine == 'tinygrad' && '0' || '0' }} + CLANG: ${{ matrix.inference_engine == 'tinygrad' && '1' || '0' }} run: | source env/bin/activate From db7d3a5f7430ff294a1dee8950534fdaddec3da5 Mon Sep 17 00:00:00 2001 From: Alex Cheema Date: Fri, 6 Dec 2024 17:03:16 +0000 Subject: [PATCH 13/42] add another chatgpt api integration test for tinygrad on metal --- .github/workflows/build_and_test.yml | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 8ef7cd244..3ea443db9 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -128,24 +128,25 @@ jobs: fi chatgpt_api_tests: - runs-on: ${{ (matrix.inference_engine == 'tinygrad' || matrix.inference_engine == 'dummy') && 'ubuntu-latest' || 'macos-15' }} + runs-on: ${{ (matrix.inference_engine == 'tinygrad' && matrix.backend != 'metal') || matrix.inference_engine == 'dummy' ? 'ubuntu-latest' : 'macos-14' }} strategy: matrix: - # inference_engine: [mlx, tinygrad, dummy] - inference_engine: [tinygrad, dummy] include: - # - inference_engine: mlx - # model_id: llama-3.2-1b - # prompt: "Keep responses concise. Who was the king of pop?" - # expected_output: "Michael Jackson" - inference_engine: tinygrad model_id: llama-3.2-1b prompt: "Keep responses concise. Who was the king of pop?" expected_output: "Michael Jackson" + backend: cpu + - inference_engine: tinygrad + model_id: llama-3.2-1b + prompt: "Keep responses concise. Who was the king of pop?" + expected_output: "Michael Jackson" + backend: metal - inference_engine: dummy model_id: dummy prompt: "Dummy prompt." 
expected_output: "dummy" + backend: cpu steps: - uses: actions/checkout@v4 @@ -170,6 +171,11 @@ jobs: TOKENIZERS_PARALLELISM: ${{ matrix.inference_engine == 'tinygrad' && 'true' || 'false' }} SUPPORT_BF16: ${{ matrix.inference_engine == 'tinygrad' && '0' || '0' }} CLANG: ${{ matrix.inference_engine == 'tinygrad' && '1' || '0' }} + METAL: ${{ matrix.backend == 'metal' && '1' || '0' }} + METAL_DEVICE_WRAPPER_TYPE: ${{ matrix.backend == 'metal' && '1' || '0' }} + METAL_DEBUG_ERROR_MODE: ${{ matrix.backend == 'metal' && '1' || '0' }} + METAL_XCODE: ${{ matrix.backend == 'metal' && '1' || '0' }} + run: | source env/bin/activate From 0fe8065ffbf65687e50f9c07d2d061e7d9e7de6f Mon Sep 17 00:00:00 2001 From: Alex Cheema Date: Fri, 6 Dec 2024 17:06:48 +0000 Subject: [PATCH 14/42] remove tinygrad macos --- .github/workflows/build_and_test.yml | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 3ea443db9..8ef7cd244 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -128,25 +128,24 @@ jobs: fi chatgpt_api_tests: - runs-on: ${{ (matrix.inference_engine == 'tinygrad' && matrix.backend != 'metal') || matrix.inference_engine == 'dummy' ? 'ubuntu-latest' : 'macos-14' }} + runs-on: ${{ (matrix.inference_engine == 'tinygrad' || matrix.inference_engine == 'dummy') && 'ubuntu-latest' || 'macos-15' }} strategy: matrix: + # inference_engine: [mlx, tinygrad, dummy] + inference_engine: [tinygrad, dummy] include: + # - inference_engine: mlx + # model_id: llama-3.2-1b + # prompt: "Keep responses concise. Who was the king of pop?" + # expected_output: "Michael Jackson" - inference_engine: tinygrad model_id: llama-3.2-1b prompt: "Keep responses concise. Who was the king of pop?" expected_output: "Michael Jackson" - backend: cpu - - inference_engine: tinygrad - model_id: llama-3.2-1b - prompt: "Keep responses concise. Who was the king of pop?" - expected_output: "Michael Jackson" - backend: metal - inference_engine: dummy model_id: dummy prompt: "Dummy prompt." 
expected_output: "dummy" - backend: cpu steps: - uses: actions/checkout@v4 @@ -171,11 +170,6 @@ jobs: TOKENIZERS_PARALLELISM: ${{ matrix.inference_engine == 'tinygrad' && 'true' || 'false' }} SUPPORT_BF16: ${{ matrix.inference_engine == 'tinygrad' && '0' || '0' }} CLANG: ${{ matrix.inference_engine == 'tinygrad' && '1' || '0' }} - METAL: ${{ matrix.backend == 'metal' && '1' || '0' }} - METAL_DEVICE_WRAPPER_TYPE: ${{ matrix.backend == 'metal' && '1' || '0' }} - METAL_DEBUG_ERROR_MODE: ${{ matrix.backend == 'metal' && '1' || '0' }} - METAL_XCODE: ${{ matrix.backend == 'metal' && '1' || '0' }} - run: | source env/bin/activate From 099917a1deaf62a5922d612f72ffa99659724323 Mon Sep 17 00:00:00 2001 From: Alex Cheema Date: Fri, 6 Dec 2024 17:13:56 +0000 Subject: [PATCH 15/42] prio loopback over container virtual --- exo/helpers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/exo/helpers.py b/exo/helpers.py index 943d7f514..fb497d188 100644 --- a/exo/helpers.py +++ b/exo/helpers.py @@ -240,11 +240,11 @@ def get_interface_priority_and_type(ifname: str) -> Tuple[int, str]: # Local container/virtual interfaces if (ifname.startswith(('docker', 'br-', 'veth', 'cni', 'flannel', 'calico', 'weave')) or 'bridge' in ifname): - return (7, "Container Virtual") + return (6, "Container Virtual") # Loopback interface if ifname.startswith('lo'): - return (6, "Loopback") + return (7, "Loopback") # Thunderbolt/10GbE detection if ifname.startswith(('tb', 'nx', 'ten')): From 32ba0b4cd25f986aea7fa64db5afd596d410bddf Mon Sep 17 00:00:00 2001 From: Alex Cheema Date: Fri, 6 Dec 2024 17:21:25 +0000 Subject: [PATCH 16/42] teeenygrad --- .github/workflows/build_and_test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 8ef7cd244..52fc7daad 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -168,7 +168,7 @@ jobs: - name: Run ChatGPT API test env: TOKENIZERS_PARALLELISM: ${{ matrix.inference_engine == 'tinygrad' && 'true' || 'false' }} - SUPPORT_BF16: ${{ matrix.inference_engine == 'tinygrad' && '0' || '0' }} + SUPPORT_BF16: ${{ matrix.inference_engine == 'tinygrad' && '1' || '0' }} CLANG: ${{ matrix.inference_engine == 'tinygrad' && '1' || '0' }} run: | source env/bin/activate From e645b2b6b3d02607a12d09da6c40d7f85853bf71 Mon Sep 17 00:00:00 2001 From: Alex Cheema Date: Fri, 6 Dec 2024 17:30:52 +0000 Subject: [PATCH 17/42] run on beefy machine --- .github/workflows/build_and_test.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 52fc7daad..39a939c24 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -128,7 +128,7 @@ jobs: fi chatgpt_api_tests: - runs-on: ${{ (matrix.inference_engine == 'tinygrad' || matrix.inference_engine == 'dummy') && 'ubuntu-latest' || 'macos-15' }} + runs-on: ${{ (matrix.inference_engine == 'tinygrad' || matrix.inference_engine == 'dummy') && 'ubuntu-latest-large' || 'macos-15' }} strategy: matrix: # inference_engine: [mlx, tinygrad, dummy] @@ -168,7 +168,7 @@ jobs: - name: Run ChatGPT API test env: TOKENIZERS_PARALLELISM: ${{ matrix.inference_engine == 'tinygrad' && 'true' || 'false' }} - SUPPORT_BF16: ${{ matrix.inference_engine == 'tinygrad' && '1' || '0' }} + SUPPORT_BF16: ${{ matrix.inference_engine == 'tinygrad' && '0' || '0' }} CLANG: ${{ matrix.inference_engine == 'tinygrad' && 
'1' || '0' }} run: | source env/bin/activate From 8eaf9d74c36dd24e90b7cdb261c7b36f9ff95c62 Mon Sep 17 00:00:00 2001 From: Alex Cheema Date: Fri, 6 Dec 2024 17:47:38 +0000 Subject: [PATCH 18/42] group and labels --- .github/workflows/build_and_test.yml | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 39a939c24..ca0b3b36a 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -50,7 +50,9 @@ jobs: line-count-diff.json unit_test: - runs-on: macos-15 + runs-on: + group: Default + labels: macos-15 timeout-minutes: 20 steps: - uses: actions/checkout@v4 @@ -128,7 +130,9 @@ jobs: fi chatgpt_api_tests: - runs-on: ${{ (matrix.inference_engine == 'tinygrad' || matrix.inference_engine == 'dummy') && 'ubuntu-latest-large' || 'macos-15' }} + runs-on: + group: ${{ (matrix.inference_engine == 'tinygrad' || matrix.inference_engine == 'dummy') && 'Default Larger Runners' || 'Default' }} + labels: ${{ (matrix.inference_engine == 'tinygrad' || matrix.inference_engine == 'dummy') && 'ubuntu-latest-large' || 'macos-15' }} strategy: matrix: # inference_engine: [mlx, tinygrad, dummy] @@ -273,7 +277,9 @@ jobs: fi measure_pip_sizes: - runs-on: macos-15 + runs-on: + group: Default + labels: macos-15 steps: - uses: actions/checkout@v4 From 8cf9a871d3a7d844fc2e0d02cfacdea140f3cf1e Mon Sep 17 00:00:00 2001 From: Alex Cheema Date: Fri, 6 Dec 2024 17:49:49 +0000 Subject: [PATCH 19/42] t --- .github/workflows/build_and_test.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index ca0b3b36a..b80be1385 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -51,7 +51,6 @@ jobs: unit_test: runs-on: - group: Default labels: macos-15 timeout-minutes: 20 steps: @@ -278,7 +277,6 @@ jobs: measure_pip_sizes: runs-on: - group: Default labels: macos-15 steps: - uses: actions/checkout@v4 From 9fbce8c10ad2977b750ab5dc9ecbe25ad3f6da9e Mon Sep 17 00:00:00 2001 From: Alex Cheema Date: Fri, 6 Dec 2024 17:51:20 +0000 Subject: [PATCH 20/42] t From f4ffdcfef26495709bd9ee66e5f4c9178b11fb1b Mon Sep 17 00:00:00 2001 From: Alex Cheema Date: Fri, 6 Dec 2024 17:53:39 +0000 Subject: [PATCH 21/42] t --- .github/workflows/build_and_test.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index b80be1385..feab6c4c3 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -130,7 +130,6 @@ jobs: chatgpt_api_tests: runs-on: - group: ${{ (matrix.inference_engine == 'tinygrad' || matrix.inference_engine == 'dummy') && 'Default Larger Runners' || 'Default' }} labels: ${{ (matrix.inference_engine == 'tinygrad' || matrix.inference_engine == 'dummy') && 'ubuntu-latest-large' || 'macos-15' }} strategy: matrix: From e873d273cdc7fa27aaa183b4df56a82f4f3ea481 Mon Sep 17 00:00:00 2001 From: Alex Cheema Date: Fri, 6 Dec 2024 17:58:31 +0000 Subject: [PATCH 22/42] t --- .github/workflows/build_and_test.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index feab6c4c3..7e94ed4f6 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -130,7 +130,8 @@ jobs: chatgpt_api_tests: runs-on: - labels: ${{ (matrix.inference_engine == 'tinygrad' || 
matrix.inference_engine == 'dummy') && 'ubuntu-latest-large' || 'macos-15' }} + # group: ${{ (matrix.inference_engine == 'tinygrad' || matrix.inference_engine == 'dummy') && 'Default Larger Runners' || 'Default' }} + group: ${{ (matrix.inference_engine == 'tinygrad' || matrix.inference_engine == 'dummy') && 'ubuntu-latest-large' || 'macos-15' }} strategy: matrix: # inference_engine: [mlx, tinygrad, dummy] From de99933e4ad5a60b1cd13a762def454549431dc6 Mon Sep 17 00:00:00 2001 From: Alex Cheema Date: Fri, 6 Dec 2024 17:59:33 +0000 Subject: [PATCH 23/42] a --- .github/workflows/build_and_test.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 7e94ed4f6..d9dcaf646 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -129,9 +129,9 @@ jobs: fi chatgpt_api_tests: - runs-on: + runs-on: ${{ (matrix.inference_engine == 'tinygrad' || matrix.inference_engine == 'dummy') && 'ubuntu-latest-large' || 'macos-15' }} # group: ${{ (matrix.inference_engine == 'tinygrad' || matrix.inference_engine == 'dummy') && 'Default Larger Runners' || 'Default' }} - group: ${{ (matrix.inference_engine == 'tinygrad' || matrix.inference_engine == 'dummy') && 'ubuntu-latest-large' || 'macos-15' }} + # group: ${{ (matrix.inference_engine == 'tinygrad' || matrix.inference_engine == 'dummy') && 'ubuntu-latest-large' || 'macos-15' }} strategy: matrix: # inference_engine: [mlx, tinygrad, dummy] From 0d95fe380d9c9334b03d3ac12332d1a22c09820a Mon Sep 17 00:00:00 2001 From: Alex Cheema Date: Fri, 6 Dec 2024 18:00:28 +0000 Subject: [PATCH 24/42] e From 135a2276279f3347ed4601e2e777ec0d140eddda Mon Sep 17 00:00:00 2001 From: Alex Cheema Date: Fri, 6 Dec 2024 18:02:51 +0000 Subject: [PATCH 25/42] t --- .github/workflows/build_and_test.yml | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index d9dcaf646..8ef7cd244 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -50,8 +50,7 @@ jobs: line-count-diff.json unit_test: - runs-on: - labels: macos-15 + runs-on: macos-15 timeout-minutes: 20 steps: - uses: actions/checkout@v4 @@ -129,9 +128,7 @@ jobs: fi chatgpt_api_tests: - runs-on: ${{ (matrix.inference_engine == 'tinygrad' || matrix.inference_engine == 'dummy') && 'ubuntu-latest-large' || 'macos-15' }} - # group: ${{ (matrix.inference_engine == 'tinygrad' || matrix.inference_engine == 'dummy') && 'Default Larger Runners' || 'Default' }} - # group: ${{ (matrix.inference_engine == 'tinygrad' || matrix.inference_engine == 'dummy') && 'ubuntu-latest-large' || 'macos-15' }} + runs-on: ${{ (matrix.inference_engine == 'tinygrad' || matrix.inference_engine == 'dummy') && 'ubuntu-latest' || 'macos-15' }} strategy: matrix: # inference_engine: [mlx, tinygrad, dummy] @@ -276,8 +273,7 @@ jobs: fi measure_pip_sizes: - runs-on: - labels: macos-15 + runs-on: macos-15 steps: - uses: actions/checkout@v4 From 5abdf6a9d88a536882ed1f468d46902272092ca3 Mon Sep 17 00:00:00 2001 From: Alex Cheema Date: Sun, 8 Dec 2024 18:24:56 +0000 Subject: [PATCH 26/42] new dash --- extra/dashboard/dashboard.py | 1270 +++--------------------------- extra/dashboard/mock_data.json | 54 ++ extra/dashboard/requirements.txt | 7 +- 3 files changed, 181 insertions(+), 1150 deletions(-) create mode 100644 extra/dashboard/mock_data.json diff --git a/extra/dashboard/dashboard.py 
b/extra/dashboard/dashboard.py index 0ef6d3edd..df047c3f7 100644 --- a/extra/dashboard/dashboard.py +++ b/extra/dashboard/dashboard.py @@ -1,1147 +1,127 @@ -import os +import dash +from dash import html, dcc +import plotly.graph_objs as go +from dash.dependencies import Input, Output +import boto3 import json -import logging -import asyncio -import aiohttp -import pandas as pd -import plotly.express as px -from typing import List, Dict, Optional -from pathlib import Path -from plotly.subplots import make_subplots -import plotly.graph_objects as go -import time -import pygame.mixer -from datetime import datetime - -class AsyncCircleCIClient: - def __init__(self, token: str, project_slug: str): - self.token = token - self.project_slug = project_slug - self.base_url = "https://circleci.com/api/v2" - self.headers = { - "Circle-Token": token, - "Accept": "application/json" - } - self.logger = logging.getLogger("CircleCI") - - async def get_json(self, session: aiohttp.ClientSession, url: str, params: Dict = None) -> Dict: - async with session.get(url, params=params) as response: - response.raise_for_status() - return await response.json() - - async def get_recent_pipelines( - self, - session: aiohttp.ClientSession, - org_slug: str = None, - page_token: str = None, - limit: int = None, - branch: str = None - ): - """ - Get recent pipelines for a project with pagination support - """ - params = { - "branch": branch, - "page-token": page_token - } - - # Remove None values - params = {k: v for k, v in params.items() if v is not None} - - url = f"{self.base_url}/project/{self.project_slug}/pipeline" - data = await self.get_json(session, url, params) - pipelines = data["items"] - - next_page_token = data.get("next_page_token") - - # If we have a limit, check if we need more pages - if limit and len(pipelines) >= limit: - return pipelines - - # If there are more pages and we haven't hit the limit, recursively get them - if next_page_token: - next_pipelines = await self.get_recent_pipelines( - session, - org_slug, - page_token=next_page_token, - limit=limit - len(pipelines) if limit else None, # Adjust limit for next page - branch=branch - ) - pipelines.extend(next_pipelines) - - return pipelines - - async def get_workflow_jobs(self, session: aiohttp.ClientSession, pipeline_id: str) -> List[Dict]: - self.logger.debug(f"Fetching workflows for pipeline {pipeline_id}") - url = f"{self.base_url}/pipeline/{pipeline_id}/workflow" - workflows_data = await self.get_json(session, url) - workflows = workflows_data["items"] - - # Fetch all jobs for all workflows in parallel - jobs_tasks = [] - for workflow in workflows: - url = f"{self.base_url}/workflow/{workflow['id']}/job" - jobs_tasks.append(self.get_json(session, url)) - - jobs_responses = await asyncio.gather(*jobs_tasks, return_exceptions=True) - - all_jobs = [] - for jobs_data in jobs_responses: - if isinstance(jobs_data, Exception): - continue - all_jobs.extend(jobs_data["items"]) - - return all_jobs - - async def get_artifacts(self, session: aiohttp.ClientSession, job_number: str) -> List[Dict]: - url = f"{self.base_url}/project/{self.project_slug}/{job_number}/artifacts" - data = await self.get_json(session, url) - return data["items"] - -class PackageSizeTracker: - def __init__(self, token: str, project_slug: str, debug: bool = False): - self.setup_logging(debug) - self.client = AsyncCircleCIClient(token, project_slug) - self.logger = logging.getLogger("PackageSizeTracker") - self.last_data_hash = None - self.debug = debug - - # Initialize pygame mixer 
- pygame.mixer.init() - - # Sound file paths - can use MP3 files with pygame - sounds_dir = Path(__file__).parent / "sounds" - self.sounds = { - 'lines_up': sounds_dir / "gta5_wasted.mp3", - 'lines_down': sounds_dir / "pokemon_evolve.mp3", - 'tokens_up': sounds_dir / "pokemon_evolve.mp3", - 'tokens_down': sounds_dir / "gta5_wasted.mp3", - 'size_up': sounds_dir / "gta5_wasted.mp3", - 'size_down': sounds_dir / "pokemon_evolve.mp3" - } - - def test_sound_effects(self): - """Test all sound effects with a small delay between each""" - self.logger.info("Testing sound effects...") - for sound_key in self.sounds: - self.logger.info(f"Playing {sound_key}") - self._play_sound(sound_key) - time.sleep(1) # Wait 1 second between sounds - - def setup_logging(self, debug: bool): - level = logging.DEBUG if debug else logging.INFO - logging.basicConfig( - level=level, - format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', - datefmt='%H:%M:%S' - ) - - def extract_commit_info(self, pipeline: Dict) -> Optional[Dict]: - try: - # Extract from github_app first (preferred) - if 'trigger_parameters' in pipeline and 'github_app' in pipeline['trigger_parameters']: - github_app = pipeline['trigger_parameters']['github_app'] - return { - 'commit_hash': github_app.get('checkout_sha'), - 'web_url': f"{github_app.get('repo_url')}/commit/{github_app.get('checkout_sha')}", - 'branch': github_app.get('branch', 'unknown'), - 'author': { - 'name': github_app.get('commit_author_name'), - 'email': github_app.get('commit_author_email'), - 'username': github_app.get('user_username') - }, - 'message': github_app.get('commit_message') - } - - # Fallback to git parameters - if 'trigger_parameters' in pipeline and 'git' in pipeline['trigger_parameters']: - git = pipeline['trigger_parameters']['git'] - return { - 'commit_hash': git.get('checkout_sha'), - 'web_url': f"{git.get('repo_url')}/commit/{git.get('checkout_sha')}", - 'branch': git.get('branch', 'unknown'), - 'author': { - 'name': git.get('commit_author_name'), - 'email': git.get('commit_author_email'), - 'username': git.get('author_login') - }, - 'message': git.get('commit_message') - } - - self.logger.warning(f"Could not find commit info in pipeline {pipeline['id']}") - return None - - except Exception as e: - self.logger.error(f"Error extracting commit info: {str(e)}") - return None - - async def process_pipeline(self, session: aiohttp.ClientSession, pipeline: Dict) -> Optional[Dict]: - try: - commit_info = self.extract_commit_info(pipeline) - if not commit_info: - return None - - data_point = { - "commit_hash": commit_info['commit_hash'], - "commit_url": commit_info['web_url'], - "timestamp": pipeline.get("created_at", pipeline.get("updated_at")), - "pipeline_status": pipeline.get("state", "unknown"), - "branch": commit_info['branch'], - "author": commit_info['author'], - "commit_message": commit_info['message'] - } - - jobs = await self.client.get_workflow_jobs(session, pipeline["id"]) - - # Get package size data - size_job = next( - (j for j in jobs if j["name"] == "measure_pip_sizes" and j["status"] == "success"), - None - ) - - # Get line count data - linecount_job = next( - (j for j in jobs if j["name"] == "check_line_count" and j["status"] == "success"), - None - ) - - # Get benchmark data from runner job - benchmark_job = next( - (j for j in jobs if j["name"] == "runner" and j["status"] == "success"), - None - ) - - # Return None if no relevant jobs found - if not size_job and not linecount_job and not benchmark_job: - self.logger.debug(f"No relevant jobs 
found for pipeline {pipeline['id']}") - return None - - # Process benchmark data if available - if benchmark_job: - benchmark_artifacts = await self.client.get_artifacts(session, benchmark_job["job_number"]) - benchmark_report = next( - (a for a in benchmark_artifacts if a["path"].endswith("benchmark.json")), - None - ) - if benchmark_report: - benchmark_data = await self.client.get_json(session, benchmark_report["url"]) - data_point.update({ - "tokens_per_second": benchmark_data["tokens_per_second"], - "time_to_first_token": benchmark_data.get("time_to_first_token", 0) - }) - self.logger.info( - f"Processed benchmark data for pipeline {pipeline['id']}: " - f"commit {commit_info['commit_hash'][:7]}, " - f"tokens/s {benchmark_data['tokens_per_second']:.2f}" - ) - - # Process size data if available - if size_job: - size_artifacts = await self.client.get_artifacts(session, size_job["job_number"]) - size_report = next( - (a for a in size_artifacts if a["path"].endswith("pip-sizes.json")), - None - ) - if size_report: - size_data = await self.client.get_json(session, size_report["url"]) - data_point.update({ - "total_size_mb": size_data["total_size_mb"], - "packages": size_data["packages"] - }) - self.logger.info( - f"Processed size data for pipeline {pipeline['id']}: " - f"commit {commit_info['commit_hash'][:7]}, " - f"size {size_data['total_size_mb']:.2f}MB" - ) - - # Process linecount data if available - if linecount_job: - linecount_artifacts = await self.client.get_artifacts(session, linecount_job["job_number"]) - linecount_report = next( - (a for a in linecount_artifacts if a["path"].endswith("line-count-snapshot.json")), - None - ) - if linecount_report: - linecount_data = await self.client.get_json(session, linecount_report["url"]) - data_point.update({ - "total_lines": linecount_data["total_lines"], - "total_files": linecount_data["total_files"], - "files": linecount_data["files"] - }) - self.logger.info( - f"Processed line count data for pipeline {pipeline['id']}: " - f"commit {commit_info['commit_hash'][:7]}, " - f"lines {linecount_data['total_lines']:,}" - ) - - return data_point - - except Exception as e: - self.logger.error(f"Error processing pipeline {pipeline['id']}: {str(e)}") - return None - - async def process_pipeline_batch( - self, - session: aiohttp.ClientSession, - pipelines: List[Dict], - batch_size: int = 5 - ) -> List[Dict]: - """ - Process a batch of pipelines with rate limiting. 
- - Args: - session: aiohttp client session - pipelines: List of pipelines to process - batch_size: Number of pipelines to process in parallel - - Returns: - List of processed pipeline data points - """ - data_points = [] - - for i in range(0, len(pipelines), batch_size): - batch = pipelines[i:i + batch_size] - - # Process batch in parallel - tasks = [self.process_pipeline(session, pipeline) for pipeline in batch] - batch_results = await asyncio.gather(*tasks) - - # Filter out None results - batch_data = [r for r in batch_results if r is not None] - data_points.extend(batch_data) - - # Add delay between batches if there are more to process - if i + batch_size < len(pipelines): - await asyncio.sleep(1) # 1 second delay between batches - - return data_points - - async def collect_data(self) -> List[Dict]: - self.logger.info("Starting data collection...") - async with aiohttp.ClientSession(headers=self.client.headers) as session: - # Get pipelines from main branch - main_pipelines = await self.client.get_recent_pipelines( - session, - org_slug=self.client.project_slug, - limit=20, - branch="main" - ) - - # Add delay between branch requests - await asyncio.sleep(2) - - # Get pipelines from circleci branch - circleci_pipelines = await self.client.get_recent_pipelines( - session, - org_slug=self.client.project_slug, - limit=20, - branch="circleci" - ) - - # Combine pipelines and sort by created_at date - pipelines = main_pipelines + circleci_pipelines - pipelines.sort( - key=lambda x: datetime.fromisoformat( - x.get("created_at", x.get("updated_at")).replace('Z', '+00:00') - ), - reverse=True # Most recent first - ) - - self.logger.info(f"Found {len(pipelines)} recent pipelines") - - # Process pipelines in batches - data_points = await self.process_pipeline_batch(session, pipelines) - - # Sort by timestamp - data_points.sort( - key=lambda x: datetime.fromisoformat( - x.get("timestamp").replace('Z', '+00:00') - ), - reverse=True # Most recent first - ) - - return data_points - - def generate_report(self, data: List[Dict], output_dir: str = "reports") -> Optional[str]: - self.logger.info("Generating report...") - if not data: - self.logger.error("No data to generate report from!") - return None - - # Get latest pipeline status based on errors - latest_main_pipeline = next((d for d in data if d.get('branch') == 'main'), None) - latest_pipeline_status = 'success' if latest_main_pipeline and not latest_main_pipeline.get('errors') else 'failure' - - # Log the pipeline status - if latest_main_pipeline: - self.logger.info( - f"Latest main branch pipeline status: {latest_pipeline_status} " - f"(commit: {latest_main_pipeline['commit_hash'][:7]})" - ) - else: - self.logger.warning("No pipeline data found for main branch") - - # Convert output_dir to Path object - output_dir = Path(output_dir) - - # Create output directory if it doesn't exist - output_dir.mkdir(parents=True, exist_ok=True) - - # Create separate dataframes for each metric - df_size = pd.DataFrame([d for d in data if 'total_size_mb' in d]) - df_lines = pd.DataFrame([d for d in data if 'total_lines' in d]) - df_benchmark = pd.DataFrame([d for d in data if 'tokens_per_second' in d]) - - # Create a single figure with subplots - fig = make_subplots( - rows=3, cols=2, - subplot_titles=('', 'Package Size', '', 'Line Count', '', 'Tokens per Second'), - vertical_spacing=0.2, - column_widths=[0.2, 0.8], - specs=[[{"type": "indicator"}, {"type": "scatter"}], - [None, {"type": "scatter"}], - [None, {"type": "scatter"}]] - ) - - # Add package size trace 
if we have data
-        if not df_size.empty:
-            df_size['timestamp'] = pd.to_datetime(df_size['timestamp'])
-            df_size = df_size.sort_values('timestamp')
-
-            fig.add_trace(
-                go.Scatter(
-                    x=df_size['timestamp'],
-                    y=df_size['total_size_mb'],
-                    mode='lines+markers',
-                    name='Package Size',
-                    customdata=df_size[['commit_hash', 'commit_url']].values,
-                    hovertemplate="<br>".join([
-                        "Size: %{y:.2f}MB",
-                        "Date: %{x}",
-                        "Commit: %{customdata[0]}",
-                        "<extra></extra>"
-                    ])
-                ),
-                row=1, col=2
-            )
-            fig.update_yaxes(title_text="Size (MB)", row=1, col=2)
-
-        # Add line count trace if we have data
-        if not df_lines.empty:
-            df_lines['timestamp'] = pd.to_datetime(df_lines['timestamp'])
-            df_lines = df_lines.sort_values('timestamp')
-
-            fig.add_trace(
-                go.Scatter(
-                    x=df_lines['timestamp'],
-                    y=df_lines['total_lines'],
-                    mode='lines+markers',
-                    name='Line Count',
-                    customdata=df_lines[['commit_hash', 'commit_url']].values,
-                    hovertemplate="<br>".join([
-                        "Lines: %{y:,.0f}",
-                        "Date: %{x}",
-                        "Commit: %{customdata[0]}",
-                        "<extra></extra>"
-                    ])
-                ),
-                row=2, col=2
-            )
-            fig.update_yaxes(title_text="Total Lines", row=2, col=2)
-
-        # Add tokens per second trace if we have data
-        if not df_benchmark.empty:
-            df_benchmark['timestamp'] = pd.to_datetime(df_benchmark['timestamp'])
-            df_benchmark = df_benchmark.sort_values('timestamp')
-
-            fig.add_trace(
-                go.Scatter(
-                    x=df_benchmark['timestamp'],
-                    y=df_benchmark['tokens_per_second'],
-                    mode='lines+markers',
-                    name='Tokens/Second',
-                    customdata=df_benchmark[['commit_hash', 'commit_url']].values,
-                    hovertemplate="<br>".join([
-                        "Tokens/s: %{y:.2f}",
-                        "Date: %{x}",
-                        "Commit: %{customdata[0]}",
-                        "<extra></extra>"
-                    ])
-                ),
-                row=3, col=2
-            )
-            fig.update_yaxes(title_text="Tokens per Second", row=3, col=2)
-
-        # Update layout
-        fig.update_layout(
-            height=800,
-            showlegend=False,
-            title_text="Package Metrics Dashboard",
-            title_x=0.5,
-            plot_bgcolor='white',
-            paper_bgcolor='white',
-            font=dict(size=12),
-            hovermode='x unified'
-        )
-
-        # Update the dashboard HTML with date range picker
-        dashboard_html = f"""
-        [dashboard HTML template markup unrecoverable; it contained a date-range picker header, a
-         Pipeline Status banner ({'✓ Pipeline Passing' if latest_pipeline_status == 'success' else '✗ Pipeline Failing'}),
-         and panels for Package Size, Line Count and Tokens per Second]
- - - - - - - - - """ - - # Write the dashboard - dashboard_path = output_dir / "dashboard.html" - with open(dashboard_path, "w") as f: - f.write(dashboard_html) - - # Generate summary with available metrics - latest_data = {} - - if not df_size.empty: - latest = df_size.iloc[-1] - previous = df_size.iloc[-2] if len(df_size) > 1 else latest - size_change = float(latest['total_size_mb'] - previous['total_size_mb']) - latest_data.update({ - 'timestamp': latest['timestamp'].isoformat(), - 'commit_hash': latest['commit_hash'], - 'commit_url': latest['commit_url'], - 'total_size_mb': float(latest['total_size_mb']), - 'size_change_mb': size_change, - 'packages': latest.get('packages', []) - }) - - if not df_lines.empty: - latest = df_lines.iloc[-1] - previous = df_lines.iloc[-2] if len(df_lines) > 1 else latest - linecount_change = int(latest['total_lines'] - previous['total_lines']) - if not latest_data: # Only add timestamp and commit info if not already added - latest_data.update({ - 'timestamp': latest['timestamp'].isoformat(), - 'commit_hash': latest['commit_hash'], - 'commit_url': latest['commit_url'], - }) - latest_data.update({ - 'total_lines': int(latest['total_lines']), - 'linecount_change': linecount_change - }) - - if not df_benchmark.empty: - latest = df_benchmark.iloc[-1] - previous = df_benchmark.iloc[-2] if len(df_benchmark) > 1 else latest - tokens_change = float(latest['tokens_per_second'] - previous['tokens_per_second']) - if not latest_data: # Only add timestamp and commit info if not already added - latest_data.update({ - 'timestamp': latest['timestamp'].isoformat(), - 'commit_hash': latest['commit_hash'], - 'commit_url': latest['commit_url'], - }) - latest_data.update({ - 'tokens_per_second': float(latest['tokens_per_second']), - 'tokens_change': tokens_change - }) - - if latest_data: - with open(output_dir / 'latest_data.json', 'w') as f: - json.dump(latest_data, f, indent=2) - - self._print_summary(latest_data) - self.logger.info(f"Report generated in {output_dir}") - return str(output_dir) - - return None - - def _print_summary(self, latest_data: Dict): - print("\n=== Package Size Summary ===") - print(f"Timestamp: {latest_data['timestamp']}") - print(f"Commit: {latest_data['commit_hash'][:7]}") - - if 'total_size_mb' in latest_data: - print(f"Total Size: {latest_data['total_size_mb']:.2f}MB") - change = latest_data['size_change_mb'] - change_symbol = "↓" if change <= 0 else "↑" - print(f"Change: {change_symbol} {abs(change):.2f}MB") - - if latest_data.get('packages'): - print("\nTop 5 Largest Packages:") - sorted_packages = sorted(latest_data['packages'], key=lambda x: x['size_mb'], reverse=True) - for pkg in sorted_packages[:5]: - print(f"- {pkg['name']}: {pkg['size_mb']:.2f}MB") - - if 'total_lines' in latest_data: - print("\nLine Count Stats:") - print(f"Total Lines: {latest_data['total_lines']:,}") - change = latest_data['linecount_change'] - change_symbol = "↓" if change <= 0 else "↑" - print(f"Change: {change_symbol} {abs(change):,}") - - if 'tokens_per_second' in latest_data: - print("\nBenchmark Stats:") - print(f"Tokens per Second: {latest_data['tokens_per_second']:.2f}") - if 'time_to_first_token' in latest_data: - print(f"Time to First Token: {latest_data['time_to_first_token']:.3f}s") - - print("\n") - - def _calculate_data_hash(self, data: List[Dict]) -> str: - """Calculate a hash of the data to detect changes""" - return hash(str(sorted([ - (d.get('commit_hash'), d.get('timestamp')) - for d in data - ]))) - - def _play_sound(self, sound_key: str): - 
"""Play a specific notification sound using pygame""" - try: - sound_path = self.sounds.get(sound_key) - if sound_path and sound_path.exists(): - sound = pygame.mixer.Sound(str(sound_path)) - sound.play() - # Wait for the sound to finish playing - pygame.time.wait(int(sound.get_length() * 1000)) - else: - self.logger.warning(f"Sound file not found: {sound_key} at {sound_path}") - except Exception as e: - self.logger.error(f"Failed to play sound {sound_key}: {e}") - - def _check_metrics_changes(self, current_data: List[Dict], previous_data: List[Dict]): - # Sort data by timestamp in descending order (most recent first) - def sort_by_timestamp(data): - return sorted( - data, - key=lambda x: x.get('timestamp', ''), - reverse=True # Most recent first - ) - - current_data = sort_by_timestamp(current_data) - previous_data = sort_by_timestamp(previous_data) - - # Helper to find latest entry with a specific metric - def find_latest_with_metric(data: List[Dict], metric: str) -> Optional[Dict]: - return next((d for d in data if metric in d), None) - - # Check line count changes - current_lines = find_latest_with_metric(current_data, 'total_lines') - previous_lines = find_latest_with_metric(previous_data, 'total_lines') - - if current_lines and previous_lines: - diff = current_lines['total_lines'] - previous_lines['total_lines'] - self.logger.debug(f"Lines of code diff: {diff}") - if diff > 0: - self.logger.info(f"Lines of code increased by {diff:,}") - self._play_sound('lines_up') - elif diff < 0: - self.logger.info(f"Lines of code decreased by {abs(diff):,}") - self._play_sound('lines_down') - else: - self.logger.debug("No lines of code data found") - - # Check tokens per second changes - current_tokens = find_latest_with_metric(current_data, 'tokens_per_second') - previous_tokens = find_latest_with_metric(previous_data, 'tokens_per_second') - - if current_tokens and previous_tokens: - diff = current_tokens['tokens_per_second'] - previous_tokens['tokens_per_second'] - self.logger.debug(f"Tokens per second diff: {diff}") - if diff > 0: - self.logger.info(f"Tokens per second increased by {diff:.2f}") - self._play_sound('tokens_up') - elif diff < 0: - self.logger.info(f"Tokens per second decreased by {abs(diff):.2f}") - self._play_sound('tokens_down') - else: - self.logger.debug("No tokens per second data found") - - # Check package size changes - current_size = find_latest_with_metric(current_data, 'total_size_mb') - previous_size = find_latest_with_metric(previous_data, 'total_size_mb') - - if current_size and previous_size: - diff = current_size['total_size_mb'] - previous_size['total_size_mb'] - self.logger.debug(f"Package size diff: {diff:.2f}MB") - if diff > 0: - self.logger.info(f"Package size increased by {diff:.2f}MB") - self._play_sound('size_up') - elif diff < 0: - self.logger.info(f"Package size decreased by {abs(diff):.2f}MB") - self._play_sound('size_down') - else: - self.logger.debug("No package size data found") - - async def run_dashboard(self, update_interval: int = 10): - """Run the dashboard with periodic updates""" - try: - update_interval = float(update_interval) - self.logger.debug(f"Update interval type: {type(update_interval)}, value: {update_interval}") - except ValueError as e: - self.logger.error(f"Failed to convert update_interval to float: {update_interval}") - raise - - self.logger.info(f"Starting real-time dashboard with {update_interval}s updates") - previous_data = None - - while True: - try: - start_time = time.time() - - # Collect new data - current_data = await 
self.collect_data() - if not current_data: - self.logger.warning("No data collected") - await asyncio.sleep(update_interval) - continue - - # Generate report - report_path = self.generate_report(current_data) - if report_path: - self.logger.info( - f"Dashboard updated at {datetime.now().strftime('%H:%M:%S')}" - ) - - print("Curr:", len(current_data)) - print("Prev:", len(previous_data) if previous_data else "None") - if previous_data: - # Check for metric changes and play appropriate sounds - self.logger.debug(f"Checking metrics changes between {len(current_data)} current and {len(previous_data)} previous data points") - self._check_metrics_changes(current_data, previous_data) - - # Update previous data - previous_data = current_data.copy() # Make a copy to prevent reference issues - - # Calculate sleep time - elapsed = float(time.time() - start_time) - sleep_time = max(0.0, update_interval - elapsed) - await asyncio.sleep(sleep_time) - - except Exception as e: - self.logger.error(f"Error in dashboard update loop: {e}", exc_info=True) - if self.debug: - raise - await asyncio.sleep(update_interval) - -async def main(): - token = os.getenv("CIRCLECI_TOKEN") - project_slug = os.getenv("CIRCLECI_PROJECT_SLUG") - debug = os.getenv("DEBUG", "").lower() in ("true", "1", "yes") - - try: - # Get update interval from environment or use default - update_interval = float(os.getenv("UPDATE_INTERVAL", "10")) - print(f"Update interval type: {type(update_interval)}, value: {update_interval}") # Debug print - except ValueError as e: - print(f"Error converting UPDATE_INTERVAL to float: {os.getenv('UPDATE_INTERVAL')}") - update_interval = 10.0 - - if not token or not project_slug: - print("Error: Please set CIRCLECI_TOKEN and CIRCLECI_PROJECT_SLUG environment variables") - return - - tracker = PackageSizeTracker(token, project_slug, debug) - - try: - await tracker.run_dashboard(update_interval) - except KeyboardInterrupt: - print("\nDashboard stopped by user") - except Exception as e: - logging.error(f"Error: {str(e)}", exc_info=True) - if debug: - raise +from collections import defaultdict +import os -if __name__ == "__main__": - asyncio.run(main()) +s3 = boto3.client('s3') +BUCKET_NAME = 'exo-benchmarks' + +def load_mock_data(): + current_dir = os.path.dirname(os.path.abspath(__file__)) + mock_data_path = os.path.join(current_dir, 'mock_data.json') + with open(mock_data_path, 'r') as f: + return json.load(f) + +def load_data_from_s3(): + # For testing, use mock data if environment variable is set + if os.getenv('USE_MOCK_DATA'): + return load_mock_data() + + config_data = defaultdict(list) + + paginator = s3.get_paginator('list_objects_v2') + for page in paginator.paginate(Bucket=BUCKET_NAME): + for obj in page.get('Contents', []): + key = obj['Key'] + config_name = key.split('/')[0] + response = s3.get_object(Bucket=BUCKET_NAME, Key=key) + data = json.loads(response['Body'].read().decode('utf-8')) + print(f"Processing object: {obj['Key']}: {data}") + config_data[config_name].append({ + 'timestamp': data.get('timestamp', obj['LastModified'].strftime('%Y-%m-%dT%H:%M:%S')), + 'prompt_tps': data.get('prompt_tps', 0), + 'generation_tps': data.get('generation_tps', 0), + 'commit': data.get('commit', ''), + 'run_id': data.get('run_id', '') + }) + + for config in config_data: + config_data[config].sort(key=lambda x: x['timestamp']) + + return config_data + +app = dash.Dash(__name__) + +app.layout = html.Div([ + html.H1('Benchmark Performance Dashboard'), + html.Div(id='graphs-container'), + dcc.Interval( + 
id='interval-component', + interval=300000, # Update every 5 minutes + n_intervals=0 + ) +]) + +@app.callback( + Output('graphs-container', 'children'), + Input('interval-component', 'n_intervals') +) +def update_graphs(n): + config_data = load_data_from_s3() + graphs = [] + + for config_name, data in config_data.items(): + timestamps = [d['timestamp'] for d in data] + prompt_tps = [d['prompt_tps'] for d in data] + generation_tps = [d['generation_tps'] for d in data] + commits = [d['commit'] for d in data] + run_ids = [d['run_id'] for d in data] + + fig = go.Figure() + + fig.add_trace(go.Scatter( + x=timestamps, + y=prompt_tps, + name='Prompt TPS', + mode='lines+markers', + hovertemplate='Commit: %{text}
TPS: %{y}', + text=commits, + customdata=run_ids + )) + + fig.add_trace(go.Scatter( + x=timestamps, + y=generation_tps, + name='Generation TPS', + mode='lines+markers', + hovertemplate='Commit: %{text}
TPS: %{y}', + text=commits, + customdata=run_ids + )) + + fig.update_layout( + title=f'Performance Metrics - {config_name}', + xaxis_title='Timestamp', + yaxis_title='Tokens per Second', + hovermode='x unified', + clickmode='event' + ) + + graphs.append(html.Div([ + dcc.Graph( + figure=fig, + id={'type': 'dynamic-graph', 'index': config_name}, + config={'displayModeBar': True} + ) + ])) + + return graphs + +@app.callback( + Output('_', 'children'), + Input({'type': 'dynamic-graph', 'index': dash.ALL}, 'clickData') +) +def handle_click(clickData): + if clickData and clickData['points'][0].get('customdata'): + run_id = clickData['points'][0]['customdata'] + url = f'https://github.com/exo-explore/exo/actions/runs/{run_id}' + import webbrowser + webbrowser.open_new_tab(url) + return dash.no_update + +if __name__ == '__main__': + app.run_server(debug=True) diff --git a/extra/dashboard/mock_data.json b/extra/dashboard/mock_data.json new file mode 100644 index 000000000..f6738eeda --- /dev/null +++ b/extra/dashboard/mock_data.json @@ -0,0 +1,54 @@ +{ + "config1": [ + { + "timestamp": "2024-03-01T10:00:00", + "prompt_tps": 150.5, + "generation_tps": 120.3, + "commit": "abc123", + "run_id": "12345678", + "configuration": { + "M4_Pro_16GB": 1, + "M4_Pro_24GB": 2, + "M4_32GB": 1 + } + }, + { + "timestamp": "2024-03-02T10:00:00", + "prompt_tps": 155.2, + "generation_tps": 125.1, + "commit": "def456", + "run_id": "23456789", + "configuration": { + "M4_Pro_16GB": 1, + "M4_Pro_24GB": 2, + "M4_32GB": 1 + } + } + ], + "config2": [ + { + "timestamp": "2024-03-01T10:00:00", + "prompt_tps": 140.8, + "generation_tps": 110.5, + "commit": "ghi789", + "run_id": "34567890", + "configuration": { + "M4_Pro_16GB": 1, + "M4_Pro_24GB": 2, + "M4_32GB": 1 + } + }, + { + "timestamp": "2024-03-02T10:00:00", + "prompt_tps": 145.6, + "generation_tps": 115.2, + "commit": "jkl012", + "run_id": "45678901", + "configuration": { + "M4_Pro_16GB": 1, + "M4_Pro_24GB": 2, + "M4_32GB": 1 + } + } + ] +} \ No newline at end of file diff --git a/extra/dashboard/requirements.txt b/extra/dashboard/requirements.txt index 7b978bc0e..5f1621133 100644 --- a/extra/dashboard/requirements.txt +++ b/extra/dashboard/requirements.txt @@ -1,5 +1,2 @@ -plotly -pandas -requests -aiohttp -pygame \ No newline at end of file +boto3==1.35.76 +dash==2.18.2 From 2f0b974dd5938de0ecbe84408336e8ceadfbfc14 Mon Sep 17 00:00:00 2001 From: Alex Cheema Date: Sun, 8 Dec 2024 18:25:42 +0000 Subject: [PATCH 27/42] test depot runner --- .github/workflows/build_and_test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 8ef7cd244..e5b0e97f2 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -14,7 +14,7 @@ env: jobs: check_line_count: - runs-on: ubuntu-latest + runs-on: depot-ubuntu-22.04-4 steps: - uses: actions/checkout@v4 with: From 61deb32404cbfc9e08c181de867d9d5cd64c344a Mon Sep 17 00:00:00 2001 From: Alex Cheema Date: Sun, 8 Dec 2024 18:43:08 +0000 Subject: [PATCH 28/42] t From b1a386af02c493167f2eab35f786d5d35d1395c3 Mon Sep 17 00:00:00 2001 From: Alex Cheema Date: Sun, 8 Dec 2024 18:48:14 +0000 Subject: [PATCH 29/42] add back mlx, use depot runners --- .github/workflows/build_and_test.yml | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index e5b0e97f2..dfa0e56b2 100644 --- 
a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -50,7 +50,7 @@ jobs: line-count-diff.json unit_test: - runs-on: macos-15 + runs-on: depot-macos-latest timeout-minutes: 20 steps: - uses: actions/checkout@v4 @@ -87,7 +87,7 @@ jobs: python3 ./test/test_model_helpers.py discovery_integration_test: - runs-on: ubuntu-latest + runs-on: depot-ubuntu-22.04-4 steps: - uses: actions/checkout@v4 @@ -128,16 +128,15 @@ jobs: fi chatgpt_api_tests: - runs-on: ${{ (matrix.inference_engine == 'tinygrad' || matrix.inference_engine == 'dummy') && 'ubuntu-latest' || 'macos-15' }} + runs-on: ${{ (matrix.inference_engine == 'tinygrad' || matrix.inference_engine == 'dummy') && 'depot-ubuntu-22.04-4' || 'depot-macos-latest' }} strategy: matrix: - # inference_engine: [mlx, tinygrad, dummy] - inference_engine: [tinygrad, dummy] + inference_engine: [mlx, tinygrad, dummy] include: - # - inference_engine: mlx - # model_id: llama-3.2-1b - # prompt: "Keep responses concise. Who was the king of pop?" - # expected_output: "Michael Jackson" + - inference_engine: mlx + model_id: llama-3.2-1b + prompt: "Keep responses concise. Who was the king of pop?" + expected_output: "Michael Jackson" - inference_engine: tinygrad model_id: llama-3.2-1b prompt: "Keep responses concise. Who was the king of pop?" @@ -273,7 +272,7 @@ jobs: fi measure_pip_sizes: - runs-on: macos-15 + runs-on: depot-macos-latest steps: - uses: actions/checkout@v4 From a44bf6fdc4f3375407df8d8ef05b6617b77d2ba7 Mon Sep 17 00:00:00 2001 From: Alex Cheema Date: Sun, 8 Dec 2024 18:51:08 +0000 Subject: [PATCH 30/42] t From 45b3582f131eea24bb6e40d85f2986b1cecec4ed Mon Sep 17 00:00:00 2001 From: Alex Cheema Date: Sun, 8 Dec 2024 19:02:59 +0000 Subject: [PATCH 31/42] tiny tweaks --- .github/workflows/build_and_test.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index dfa0e56b2..c78504c1e 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -167,8 +167,11 @@ jobs: - name: Run ChatGPT API test env: TOKENIZERS_PARALLELISM: ${{ matrix.inference_engine == 'tinygrad' && 'true' || 'false' }} - SUPPORT_BF16: ${{ matrix.inference_engine == 'tinygrad' && '0' || '0' }} + SUPPORT_BF16: '0' CLANG: ${{ matrix.inference_engine == 'tinygrad' && '1' || '0' }} + METAL_DEBUG_ERROR_MODE: '0' + METAL_DEVICE_WRAPPER_TYPE: '1' + METAL_XCODE: '1' run: | source env/bin/activate From 8f259e7c1efde4bbccab8888f473bec105523064 Mon Sep 17 00:00:00 2001 From: Alex Cheema Date: Sun, 8 Dec 2024 20:03:56 +0000 Subject: [PATCH 32/42] own runner test --- .github/workflows/build_and_test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index c78504c1e..b03430c89 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -128,7 +128,7 @@ jobs: fi chatgpt_api_tests: - runs-on: ${{ (matrix.inference_engine == 'tinygrad' || matrix.inference_engine == 'dummy') && 'depot-ubuntu-22.04-4' || 'depot-macos-latest' }} + runs-on: ${{ (matrix.inference_engine == 'tinygrad' || matrix.inference_engine == 'dummy') && 'M4PRO_GPU16_24GB' || 'M4PRO_GPU16_24GB' }} strategy: matrix: inference_engine: [mlx, tinygrad, dummy] From 750bfb9d1025e0685f726b277ddcffa3725eca63 Mon Sep 17 00:00:00 2001 From: Alex Cheema Date: Sun, 8 Dec 2024 20:09:36 +0000 Subject: [PATCH 33/42] use depot runners --- 
.github/workflows/build_and_test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index b03430c89..c78504c1e 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -128,7 +128,7 @@ jobs: fi chatgpt_api_tests: - runs-on: ${{ (matrix.inference_engine == 'tinygrad' || matrix.inference_engine == 'dummy') && 'M4PRO_GPU16_24GB' || 'M4PRO_GPU16_24GB' }} + runs-on: ${{ (matrix.inference_engine == 'tinygrad' || matrix.inference_engine == 'dummy') && 'depot-ubuntu-22.04-4' || 'depot-macos-latest' }} strategy: matrix: inference_engine: [mlx, tinygrad, dummy] From d953f6f538a958f4a32b397cc6430c15b3e419e7 Mon Sep 17 00:00:00 2001 From: Alex Cheema Date: Sun, 8 Dec 2024 21:17:06 +0000 Subject: [PATCH 34/42] add model to benchmark key --- extra/dashboard/dashboard.py | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/extra/dashboard/dashboard.py b/extra/dashboard/dashboard.py index df047c3f7..0d1f11faf 100644 --- a/extra/dashboard/dashboard.py +++ b/extra/dashboard/dashboard.py @@ -20,14 +20,17 @@ def load_data_from_s3(): # For testing, use mock data if environment variable is set if os.getenv('USE_MOCK_DATA'): return load_mock_data() - + config_data = defaultdict(list) - + paginator = s3.get_paginator('list_objects_v2') for page in paginator.paginate(Bucket=BUCKET_NAME): for obj in page.get('Contents', []): key = obj['Key'] - config_name = key.split('/')[0] + key_parts = key.split('/') + if len(key_parts) < 2: + continue + config_name = f"{key_parts[0]}/{key_parts[1]}" # Include both config and model response = s3.get_object(Bucket=BUCKET_NAME, Key=key) data = json.loads(response['Body'].read().decode('utf-8')) print(f"Processing object: {obj['Key']}: {data}") @@ -38,10 +41,10 @@ def load_data_from_s3(): 'commit': data.get('commit', ''), 'run_id': data.get('run_id', '') }) - + for config in config_data: config_data[config].sort(key=lambda x: x['timestamp']) - + return config_data app = dash.Dash(__name__) @@ -63,16 +66,16 @@ def load_data_from_s3(): def update_graphs(n): config_data = load_data_from_s3() graphs = [] - + for config_name, data in config_data.items(): timestamps = [d['timestamp'] for d in data] prompt_tps = [d['prompt_tps'] for d in data] generation_tps = [d['generation_tps'] for d in data] commits = [d['commit'] for d in data] run_ids = [d['run_id'] for d in data] - + fig = go.Figure() - + fig.add_trace(go.Scatter( x=timestamps, y=prompt_tps, @@ -82,7 +85,7 @@ def update_graphs(n): text=commits, customdata=run_ids )) - + fig.add_trace(go.Scatter( x=timestamps, y=generation_tps, @@ -92,7 +95,7 @@ def update_graphs(n): text=commits, customdata=run_ids )) - + fig.update_layout( title=f'Performance Metrics - {config_name}', xaxis_title='Timestamp', @@ -100,7 +103,7 @@ def update_graphs(n): hovermode='x unified', clickmode='event' ) - + graphs.append(html.Div([ dcc.Graph( figure=fig, @@ -108,7 +111,7 @@ def update_graphs(n): config={'displayModeBar': True} ) ])) - + return graphs @app.callback( From 54d3c823b94bc4fc1fe5d08948213ad4919b5bca Mon Sep 17 00:00:00 2001 From: Alex Cheema Date: Sun, 8 Dec 2024 21:53:40 +0000 Subject: [PATCH 35/42] dash sounds --- .../{sounds => assets}/gta5_wasted.mp3 | 0 .../{sounds => assets}/pokemon_evolve.mp3 | 0 extra/dashboard/dashboard.py | 153 ++++++++++++++++-- extra/dashboard/requirements.txt | 2 + 4 files changed, 139 insertions(+), 16 deletions(-) rename 
extra/dashboard/{sounds => assets}/gta5_wasted.mp3 (100%) rename extra/dashboard/{sounds => assets}/pokemon_evolve.mp3 (100%) diff --git a/extra/dashboard/sounds/gta5_wasted.mp3 b/extra/dashboard/assets/gta5_wasted.mp3 similarity index 100% rename from extra/dashboard/sounds/gta5_wasted.mp3 rename to extra/dashboard/assets/gta5_wasted.mp3 diff --git a/extra/dashboard/sounds/pokemon_evolve.mp3 b/extra/dashboard/assets/pokemon_evolve.mp3 similarity index 100% rename from extra/dashboard/sounds/pokemon_evolve.mp3 rename to extra/dashboard/assets/pokemon_evolve.mp3 diff --git a/extra/dashboard/dashboard.py b/extra/dashboard/dashboard.py index 0d1f11faf..009a9efc2 100644 --- a/extra/dashboard/dashboard.py +++ b/extra/dashboard/dashboard.py @@ -1,11 +1,15 @@ import dash -from dash import html, dcc +from dash import html, dcc, ctx import plotly.graph_objs as go -from dash.dependencies import Input, Output +from dash.dependencies import Input, Output, State import boto3 import json from collections import defaultdict import os +import base64 +import numpy as np +from plotly.subplots import make_subplots +import plotly.express as px s3 = boto3.client('s3') BUCKET_NAME = 'exo-benchmarks' @@ -51,21 +55,55 @@ def load_data_from_s3(): app.layout = html.Div([ html.H1('Benchmark Performance Dashboard'), + html.Button('Test Sound', id='test-sound-button', n_clicks=0), html.Div(id='graphs-container'), + html.Audio(id='success-sound', src='assets/pokemon_evolve.mp3', preload="auto", style={'display': 'none'}), + html.Audio(id='failure-sound', src='assets/gta5_wasted.mp3', preload="auto", style={'display': 'none'}), + html.Audio(id='startup-sound', src='assets/pokemon_evolve.mp3', preload="auto", style={'display': 'none'}), + html.Div(id='audio-trigger', style={'display': 'none'}), + dcc.Store(id='previous-data', storage_type='memory'), dcc.Interval( id='interval-component', - interval=300000, # Update every 5 minutes + interval=10000, # Update every 10 seconds n_intervals=0 ) ]) @app.callback( - Output('graphs-container', 'children'), - Input('interval-component', 'n_intervals') + [Output('graphs-container', 'children'), + Output('previous-data', 'data'), + Output('audio-trigger', 'children')], + [Input('interval-component', 'n_intervals')], + [State('previous-data', 'data')] ) -def update_graphs(n): +def update_graphs(n, previous_data): config_data = load_data_from_s3() graphs = [] + trigger_sound = None + + if previous_data: + for config_name, data in config_data.items(): + if config_name in previous_data and data and previous_data[config_name]: + current_prompt_tps = data[-1]['prompt_tps'] + previous_prompt_tps = previous_data[config_name][-1]['prompt_tps'] + + # Add clear logging for TPS changes + if current_prompt_tps != previous_prompt_tps: + print("\n" + "="*50) + print(f"Config: {config_name}") + print(f"Previous TPS: {previous_prompt_tps}") + print(f"Current TPS: {current_prompt_tps}") + print(f"Change: {current_prompt_tps - previous_prompt_tps}") + + if current_prompt_tps > previous_prompt_tps: + print("🔼 TPS INCREASED - Should play success sound") + trigger_sound = 'success' + elif current_prompt_tps < previous_prompt_tps: + print("🔽 TPS DECREASED - Should play failure sound") + trigger_sound = 'failure' + + if current_prompt_tps != previous_prompt_tps: + print("="*50 + "\n") for config_name, data in config_data.items(): timestamps = [d['timestamp'] for d in data] @@ -74,8 +112,12 @@ def update_graphs(n): commits = [d['commit'] for d in data] run_ids = [d['run_id'] for d in data] - fig = 
go.Figure() + # Create subplot with 2 columns + fig = make_subplots(rows=1, cols=2, + subplot_titles=('Performance Over Time', 'Generation TPS Distribution'), + column_widths=[0.7, 0.3]) + # Time series plot (left) fig.add_trace(go.Scatter( x=timestamps, y=prompt_tps, @@ -84,7 +126,7 @@ def update_graphs(n): hovertemplate='Commit: %{text}
TPS: %{y}', text=commits, customdata=run_ids - )) + ), row=1, col=1) fig.add_trace(go.Scatter( x=timestamps, @@ -94,16 +136,55 @@ def update_graphs(n): hovertemplate='Commit: %{text}
TPS: %{y}', text=commits, customdata=run_ids - )) + ), row=1, col=1) + + # Calculate statistics + gen_tps_array = np.array(generation_tps) + stats = { + 'Mean': np.mean(gen_tps_array), + 'Std Dev': np.std(gen_tps_array), + 'Min': np.min(gen_tps_array), + 'Max': np.max(gen_tps_array) + } + + # Histogram plot (right) + fig.add_trace(go.Histogram( + x=generation_tps, + name='Generation TPS Distribution', + nbinsx=10, + showlegend=False + ), row=1, col=2) + + # Add statistics as annotations + stats_text = '
'.join([f'{k}: {v:.2f}' for k, v in stats.items()]) + fig.add_annotation( + x=0.98, + y=0.98, + xref='paper', + yref='paper', + text=stats_text, + showarrow=False, + font=dict(size=12), + align='left', + bgcolor='rgba(255, 255, 255, 0.8)', + bordercolor='black', + borderwidth=1 + ) fig.update_layout( title=f'Performance Metrics - {config_name}', - xaxis_title='Timestamp', - yaxis_title='Tokens per Second', + height=500, + showlegend=True, hovermode='x unified', clickmode='event' ) + # Update x and y axis labels + fig.update_xaxes(title_text='Timestamp', row=1, col=1) + fig.update_xaxes(title_text='Generation TPS', row=1, col=2) + fig.update_yaxes(title_text='Tokens per Second', row=1, col=1) + fig.update_yaxes(title_text='Count', row=1, col=2) + graphs.append(html.Div([ dcc.Graph( figure=fig, @@ -112,19 +193,59 @@ def update_graphs(n): ) ])) - return graphs + return graphs, config_data, trigger_sound @app.callback( - Output('_', 'children'), - Input({'type': 'dynamic-graph', 'index': dash.ALL}, 'clickData') + Output('graphs-container', 'children', allow_duplicate=True), + Input({'type': 'dynamic-graph', 'index': dash.ALL}, 'clickData'), + prevent_initial_call=True ) def handle_click(clickData): - if clickData and clickData['points'][0].get('customdata'): - run_id = clickData['points'][0]['customdata'] + if clickData and clickData[0] and clickData[0]['points'][0].get('customdata'): + run_id = clickData[0]['points'][0]['customdata'] url = f'https://github.com/exo-explore/exo/actions/runs/{run_id}' import webbrowser webbrowser.open_new_tab(url) return dash.no_update +app.clientside_callback( + """ + function(trigger, test_clicks) { + if (!trigger && !test_clicks) return window.dash_clientside.no_update; + + if (test_clicks > 0 && dash_clientside.callback_context.triggered[0].prop_id.includes('test-sound-button')) { + console.log('Test button clicked'); + const audio = document.getElementById('startup-sound'); + if (audio) { + audio.currentTime = 0; + audio.play().catch(e => console.log('Error playing audio:', e)); + } + } else if (trigger) { + console.log('Audio trigger received:', trigger); + if (trigger === 'success') { + console.log('Playing success sound'); + const audio = document.getElementById('success-sound'); + if (audio) { + audio.currentTime = 0; + audio.play().catch(e => console.log('Error playing success sound:', e)); + } + } else if (trigger === 'failure') { + console.log('Playing failure sound'); + const audio = document.getElementById('failure-sound'); + if (audio) { + audio.currentTime = 0; + audio.play().catch(e => console.log('Error playing failure sound:', e)); + } + } + } + return window.dash_clientside.no_update; + } + """, + Output('audio-trigger', 'children', allow_duplicate=True), + [Input('audio-trigger', 'children'), + Input('test-sound-button', 'n_clicks')], + prevent_initial_call=True +) + if __name__ == '__main__': app.run_server(debug=True) diff --git a/extra/dashboard/requirements.txt b/extra/dashboard/requirements.txt index 5f1621133..5a49ed1a8 100644 --- a/extra/dashboard/requirements.txt +++ b/extra/dashboard/requirements.txt @@ -1,2 +1,4 @@ boto3==1.35.76 dash==2.18.2 +numpy +pandas From 16651a350639ebebb0c9accd950c938d72afea6b Mon Sep 17 00:00:00 2001 From: Alex Cheema Date: Wed, 11 Dec 2024 11:04:25 +0000 Subject: [PATCH 36/42] dashboard tweaks --- extra/dashboard/dashboard.py | 40 +++++++++++++++--------------------- 1 file changed, 16 insertions(+), 24 deletions(-) diff --git a/extra/dashboard/dashboard.py b/extra/dashboard/dashboard.py index 
009a9efc2..56bdda785 100644 --- a/extra/dashboard/dashboard.py +++ b/extra/dashboard/dashboard.py @@ -84,30 +84,29 @@ def update_graphs(n, previous_data): if previous_data: for config_name, data in config_data.items(): if config_name in previous_data and data and previous_data[config_name]: - current_prompt_tps = data[-1]['prompt_tps'] - previous_prompt_tps = previous_data[config_name][-1]['prompt_tps'] + current_generation_tps = data[-1]['generation_tps'] + previous_generation_tps = previous_data[config_name][-1]['generation_tps'] # Add clear logging for TPS changes - if current_prompt_tps != previous_prompt_tps: + if current_generation_tps != previous_generation_tps: print("\n" + "="*50) print(f"Config: {config_name}") - print(f"Previous TPS: {previous_prompt_tps}") - print(f"Current TPS: {current_prompt_tps}") - print(f"Change: {current_prompt_tps - previous_prompt_tps}") + print(f"Previous Generation TPS: {previous_generation_tps}") + print(f"Current Generation TPS: {current_generation_tps}") + print(f"Change: {current_generation_tps - previous_generation_tps}") - if current_prompt_tps > previous_prompt_tps: - print("🔼 TPS INCREASED - Should play success sound") + if current_generation_tps > previous_generation_tps: + print("🔼 Generation TPS INCREASED - Should play success sound") trigger_sound = 'success' - elif current_prompt_tps < previous_prompt_tps: - print("🔽 TPS DECREASED - Should play failure sound") + elif current_generation_tps < previous_generation_tps: + print("🔽 Generation TPS DECREASED - Should play failure sound") trigger_sound = 'failure' - if current_prompt_tps != previous_prompt_tps: + if current_generation_tps != previous_generation_tps: print("="*50 + "\n") for config_name, data in config_data.items(): timestamps = [d['timestamp'] for d in data] - prompt_tps = [d['prompt_tps'] for d in data] generation_tps = [d['generation_tps'] for d in data] commits = [d['commit'] for d in data] run_ids = [d['run_id'] for d in data] @@ -118,16 +117,6 @@ def update_graphs(n, previous_data): column_widths=[0.7, 0.3]) # Time series plot (left) - fig.add_trace(go.Scatter( - x=timestamps, - y=prompt_tps, - name='Prompt TPS', - mode='lines+markers', - hovertemplate='Commit: %{text}
TPS: %{y}', - text=commits, - customdata=run_ids - ), row=1, col=1) - fig.add_trace(go.Scatter( x=timestamps, y=generation_tps, @@ -135,7 +124,9 @@ def update_graphs(n, previous_data): mode='lines+markers', hovertemplate='Commit: %{text}
TPS: %{y}', text=commits, - customdata=run_ids + customdata=run_ids, + line=dict(color='#2196F3', width=2), + marker=dict(color='#2196F3') ), row=1, col=1) # Calculate statistics @@ -152,7 +143,8 @@ def update_graphs(n, previous_data): x=generation_tps, name='Generation TPS Distribution', nbinsx=10, - showlegend=False + showlegend=False, + marker=dict(color='#2196F3') ), row=1, col=2) # Add statistics as annotations From 2dbb5e177e9db9e64804927a6111438d2072a0b8 Mon Sep 17 00:00:00 2001 From: Alex Cheema Date: Wed, 11 Dec 2024 15:37:10 +0000 Subject: [PATCH 37/42] more robust configure_mlx.sh --- configure_mlx.sh | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/configure_mlx.sh b/configure_mlx.sh index 8a5b67378..f3469c820 100755 --- a/configure_mlx.sh +++ b/configure_mlx.sh @@ -3,10 +3,27 @@ # Get the total memory in MB TOTAL_MEM_MB=$(($(sysctl -n hw.memsize) / 1024 / 1024)) -# Set WIRED_LIMIT_MB to 80% -WIRED_LIMIT_MB=$(($TOTAL_MEM_MB * 80 / 100)) -# Set WIRED_LWM_MB to 70% -WIRED_LWM_MB=$(($TOTAL_MEM_MB * 70 / 100)) +# Calculate 80% and TOTAL_MEM_GB-5GB in MB +EIGHTY_PERCENT=$(($TOTAL_MEM_MB * 80 / 100)) +MINUS_5GB=$((($TOTAL_MEM_MB - 5120))) + +# Calculate 70% and TOTAL_MEM_GB-8GB in MB +SEVENTY_PERCENT=$(($TOTAL_MEM_MB * 70 / 100)) +MINUS_8GB=$((($TOTAL_MEM_MB - 8192))) + +# Set WIRED_LIMIT_MB to higher value +if [ $EIGHTY_PERCENT -gt $MINUS_5GB ]; then + WIRED_LIMIT_MB=$EIGHTY_PERCENT +else + WIRED_LIMIT_MB=$MINUS_5GB +fi + +# Set WIRED_LWM_MB to higher value +if [ $SEVENTY_PERCENT -gt $MINUS_8GB ]; then + WIRED_LWM_MB=$SEVENTY_PERCENT +else + WIRED_LWM_MB=$MINUS_8GB +fi # Display the calculated values echo "Total memory: $TOTAL_MEM_MB MB" From f12487b81a6b0afed9a4ec37df2d7f43e4b8e6d4 Mon Sep 17 00:00:00 2001 From: Alex Cheema Date: Thu, 12 Dec 2024 14:32:52 +0000 Subject: [PATCH 38/42] add --generate option to upload best.json to s3 with best benchmark results --- extra/dashboard/dashboard.py | 54 ++++++++++++++++++++++++++++++++++-- 1 file changed, 52 insertions(+), 2 deletions(-) diff --git a/extra/dashboard/dashboard.py b/extra/dashboard/dashboard.py index 56bdda785..c460c5955 100644 --- a/extra/dashboard/dashboard.py +++ b/extra/dashboard/dashboard.py @@ -43,7 +43,14 @@ def load_data_from_s3(): 'prompt_tps': data.get('prompt_tps', 0), 'generation_tps': data.get('generation_tps', 0), 'commit': data.get('commit', ''), - 'run_id': data.get('run_id', '') + 'run_id': data.get('run_id', ''), + 'model': data.get('model', ''), + 'branch': data.get('branch', ''), + 'configuration': data.get('configuration', {}), + 'prompt_len': data.get('prompt_len', 0), + 'ttft': data.get('ttft', 0), + 'response_len': data.get('response_len', 0), + 'total_time': data.get('total_time', 0) }) for config in config_data: @@ -51,6 +58,31 @@ def load_data_from_s3(): return config_data +def get_best_benchmarks(): + config_data = load_data_from_s3() + best_results = {} + + for config_name, data in config_data.items(): + if not data: + continue + + # Split config_name into config and model + config, model = config_name.split('/') + + # Find the entry with the highest generation_tps + best_result = max(data, key=lambda x: x['generation_tps']) + + # Create result dictionary with all original data plus config/model info + result = dict(best_result) # Make a copy of all data from the best run + result.update({ + 'config': config, + 'model': model, + }) + + best_results[config_name] = result + + return best_results + app = dash.Dash(__name__) app.layout = html.Div([ @@ 
-240,4 +272,22 @@ def handle_click(clickData): ) if __name__ == '__main__': - app.run_server(debug=True) + import sys + if '--generate' in sys.argv: + best_benchmarks = get_best_benchmarks() + print(json.dumps(best_benchmarks, indent=2)) + + # Upload best benchmarks to S3 + try: + s3.put_object( + Bucket=BUCKET_NAME, + Key='best.json', + Body=json.dumps(best_benchmarks, indent=2), + ContentType='application/json' + ) + print("Successfully uploaded best.json to S3") + print(f"Public URL: https://{BUCKET_NAME}.s3.amazonaws.com/best.json") + except Exception as e: + print(f"Error uploading to S3: {e}") + else: + app.run_server(debug=True) From 6016e1185fbe1cea0bbf1106499e1d575ba57ce0 Mon Sep 17 00:00:00 2001 From: Alex Cheema Date: Thu, 12 Dec 2024 18:50:34 +0000 Subject: [PATCH 39/42] 100x faster dashboard --- extra/dashboard/dashboard.py | 134 +++++++++++++++++++------------ extra/dashboard/requirements.txt | 3 +- 2 files changed, 83 insertions(+), 54 deletions(-) diff --git a/extra/dashboard/dashboard.py b/extra/dashboard/dashboard.py index c460c5955..fdab4980d 100644 --- a/extra/dashboard/dashboard.py +++ b/extra/dashboard/dashboard.py @@ -2,7 +2,9 @@ from dash import html, dcc, ctx import plotly.graph_objs as go from dash.dependencies import Input, Output, State -import boto3 +import aioboto3 +import asyncio +from aiohttp import ClientSession import json from collections import defaultdict import os @@ -11,7 +13,9 @@ from plotly.subplots import make_subplots import plotly.express as px -s3 = boto3.client('s3') +# Replace boto3 client with aioboto3 session +session = aioboto3.Session() + BUCKET_NAME = 'exo-benchmarks' def load_mock_data(): @@ -20,46 +24,67 @@ def load_mock_data(): with open(mock_data_path, 'r') as f: return json.load(f) -def load_data_from_s3(): +async def load_data_from_s3(): # For testing, use mock data if environment variable is set if os.getenv('USE_MOCK_DATA'): return load_mock_data() config_data = defaultdict(list) - paginator = s3.get_paginator('list_objects_v2') - for page in paginator.paginate(Bucket=BUCKET_NAME): - for obj in page.get('Contents', []): - key = obj['Key'] - key_parts = key.split('/') - if len(key_parts) < 2: - continue - config_name = f"{key_parts[0]}/{key_parts[1]}" # Include both config and model - response = s3.get_object(Bucket=BUCKET_NAME, Key=key) - data = json.loads(response['Body'].read().decode('utf-8')) - print(f"Processing object: {obj['Key']}: {data}") - config_data[config_name].append({ - 'timestamp': data.get('timestamp', obj['LastModified'].strftime('%Y-%m-%dT%H:%M:%S')), - 'prompt_tps': data.get('prompt_tps', 0), - 'generation_tps': data.get('generation_tps', 0), - 'commit': data.get('commit', ''), - 'run_id': data.get('run_id', ''), - 'model': data.get('model', ''), - 'branch': data.get('branch', ''), - 'configuration': data.get('configuration', {}), - 'prompt_len': data.get('prompt_len', 0), - 'ttft': data.get('ttft', 0), - 'response_len': data.get('response_len', 0), - 'total_time': data.get('total_time', 0) - }) - - for config in config_data: - config_data[config].sort(key=lambda x: x['timestamp']) - - return config_data - -def get_best_benchmarks(): - config_data = load_data_from_s3() + async with session.client('s3') as s3: + paginator = s3.get_paginator('list_objects_v2') + objects_to_fetch = [] + + # First, get all object keys + async for page in paginator.paginate(Bucket=BUCKET_NAME): + for obj in page.get('Contents', []): + key = obj['Key'] + key_parts = key.split('/') + if len(key_parts) < 2: + continue + 
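+                # Benchmark objects are grouped by their first two key segments, i.e. "<config>/<model>".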
objects_to_fetch.append((key, obj['LastModified'], f"{key_parts[0]}/{key_parts[1]}")) + + # Then fetch all objects in parallel + async def fetch_object(key, last_modified, config_name): + response = await s3.get_object(Bucket=BUCKET_NAME, Key=key) + body = await response['Body'].read() + data = json.loads(body.decode('utf-8')) + print(f"Processing object: {key}: {data}") + return { + 'config_name': config_name, + 'data': { + 'timestamp': data.get('timestamp', last_modified.strftime('%Y-%m-%dT%H:%M:%S')), + 'prompt_tps': data.get('prompt_tps', 0), + 'generation_tps': data.get('generation_tps', 0), + 'commit': data.get('commit', ''), + 'run_id': data.get('run_id', ''), + 'model': data.get('model', ''), + 'branch': data.get('branch', ''), + 'configuration': data.get('configuration', {}), + 'prompt_len': data.get('prompt_len', 0), + 'ttft': data.get('ttft', 0), + 'response_len': data.get('response_len', 0), + 'total_time': data.get('total_time', 0) + } + } + + # Create tasks for all objects + tasks = [fetch_object(key, last_modified, config_name) + for key, last_modified, config_name in objects_to_fetch] + results = await asyncio.gather(*tasks) + + # Organize results into config_data + for result in results: + config_data[result['config_name']].append(result['data']) + + # Sort data by timestamp for each config + for config in config_data: + config_data[config].sort(key=lambda x: x['timestamp']) + + return config_data + +async def get_best_benchmarks(): + config_data = await load_data_from_s3() best_results = {} for config_name, data in config_data.items(): @@ -96,7 +121,7 @@ def get_best_benchmarks(): dcc.Store(id='previous-data', storage_type='memory'), dcc.Interval( id='interval-component', - interval=10000, # Update every 10 seconds + interval=15000, # Update every 15 seconds n_intervals=0 ) ]) @@ -109,7 +134,8 @@ def get_best_benchmarks(): [State('previous-data', 'data')] ) def update_graphs(n, previous_data): - config_data = load_data_from_s3() + # Run async operations synchronously + config_data = asyncio.run(load_data_from_s3()) graphs = [] trigger_sound = None @@ -225,6 +251,7 @@ def update_graphs(n, previous_data): prevent_initial_call=True ) def handle_click(clickData): + # If you add any async operations here, wrap them with asyncio.run() if clickData and clickData[0] and clickData[0]['points'][0].get('customdata'): run_id = clickData[0]['points'][0]['customdata'] url = f'https://github.com/exo-explore/exo/actions/runs/{run_id}' @@ -274,20 +301,21 @@ def handle_click(clickData): if __name__ == '__main__': import sys if '--generate' in sys.argv: - best_benchmarks = get_best_benchmarks() - print(json.dumps(best_benchmarks, indent=2)) - - # Upload best benchmarks to S3 - try: - s3.put_object( - Bucket=BUCKET_NAME, - Key='best.json', - Body=json.dumps(best_benchmarks, indent=2), - ContentType='application/json' - ) - print("Successfully uploaded best.json to S3") - print(f"Public URL: https://{BUCKET_NAME}.s3.amazonaws.com/best.json") - except Exception as e: - print(f"Error uploading to S3: {e}") + async def generate_best(): + async with session.client('s3') as s3: + best_benchmarks = await get_best_benchmarks() + try: + await s3.put_object( + Bucket=BUCKET_NAME, + Key='best.json', + Body=json.dumps(best_benchmarks, indent=2), + ContentType='application/json' + ) + print("Successfully uploaded best.json to S3") + print(f"Public URL: https://{BUCKET_NAME}.s3.amazonaws.com/best.json") + except Exception as e: + print(f"Error uploading to S3: {e}") + + asyncio.run(generate_best()) else: 
app.run_server(debug=True) diff --git a/extra/dashboard/requirements.txt b/extra/dashboard/requirements.txt index 5a49ed1a8..ab2d3a551 100644 --- a/extra/dashboard/requirements.txt +++ b/extra/dashboard/requirements.txt @@ -1,4 +1,5 @@ -boto3==1.35.76 +aioboto3==13.2.0 dash==2.18.2 numpy pandas +aiohttp \ No newline at end of file From 0fa8f1f5bb4ac19621dd254e957c326fa4790dc1 Mon Sep 17 00:00:00 2001 From: Alex Cheema Date: Fri, 13 Dec 2024 22:27:11 +0000 Subject: [PATCH 40/42] discord notifications on new benchmark runs --- extra/dashboard/dashboard.py | 115 ++++++++++++++++++++++++++++++----- 1 file changed, 100 insertions(+), 15 deletions(-) diff --git a/extra/dashboard/dashboard.py b/extra/dashboard/dashboard.py index fdab4980d..d618ce4e3 100644 --- a/extra/dashboard/dashboard.py +++ b/extra/dashboard/dashboard.py @@ -12,11 +12,15 @@ import numpy as np from plotly.subplots import make_subplots import plotly.express as px +import aiohttp +from datetime import datetime # Replace boto3 client with aioboto3 session session = aioboto3.Session() BUCKET_NAME = 'exo-benchmarks' +DISCORD_WEBHOOK_URL = os.getenv('DISCORD_WEBHOOK_URL') +CURSOR_KEY = 'last_processed_timestamp.txt' def load_mock_data(): current_dir = os.path.dirname(os.path.abspath(__file__)) @@ -108,6 +112,102 @@ async def get_best_benchmarks(): return best_results +async def send_discord_notification(benchmark_data): + if not DISCORD_WEBHOOK_URL: + print("Discord webhook URL not configured, skipping notification") + return + + # Create a formatted message + config_name = f"{benchmark_data['config']}/{benchmark_data['model']}" + + # Create a simple JSON string of the topology + topology = benchmark_data.get('configuration', {}) + topology_str = "```json\n" + json.dumps(topology, indent=2) + "\n```" + + message = ( + f"🚀 New Benchmark Result for **{config_name}**\n\n" + f"📊 Performance Metrics:\n" + f"• Generation TPS: **{benchmark_data['generation_tps']:.2f}**\n" + f"• Prompt TPS: **{benchmark_data['prompt_tps']:.2f}**\n" + f"• TTFT: **{benchmark_data['ttft'] * 1000:.2f}ms**\n" + f"• Prompt Length: {benchmark_data['prompt_len']}\n" + f"• Response Length: {benchmark_data['response_len']}\n\n" + f"🔍 Run Details:\n" + f"• Commit: {benchmark_data['commit'][:7]}\n" + f"• Branch: {benchmark_data['branch']}\n" + f"• Run ID: [{benchmark_data['run_id']}](https://github.com/exo-explore/exo/actions/runs/{benchmark_data['run_id']})\n\n" + f"{topology_str}" + ) + + async with aiohttp.ClientSession() as session: + await session.post(DISCORD_WEBHOOK_URL, json={'content': message}) + +async def get_cursor(): + try: + async with session.client('s3') as s3: + response = await s3.get_object(Bucket=BUCKET_NAME, Key=CURSOR_KEY) + body = await response['Body'].read() + return body.decode('utf-8').strip() + except: + return "1970-01-01T00:00:00" # Default to epoch if no cursor exists + +async def update_cursor(timestamp): + async with session.client('s3') as s3: + await s3.put_object( + Bucket=BUCKET_NAME, + Key=CURSOR_KEY, + Body=timestamp.encode('utf-8') + ) + +async def generate_best(): + # Get the last processed timestamp + last_processed = await get_cursor() + print(f"Last processed timestamp: {last_processed}") + + async with session.client('s3') as s3: + # Load all benchmark data + config_data = await load_data_from_s3() + best_benchmarks = await get_best_benchmarks() + + # Check for new benchmarks in all data + new_latest = last_processed + for config_name, data_list in config_data.items(): + for benchmark in data_list: + timestamp = 
benchmark['timestamp'] + + # If this benchmark is newer than our last processed timestamp + if timestamp > last_processed: + print(f"Found new benchmark for {config_name} at {timestamp}") + # Add config and model info to the benchmark data + config, model = config_name.split('/') + benchmark_with_info = dict(benchmark) + benchmark_with_info.update({ + 'config': config, + 'model': model, + }) + await send_discord_notification(benchmark_with_info) + + # Update the latest timestamp if this is the newest we've seen + if timestamp > new_latest: + new_latest = timestamp + + # Update the cursor if we found any new benchmarks + if new_latest > last_processed: + await update_cursor(new_latest) + + # Upload the best benchmarks as before + try: + await s3.put_object( + Bucket=BUCKET_NAME, + Key='best.json', + Body=json.dumps(best_benchmarks, indent=2), + ContentType='application/json' + ) + print("Successfully uploaded best.json to S3") + print(f"Public URL: https://{BUCKET_NAME}.s3.amazonaws.com/best.json") + except Exception as e: + print(f"Error uploading to S3: {e}") + app = dash.Dash(__name__) app.layout = html.Div([ @@ -301,21 +401,6 @@ def handle_click(clickData): if __name__ == '__main__': import sys if '--generate' in sys.argv: - async def generate_best(): - async with session.client('s3') as s3: - best_benchmarks = await get_best_benchmarks() - try: - await s3.put_object( - Bucket=BUCKET_NAME, - Key='best.json', - Body=json.dumps(best_benchmarks, indent=2), - ContentType='application/json' - ) - print("Successfully uploaded best.json to S3") - print(f"Public URL: https://{BUCKET_NAME}.s3.amazonaws.com/best.json") - except Exception as e: - print(f"Error uploading to S3: {e}") - asyncio.run(generate_best()) else: app.run_server(debug=True) From 149849f94ece22d9bcf7ebfa85c5a73483f14eef Mon Sep 17 00:00:00 2001 From: Alex Cheema Date: Fri, 13 Dec 2024 22:36:55 +0000 Subject: [PATCH 41/42] format metric changes nicely in discord --- extra/dashboard/dashboard.py | 68 +++++++++++++++++++++++++++++++----- 1 file changed, 60 insertions(+), 8 deletions(-) diff --git a/extra/dashboard/dashboard.py b/extra/dashboard/dashboard.py index d618ce4e3..dbcaa85bf 100644 --- a/extra/dashboard/dashboard.py +++ b/extra/dashboard/dashboard.py @@ -112,7 +112,39 @@ async def get_best_benchmarks(): return best_results -async def send_discord_notification(benchmark_data): +async def get_previous_benchmark(config_data, config_name, current_timestamp): + """Get the previous benchmark for a given configuration.""" + benchmarks = config_data.get(config_name, []) + # Sort by timestamp and find the most recent benchmark before current_timestamp + previous = None + for b in sorted(benchmarks, key=lambda x: x['timestamp']): + if b['timestamp'] < current_timestamp: + previous = b + else: + break + return previous + +async def format_metric_comparison(current, previous, metric, format_str=".2f", lower_is_better=False): + """Format a metric with trend indicator.""" + current_val = current.get(metric, 0) + if not previous: + return f"**{current_val:{format_str}}**" + + prev_val = previous.get(metric, 0) + diff = current_val - prev_val + + # Invert the comparison logic if lower values are better + if lower_is_better: + diff = -diff # This makes negative diffs good and positive diffs bad + + if diff > 0: + return f"**{current_val:{format_str}}** 🟢↑ ({'-' if lower_is_better else '+'}{abs(current_val - prev_val):{format_str}})" + elif diff < 0: + return f"**{current_val:{format_str}}** 🔴↓ ({'+' if lower_is_better else 
'-'}{abs(current_val - prev_val):{format_str}})" + else: + return f"**{current_val:{format_str}}** ⚪" + +async def send_discord_notification(benchmark_data, config_data): if not DISCORD_WEBHOOK_URL: print("Discord webhook URL not configured, skipping notification") return @@ -120,6 +152,25 @@ async def send_discord_notification(benchmark_data): # Create a formatted message config_name = f"{benchmark_data['config']}/{benchmark_data['model']}" + # Use the passed config_data instead of fetching again + previous_benchmark = await get_previous_benchmark( + config_data, + f"{benchmark_data['config']}/{benchmark_data['model']}", + benchmark_data['timestamp'] + ) + + # Format metrics with comparisons + gen_tps = await format_metric_comparison(benchmark_data, previous_benchmark, 'generation_tps') + prompt_tps = await format_metric_comparison(benchmark_data, previous_benchmark, 'prompt_tps') + ttft = await format_metric_comparison( + {'ttft': benchmark_data['ttft'] * 1000}, + {'ttft': previous_benchmark['ttft'] * 1000} if previous_benchmark else None, + 'ttft', + lower_is_better=True + ) + prompt_len = await format_metric_comparison(benchmark_data, previous_benchmark, 'prompt_len', "d") + response_len = await format_metric_comparison(benchmark_data, previous_benchmark, 'response_len', "d") + # Create a simple JSON string of the topology topology = benchmark_data.get('configuration', {}) topology_str = "```json\n" + json.dumps(topology, indent=2) + "\n```" @@ -127,11 +178,11 @@ async def send_discord_notification(benchmark_data): message = ( f"🚀 New Benchmark Result for **{config_name}**\n\n" f"📊 Performance Metrics:\n" - f"• Generation TPS: **{benchmark_data['generation_tps']:.2f}**\n" - f"• Prompt TPS: **{benchmark_data['prompt_tps']:.2f}**\n" - f"• TTFT: **{benchmark_data['ttft'] * 1000:.2f}ms**\n" - f"• Prompt Length: {benchmark_data['prompt_len']}\n" - f"• Response Length: {benchmark_data['response_len']}\n\n" + f"• Generation TPS: {gen_tps}\n" + f"• Prompt TPS: {prompt_tps}\n" + f"• TTFT: {ttft}ms\n" + f"• Prompt Length: {prompt_len}\n" + f"• Response Length: {response_len}\n\n" f"🔍 Run Details:\n" f"• Commit: {benchmark_data['commit'][:7]}\n" f"• Branch: {benchmark_data['branch']}\n" @@ -165,7 +216,7 @@ async def generate_best(): print(f"Last processed timestamp: {last_processed}") async with session.client('s3') as s3: - # Load all benchmark data + # Load all benchmark data once config_data = await load_data_from_s3() best_benchmarks = await get_best_benchmarks() @@ -185,7 +236,8 @@ async def generate_best(): 'config': config, 'model': model, }) - await send_discord_notification(benchmark_with_info) + # Pass the already loaded config_data to avoid refetching + await send_discord_notification(benchmark_with_info, config_data) # Update the latest timestamp if this is the newest we've seen if timestamp > new_latest: From 4d9d4ad05ad4fe03c530114b6f0504879357fbf4 Mon Sep 17 00:00:00 2001 From: Gary Date: Sun, 15 Dec 2024 14:54:06 +0000 Subject: [PATCH 42/42] separate line by branch name --- extra/dashboard/dashboard.py | 72 ++++++++++++++++++++++-------------- 1 file changed, 45 insertions(+), 27 deletions(-) diff --git a/extra/dashboard/dashboard.py b/extra/dashboard/dashboard.py index dbcaa85bf..808f35d87 100644 --- a/extra/dashboard/dashboard.py +++ b/extra/dashboard/dashboard.py @@ -321,42 +321,60 @@ def update_graphs(n, previous_data): commits = [d['commit'] for d in data] run_ids = [d['run_id'] for d in data] + # Create a list of unique branches for this config + branches = 
list(set(d['branch'] for d in data)) + # Create subplot with 2 columns fig = make_subplots(rows=1, cols=2, subplot_titles=('Performance Over Time', 'Generation TPS Distribution'), column_widths=[0.7, 0.3]) - # Time series plot (left) - fig.add_trace(go.Scatter( - x=timestamps, - y=generation_tps, - name='Generation TPS', - mode='lines+markers', - hovertemplate='Commit: %{text}
TPS: %{y}', - text=commits, - customdata=run_ids, - line=dict(color='#2196F3', width=2), - marker=dict(color='#2196F3') - ), row=1, col=1) - - # Calculate statistics + # Generate a color for each branch + colors = px.colors.qualitative.Set1[:len(branches)] + branch_colors = dict(zip(branches, colors)) + + # Time series plot (left) - separate line for each branch + for branch in branches: + branch_data = [d for d in data if d['branch'] == branch] + branch_timestamps = [d['timestamp'] for d in branch_data] + branch_generation_tps = [d['generation_tps'] for d in branch_data] + branch_commits = [d['commit'] for d in branch_data] + branch_run_ids = [d['run_id'] for d in branch_data] + + fig.add_trace(go.Scatter( + x=branch_timestamps, + y=branch_generation_tps, + name=f'{branch}', + mode='lines+markers', + hovertemplate='Branch: %{text}
<br>Commit: %{customdata}<br>
TPS: %{y}', + text=[branch] * len(branch_timestamps), + customdata=branch_commits, + line=dict(color=branch_colors[branch], width=2), + marker=dict(color=branch_colors[branch]) + ), row=1, col=1) + + # Histogram plot (right) - stacked histogram by branch + for branch in branches: + branch_data = [d for d in data if d['branch'] == branch] + branch_generation_tps = [d['generation_tps'] for d in branch_data] + + fig.add_trace(go.Histogram( + x=branch_generation_tps, + name=f'{branch}', + nbinsx=10, + marker=dict(color=branch_colors[branch]), + opacity=0.75 + ), row=1, col=2) + + # Calculate statistics for all data gen_tps_array = np.array(generation_tps) stats = { - 'Mean': np.mean(gen_tps_array), - 'Std Dev': np.std(gen_tps_array), - 'Min': np.min(gen_tps_array), - 'Max': np.max(gen_tps_array) + 'Mean': np.mean(gen_tps_array), + 'Std Dev': np.std(gen_tps_array), + 'Min': np.min(gen_tps_array), + 'Max': np.max(gen_tps_array) } - # Histogram plot (right) - fig.add_trace(go.Histogram( - x=generation_tps, - name='Generation TPS Distribution', - nbinsx=10, - showlegend=False, - marker=dict(color='#2196F3') - ), row=1, col=2) - # Add statistics as annotations stats_text = '
'.join([f'{k}: {v:.2f}' for k, v in stats.items()]) fig.add_annotation(