Skip to content

Commit

Permalink
Add format check (#142)
Browse files Browse the repository at this point in the history
- format.yml for online check.
- .pre-commit-config.yaml for commit check.
  • Loading branch information
phoenixdong authored Jun 12, 2024
1 parent 9171a0a commit c5b44e8
Show file tree
Hide file tree
Showing 19 changed files with 303 additions and 170 deletions.
44 changes: 44 additions & 0 deletions .github/workflows/format.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# Formatting gate: runs Black and isort in check-only mode on pull requests
# that target main. Nothing is rewritten; a non-zero exit fails the job.
name: format

on:
  pull_request:
    branches: ["main"]
    types: [opened, synchronize, reopened]

env:
  # Space-separated shell globs (folded into one line by `>-`). They are left
  # unquoted in the run steps on purpose so that the shell expands them into
  # concrete file paths before the formatter sees them.
  CHECK_FILES: >-
    flagscale/auto_tuner/*.py
    flagscale/auto_tuner/prune/*.py
    flagscale/auto_tuner/record/*.py
    flagscale/auto_tuner/search/*.py
    flagscale/launcher/*.py
    flagscale/logger.py
    flagscale/patches_utils.py
    flagscale/datasets/sft_dataset.py

jobs:
  format:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout Code
        uses: actions/checkout@v4

      - name: Set up Python 3.10
        uses: actions/setup-python@v5
        with:
          # Quoted so YAML does not read it as the float 3.1.
          python-version: "3.10"

      - name: Install dependencies
        run: |
          pip install black isort

      # The files are passed as positional sources. `--include` expects a
      # single regular expression, so feeding it the expanded file list made
      # the first path act as the regex and forced a recursive scan of `./`.
      - name: Run Black
        run: >-
          black --check --diff $CHECK_FILES

      - name: Run Isort
        run: |
          isort --profile black --check --diff $CHECK_FILES
102 changes: 60 additions & 42 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,46 +9,64 @@ on:
pull_request:
branches: [ "main" ]

env:
REPORT_ADDR: http://120.92.110.224:8081

jobs:
test:
runs-on: self-hosted
container:
image: localhost:5000/flagscale_cicd:v1.3
ports:
- 80
volumes:
- /home/flagscale_cicd/flask/static:/workspace/report
options: --gpus all --hostname flagscale_cicd
steps:
- name: Checkout Code
uses: actions/checkout@v2

- name: Megatron Unit Test
run: |
cd megatron
export PYTHONPATH=..:$PYTHONPATH
torchrun --nproc_per_node=8 -m pytest --import-mode=importlib --cov-append --cov-report=html:/workspace/report/${{github.sha}}/cov-report-megatron --cov=megatron/core -q -x tests/unit_tests/data
torchrun --nproc_per_node=8 -m pytest --import-mode=importlib --cov-append --cov-report=html:/workspace/report/${{github.sha}}/cov-report-megatron --cov=megatron/core -q -x tests/unit_tests/dist_checkpointing
torchrun --nproc_per_node=8 -m pytest --import-mode=importlib --cov-append --cov-report=html:/workspace/report/${{github.sha}}/cov-report-megatron --cov=megatron/core -q -x tests/unit_tests/fusions
torchrun --nproc_per_node=8 -m pytest --import-mode=importlib --cov-append --cov-report=html:/workspace/report/${{github.sha}}/cov-report-megatron --cov=megatron/core -q -x tests/unit_tests/models
torchrun --nproc_per_node=8 -m pytest --import-mode=importlib --cov-append --cov-report=html:/workspace/report/${{github.sha}}/cov-report-megatron --cov=megatron/core -q -x tests/unit_tests/pipeline_parallel
torchrun --nproc_per_node=8 -m pytest --import-mode=importlib --cov-append --cov-report=html:/workspace/report/${{github.sha}}/cov-report-megatron --cov=megatron/core -q -x tests/unit_tests/tensor_parallel
torchrun --nproc_per_node=8 -m pytest --import-mode=importlib --cov-append --cov-report=html:/workspace/report/${{github.sha}}/cov-report-megatron --cov=megatron/core -q -x tests/unit_tests/transformer
torchrun --nproc_per_node=8 -m pytest --import-mode=importlib --cov-append --cov-report=html:/workspace/report/${{github.sha}}/cov-report-megatron --cov=megatron/core -q -x tests/unit_tests/*.py
- name: Megatron Unit Test Coverage Online Report
run: |
echo "You can access the test coverage report at the http://120.92.110.224:8081/${{github.sha}}/cov-report-megatron/index.html"
- name: Flagscale Unit Test
run: |
torchrun --nproc_per_node=8 -m pytest --import-mode=importlib --cov-append --cov-report=html:/workspace/report/${{github.sha}}/cov-report-flagscale --cov=flagscale -q -x tests/unit_tests/launcher
- name: Flagscale Unit Test Coverage Online Report
run: |
echo "You can access the test coverage report at the http://120.92.110.224:8081/${{github.sha}}/cov-report-flagscale/index.html"
- name: Flagscale Functional Test
run: |
python run.py --config-path tests/functional_tests/aquila/conf --config-name config action=test
pytest -s tests/functional_tests/test_result.py --test_reaults_path=./tests/functional_tests/aquila/test_result
# Job: runs the Megatron unit-test script on the self-hosted runner, inside
# the flagscale_cicd container image pulled from the runner-local registry.
megatron-unit-test:
runs-on: self-hosted
container:
image: localhost:5000/flagscale_cicd:v1.3
ports:
- 80
volumes:
# Host directory mounted at /workspace/report inside the container.
# NOTE(review): presumably where the test script writes its coverage HTML so
# the report server can serve it -- confirm in tests/scripts.
- /home/flagscale_cicd/flask/static:/workspace/report
# Docker options: expose all GPUs to the container and fix its hostname.
options: --gpus all --hostname flagscale_cicd
steps:
- name: Checkout Code
uses: actions/checkout@v2

# The commit SHA is passed as the script's only argument.
- name: Megatron Unit Test
run: tests/scripts/unit_test_megatron.sh ${{github.sha}}

# Prints a URL built from the REPORT_ADDR env value and the commit SHA.
- name: Megatron Unit Test Coverage Online Report
run: echo "You can access the test coverage report at the $REPORT_ADDR/${{github.sha}}/cov-report-megatron/index.html"


# Job: runs the FlagScale unit-test script in the same container setup as the
# Megatron job.
flagscale-unit-test:
runs-on: self-hosted
# Starts only after megatron-unit-test has completed successfully.
needs: megatron-unit-test
container:
image: localhost:5000/flagscale_cicd:v1.3
ports:
- 80
volumes:
# Host directory mounted at /workspace/report inside the container.
# NOTE(review): presumably the coverage report output location -- confirm in
# tests/scripts.
- /home/flagscale_cicd/flask/static:/workspace/report
# Docker options: expose all GPUs to the container and fix its hostname.
options: --gpus all --hostname flagscale_cicd
steps:
- name: Checkout Code
uses: actions/checkout@v2

# The commit SHA is passed as the script's only argument.
- name: Flagscale Unit Test
run: tests/scripts/unit_test_flagscale.sh ${{github.sha}}

# Prints a URL built from the REPORT_ADDR env value and the commit SHA.
- name: Flagscale Unit Test Coverage Online Report
run: echo "You can access the test coverage report at the $REPORT_ADDR/${{github.sha}}/cov-report-flagscale/index.html"


# Job: runs the FlagScale functional-test script; last link in the chain of
# jobs ordered through `needs`.
flagscale-functional-test:
runs-on: self-hosted
# Starts only after flagscale-unit-test has completed successfully.
needs: flagscale-unit-test
container:
image: localhost:5000/flagscale_cicd:v1.3
ports:
- 80
volumes:
# Host directory mounted at /workspace/report inside the container.
# NOTE(review): no step in this job references the report path -- confirm
# whether the functional-test script needs this mount.
- /home/flagscale_cicd/flask/static:/workspace/report
# Docker options: expose all GPUs to the container and fix its hostname.
options: --gpus all --hostname flagscale_cicd
steps:
- name: Checkout Code
uses: actions/checkout@v2

# Invoked without arguments, unlike the unit-test scripts.
- name: Flagscale Functional Test
run: tests/scripts/functional_test_flagscale.sh
28 changes: 28 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Shared `files` filter for the hooks below: a verbose-mode ((?x)) Python
# regex, so whitespace and newlines inside the pattern are ignored. It is
# anchored once here and aliased by each hook to keep the lists identical.
# Each directory entry is `<dir>/.*\.py`. The previous `<dir>/\..*\.py` form
# required a literal dot right after the slash, so it matched only
# dot-prefixed files and silently skipped e.g. everything in
# flagscale/launcher/.
check_files: &check_files |
  (?x)^(
    flagscale/auto_tuner/.*\.py|
    flagscale/auto_tuner/prune/.*\.py|
    flagscale/auto_tuner/record/.*\.py|
    flagscale/auto_tuner/search/.*\.py|
    flagscale/launcher/.*\.py|
    flagscale/logger\.py|
    flagscale/patches_utils\.py|
    flagscale/datasets/sft_dataset\.py
  )$

repos:
  # `local` + `language: system`: the hooks run the black/isort executables
  # already installed in the developer's environment.
  - repo: local
    hooks:
      - id: black
        name: black
        entry: black
        language: system
        types: [python]
        files: *check_files
      - id: isort
        name: isort
        entry: isort
        language: system
        types: [python]
        files: *check_files
        # Use isort's Black-compatible profile so the two hooks agree.
        args: ["--profile", "black"]
14 changes: 7 additions & 7 deletions flagscale/auto_tuner/generate.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import os
import copy
import os


class Generator:
Expand All @@ -16,8 +16,7 @@ def __init__(self, config):
"tensor_model_parallel_size": "tensor_model_parallel_size",
"sequence_parallel": "sequence_parallel",
"pipeline_model_parallel_size": "pipeline_model_parallel_size",
"num_layers_per_virtual_pipeline_stage":
"num_layers_per_virtual_pipeline_stage",
"num_layers_per_virtual_pipeline_stage": "num_layers_per_virtual_pipeline_stage",
"recompute_method": "recompute_method",
"recompute_granularity": "recompute_granularity",
"recompute_num_layers": "recompute_num_layers",
Expand Down Expand Up @@ -81,14 +80,15 @@ def gen(self, strategy):
# Set train_iters of each task
if "control" in config.experiment.auto_tuner:
config.train.model.train_iters = config.experiment.auto_tuner.control.get(
"train_iters", 5)
"train_iters", 5
)
else:
config.train.model.train_iters = 5

# log dir
config.experiment.exp_dir = os.path.join(config.experiment.exp_dir,
"auto_tuner",
f"task_{strategy['idx']}")
config.experiment.exp_dir = os.path.join(
config.experiment.exp_dir, "auto_tuner", f"task_{strategy['idx']}"
)

return config

Expand Down
4 changes: 2 additions & 2 deletions flagscale/auto_tuner/prune/history.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import logging
from ..utils import beside
from ..utils import compare_by_recompute

from ..utils import beside, compare_by_recompute

_HISTORY_BASED_PRUNE_FUNC = []
logger = logging.getLogger("FlagScale-AutoTuner")
Expand Down
1 change: 1 addition & 0 deletions flagscale/auto_tuner/prune/pruner.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ def prune(self, strategy, history=[]):
if func(self.config, strategy, history):
not_run = True
break

history.append(strategy)
if not_run:
self.pruned_count += 1
Expand Down
3 changes: 2 additions & 1 deletion flagscale/auto_tuner/record/recorder.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import logging
import os
import re
import logging
import subprocess

import pandas as pd


Expand Down
12 changes: 8 additions & 4 deletions flagscale/auto_tuner/search/algorithm.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,15 +24,19 @@ def __init__(self, strategies, config):
def checkout(self, mode):
if mode == "memory":
from ..utils import sort_by_memory

if self.idx > 0 and self.idx < len(self.strategies):
self.strategies = self.strategies[:self.idx] + sorted(
self.strategies[self.idx:], key=sort_by_memory)
self.strategies = self.strategies[: self.idx] + sorted(
self.strategies[self.idx :], key=sort_by_memory
)

elif mode == "performance":
from ..utils import sort_by_performance

if self.idx > 0 and self.idx < len(self.strategies):
self.strategies = self.strategies[:self.idx] + sorted(
self.strategies[self.idx:], key=sort_by_performance)
self.strategies = self.strategies[: self.idx] + sorted(
self.strategies[self.idx :], key=sort_by_performance
)

def search(self):
"""Return a task iteratively."""
Expand Down
4 changes: 2 additions & 2 deletions flagscale/auto_tuner/search/searcher.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import time
import copy
import logging
from ..utils import divisible
import time

from ..utils import divisible

__BUILT_IN_STRATEGY_DIMS__ = [
"data_parallel_size",
Expand Down
Loading

0 comments on commit c5b44e8

Please sign in to comment.