Skip to content

Commit

Permalink
feat: linting & ci (#35)
Browse files (browse the repository at this point in the history)
  • Loading branch information
ignas-gustainis authored Jun 26, 2024
1 parent 53abc51 commit 7ed7b3d
Show file tree
Hide file tree
Showing 13 changed files with 44 additions and 22 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/publish-ghcr.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ name: Create and publish a Docker image

on:
workflow_dispatch:

release:
types: [published]

Expand Down
15 changes: 15 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v2.3.0
hooks:
- id: check-yaml
- id: end-of-file-fixer
- id: trailing-whitespace
- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version.
rev: v0.4.10
hooks:
# Run the linter.
- id: ruff
# Run the formatter.
- id: ruff-format
2 changes: 1 addition & 1 deletion .python-version
Original file line number Diff line number Diff line change
@@ -1 +1 @@
3.10.12
3.10.12
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,4 @@ RUN pip3 install -r requirements.txt
WORKDIR /app/src


CMD ["sh", "-c", "bash start.sh --hf_token ${HF_TOKEN} --flock_api_key ${FLOCK_API_KEY} --task_id ${TASK_ID} --validation_args_file validation_config_cpu.json.example"]
CMD ["sh", "-c", "bash start.sh --hf_token ${HF_TOKEN} --flock_api_key ${FLOCK_API_KEY} --task_id ${TASK_ID} --validation_args_file validation_config_cpu.json.example"]
2 changes: 1 addition & 1 deletion Dockerfile-gpu
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,4 @@ WORKDIR /app/src

ENV IS_DOCKER_CONTAINER=1

CMD ["sh", "-c", "bash start.sh --hf_token ${HF_TOKEN} --flock_api_key ${FLOCK_API_KEY} --task_id ${TASK_ID} --validation_args_file validation_config.json.example"]
CMD ["sh", "-c", "bash start.sh --hf_token ${HF_TOKEN} --flock_api_key ${FLOCK_API_KEY} --task_id ${TASK_ID} --validation_args_file validation_config.json.example"]
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -67,4 +67,4 @@ bash start.sh \
--task_id your_task_id \
--validation_args_file validation_config.json.example \
--auto_clean_cache False
```
```
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,5 @@ transformers_stream_generator
tenacity
python-dotenv
peft>=0.10.0
gitpython
gitpython
pre-commit
2 changes: 1 addition & 1 deletion src/.env
Original file line number Diff line number Diff line change
@@ -1 +1 @@
TIME_SLEEP=600
TIME_SLEEP=600
2 changes: 1 addition & 1 deletion src/data/dummy_data.jsonl

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions src/start.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ while [[ "$#" -gt 0 ]]; do
--flock_api_key) FLOCK_API_KEY="$2"; shift 2 ;;
--task_id) TASK_ID="$2"; shift 2 ;;
--validation_args_file) VALIDATION_ARGS_FILE="$2"; shift 2 ;;
*)
*)
if [[ "$2" != --* ]]; then
OTHER_ARGS+="$1 $2 "; shift 2
else
Expand Down Expand Up @@ -40,4 +40,4 @@ while true; do
exit $EXIT_CODE
fi
break
done
done
22 changes: 14 additions & 8 deletions src/validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
file_utils,
)

from pathlib import Path
from dotenv import load_dotenv
from pathlib import Path
from core.collator import SFTDataCollator
Expand All @@ -42,8 +41,8 @@
IS_DOCKER_CONTAINER = os.getenv("IS_DOCKER_CONTAINER", False)

if not IS_DOCKER_CONTAINER:
import git # only import git in non-docker container environment because it is not installed in docker image
import git # only import git in non-docker container environment because it is not installed in docker image

if HF_TOKEN is None:
raise ValueError(
"You need to set HF_TOKEN to download some gated model from HuggingFace"
Expand Down Expand Up @@ -157,6 +156,7 @@ def load_model(model_name_or_path: str, val_args: TrainingArguments) -> Trainer:

return model


def is_latest_version(repo_path: str):
"""
Check if the current branch is up-to-date with the remote main branch.
Expand All @@ -168,11 +168,13 @@ def is_latest_version(repo_path: str):
origin = repo.remotes.origin
origin.fetch()

local_commit = repo.commit('main')
remote_commit = repo.commit('origin/main')
local_commit = repo.commit("main")
remote_commit = repo.commit("origin/main")

if local_commit.hexsha != remote_commit.hexsha:
logger.error("The local code is not up to date with the main branch.Pls update your version")
logger.error(
"The local code is not up to date with the main branch.Pls update your version"
)
raise
except git.exc.InvalidGitRepositoryError:
logger.error("This is not a git repository.")
Expand All @@ -181,6 +183,7 @@ def is_latest_version(repo_path: str):
logger.error("An error occurred: %s", str(e))
raise


def load_sft_dataset(
eval_file: str, max_seq_length: int, template_name: str, tokenizer: AutoTokenizer
) -> UnifiedSFTDataset:
Expand Down Expand Up @@ -376,12 +379,14 @@ def loop(validation_args_file: str, task_id: str = None, auto_clean_cache: bool
logger.info("Skip auto clean the model cache")

repo_path = Path(__file__).resolve().parent.parent

if not IS_DOCKER_CONTAINER:
is_latest_version(repo_path)
else:
logger.info("Skip checking the latest version in docker container")
logger.info("Please make sure you are using the latest version of the docker image.")
logger.info(
"Please make sure you are using the latest version of the docker image."
)

fed_ledger = FedLedger(FLOCK_API_KEY)
task_id_list = task_id.split(",")
Expand Down Expand Up @@ -448,6 +453,7 @@ def loop(validation_args_file: str, task_id: str = None, auto_clean_cache: bool

os.remove(eval_file)


cli.add_command(validate)
cli.add_command(loop)

Expand Down
4 changes: 2 additions & 2 deletions src/validation_config.json.example
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
{
"per_device_eval_batch_size": 1,
"fp16": true,
"output_dir":".",
"remove_unused_columns": false
}
}
4 changes: 2 additions & 2 deletions src/validation_config_cpu.json.example
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
{
"per_device_eval_batch_size": 1,
"fp16": false,
"output_dir":".",
"remove_unused_columns": false,
"use_cpu": true
}
}

0 comments on commit 7ed7b3d

Please sign in to comment.