diff --git a/.github/dependabot.yml b/.github/dependabot.yml index f0fc184..081cae4 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -7,6 +7,6 @@ version: 2 updates: - package-ecosystem: "pip" - directory: "/" + directory: "/pkg" schedule: interval: weekly diff --git a/.github/release-drafter.yml b/.github/release-drafter.yml index 981cc0f..ba97b88 100644 --- a/.github/release-drafter.yml +++ b/.github/release-drafter.yml @@ -1,4 +1,4 @@ -name-template: 'kimchi v$NEXT_PATCH_VERSION' +name-template: 'kimchima v$NEXT_PATCH_VERSION' tag-template: 'v$NEXT_PATCH_VERSION' categories: - title: '🚀 Features' diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 5b394b5..33b4594 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -14,11 +14,9 @@ jobs: name: 'Test 🔬' strategy: matrix: - os: - - ubuntu-latest - python-version: - - "3.11" - + os: ["ubuntu-latest"] + python-version: ["3.11"] + poetry-version: ["1.8.2"] runs-on: ${{ matrix.os }} steps: - name: Check repository @@ -27,14 +25,16 @@ jobs: uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - # cache: 'pip' + cache: 'pip' - name: Display Python version run: python -c "import sys; print(sys.version)" + - name: Install Poetry + uses: abatilo/actions-poetry@v2 + with: + poetry-version: ${{ matrix.poetry-version }} - name: Install dependencies run: | - conda env create -f kimchi.yml - conda activate kimchi + make install - name: Run tests run: | make test - diff --git a/Makefile b/Makefile index ccd66ea..030fcd3 100644 --- a/Makefile +++ b/Makefile @@ -7,11 +7,6 @@ upload: twine upload dist/* --verbose -.PHONY: test -test: - @python -m unittest -v - - ################################Poetry################################ .PHONY: poetry poetry: @@ -28,6 +23,11 @@ install: @poetry install -vvv +.PHONY: test +test: + @poetry run python -m unittest discover -v + + # build and publish .PHONY: publish publish: @@ -42,4 +42,26 @@ config: .PHONY: source source: - @poetry config repositories.source https://pypi.org/simple + @poetry config repositories.source https://pypi.org/project/kimchima + + +################################################################################################### +# Commit and recommit changes to github +.PONY: commit +commit: + @echo "Committing changes..." + @git add . + @git commit -s -m"${message}" + @git push origin ${branch} + @git log -1 + @echo "Changes committed and pushed to github." + + +.PONY: recommit +recommit: + @echo "Committing changes..." + @git add . + @git commit -s --amend --no-edit + @git push -f origin ${branch} + @git log -1 + @echo "Changes committed and pushed to github." \ No newline at end of file diff --git a/README.md b/README.md index 1d885cc..1c64423 100644 --- a/README.md +++ b/README.md @@ -1,39 +1,29 @@ -# kimchi +# kimchima -The collections of tools for testing and dumping LLMs. And this project is inspired by [Llama2-burn Project](https://github.com/Gadersd/llama2-burn/tree/main). And the main purpose of this project is to make sure the Llama2 model works well before we load it into the Rust ML framework. +[![Backend CI/CD 🚀](https://github.com/Aisuko/kimchima/actions/workflows/ci.yml/badge.svg)](https://github.com/Aisuko/kimchima/actions/workflows/ci.yml) +[![Release Drafter 🚀](https://github.com/Aisuko/kimchima/actions/workflows/release-drafter.yml/badge.svg)](https://github.com/Aisuko/kimchima/actions/workflows/release-drafter.yml) + +The collections of tools for ML model development. # Usage You can use it as a command line tool if you like. And you can also use it as a library. Or you can run it in VSCode with [`launch.json`](.vscode/launch.json). - - -### Test the model +## Command Line Tool ```bash -kimchi test -# or -python3 kimchi.py test +$ python -m kimchima auto sentence-transformers/all-MiniLM-L6-v2 Melbourne + ``` -### Dump the model -```bash -kimchi dump -# or -python3 kimchi.py dump -``` -``` -pip install torch==2.2.1 -pip install sentencepiece==0.2.0 -pip install transformers==4.39.1 -``` +# Acknowledgement -# Credits - [Llama2-burn Project](https://github.com/Gadersd/llama2-burn/tree/main) # License + This project is licensed as specified in the [LICENSE](./LICENSE) file. \ No newline at end of file diff --git a/kimchi.yml b/kimchi.yml deleted file mode 100644 index 57d16cc..0000000 --- a/kimchi.yml +++ /dev/null @@ -1,65 +0,0 @@ -name: kimchi -channels: - - defaults -dependencies: - - _libgcc_mutex=0.1=main - - _openmp_mutex=5.1=1_gnu - - bzip2=1.0.8=h5eee18b_5 - - ca-certificates=2024.3.11=h06a4308_0 - - ld_impl_linux-64=2.38=h1181459_1 - - libffi=3.4.4=h6a678d5_0 - - libgcc-ng=11.2.0=h1234567_1 - - libgomp=11.2.0=h1234567_1 - - libstdcxx-ng=11.2.0=h1234567_1 - - libuuid=1.41.5=h5eee18b_0 - - ncurses=6.4=h6a678d5_0 - - openssl=3.0.13=h7f8727e_0 - - pip=23.3.1=py311h06a4308_0 - - python=3.11.8=h955ad1f_0 - - readline=8.2=h5eee18b_0 - - setuptools=68.2.2=py311h06a4308_0 - - sqlite=3.41.2=h5eee18b_0 - - tk=8.6.12=h1ccaba5_0 - - tzdata=2024a=h04d1e81_0 - - wheel=0.41.2=py311h06a4308_0 - - xz=5.4.6=h5eee18b_0 - - zlib=1.2.13=h5eee18b_0 - - pip: - - certifi==2024.2.2 - - charset-normalizer==3.3.2 - - filelock==3.13.3 - - fsspec==2024.3.1 - - huggingface-hub==0.22.2 - - idna==3.6 - - jinja2==3.1.3 - - markupsafe==2.1.5 - - mpmath==1.3.0 - - networkx==3.2.1 - - numpy==1.26.4 - - nvidia-cublas-cu12==12.1.3.1 - - nvidia-cuda-cupti-cu12==12.1.105 - - nvidia-cuda-nvrtc-cu12==12.1.105 - - nvidia-cuda-runtime-cu12==12.1.105 - - nvidia-cudnn-cu12==8.9.2.26 - - nvidia-cufft-cu12==11.0.2.54 - - nvidia-curand-cu12==10.3.2.106 - - nvidia-cusolver-cu12==11.4.5.107 - - nvidia-cusparse-cu12==12.1.0.106 - - nvidia-nccl-cu12==2.19.3 - - nvidia-nvjitlink-cu12==12.4.99 - - nvidia-nvtx-cu12==12.1.105 - - packaging==24.0 - - pyyaml==6.0.1 - - regex==2023.12.25 - - requests==2.31.0 - - safetensors==0.4.2 - - sentencepiece==0.2.0 - - sympy==1.12 - - tokenizers==0.15.2 - - torch==2.2.2 - - tqdm==4.66.2 - - transformers==4.39.2 - - triton==2.2.0 - - typing-extensions==4.10.0 - - urllib3==2.2.1 -prefix: /opt/conda/envs/kimchi diff --git a/kimchima.py b/kimchima.py index 3eedfaf..0b27aaf 100644 --- a/kimchima.py +++ b/kimchima.py @@ -1,9 +1,26 @@ +# coding=utf-8 +# Copyright [2024] [Aisuko] +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import argparse from cmds.auto import CommandAuto def main(): + """ + Main function for kimchima. + """ parser = argparse.ArgumentParser( prog="kimchima", description="A command line tool for natural language processing." @@ -16,16 +33,6 @@ def main(): parser_auto.add_argument("text", help="text str or list of text str") parser_auto.set_defaults(func=CommandAuto.auto) - # parser_command_test=subparsers.add_parser("test", help="test help") - # parser_command_test.add_argument("model_dir", help="model directory") - # parser_command_test.add_argument("tokenizer_path", help="tokenizer path") - # parser_command_test.set_defaults(func=CommandTestModel.t_model) - - # parser_command_dump=subparsers.add_parser("dump", help="dump help") - # parser_command_dump.add_argument("model_dir", help="model directory") - # parser_command_dump.add_argument("tokenizer_path", help="tokenizer path") - # parser_command_dump.set_defaults(func=CommandDumpModel.dump_model) - args = parser.parse_args() args.func(args) diff --git a/pkg/auto/auto.py b/pkg/auto/auto.py index 63e1edb..1bb68b4 100644 --- a/pkg/auto/auto.py +++ b/pkg/auto/auto.py @@ -1,23 +1,16 @@ # coding=utf-8 -# Copyright (c) 2023 Aisuko - -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. - -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. +# Copyright [2024] [Aisuko] +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import torch import torch.nn.functional as F diff --git a/pkg/devices/devices.py b/pkg/devices/devices.py index d8bd9b2..0688da5 100644 --- a/pkg/devices/devices.py +++ b/pkg/devices/devices.py @@ -1,3 +1,17 @@ +# coding=utf-8 +# Copyright [2024] [Aisuko] +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from enum import Enum import torch import platform diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/pkg/auto/test_auto.py b/tests/test_auto.py similarity index 53% rename from pkg/auto/test_auto.py rename to tests/test_auto.py index 52a008f..ee69585 100644 --- a/pkg/auto/test_auto.py +++ b/tests/test_auto.py @@ -1,4 +1,17 @@ # coding=utf-8 +# Copyright [2024] [Aisuko] +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import unittest from pkg.auto import Auto diff --git a/pkg/devices/test_devices.py b/tests/test_devices.py similarity index 53% rename from pkg/devices/test_devices.py rename to tests/test_devices.py index 5af2eb8..42032c5 100644 --- a/pkg/devices/test_devices.py +++ b/tests/test_devices.py @@ -1,3 +1,17 @@ +# coding=utf-8 +# Copyright [2024] [Aisuko] +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import unittest import platform import torch diff --git a/pkg/dump/test_dump.py b/tests/test_dump.py similarity index 100% rename from pkg/dump/test_dump.py rename to tests/test_dump.py diff --git a/pkg/tokenizer/test_tokenizer.py b/tests/test_tokenizer.py similarity index 100% rename from pkg/tokenizer/test_tokenizer.py rename to tests/test_tokenizer.py