-
Notifications
You must be signed in to change notification settings - Fork 41
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #17 from UnicornChan/feature-support-multi-instruct
Feature support multi instruct
- Loading branch information
Showing
9 changed files
with
272 additions
and
25 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
# Stage "web_compile": clone the ktransformers repository and build its web | ||
# front end with npm on the node:20.16.0 image. | ||
FROM node:20.16.0 as web_compile | ||
WORKDIR /home | ||
# The heredoc is one "&&" chain, so the first failing command aborts the step. | ||
# node_modules is deleted after the build; the /home/ktransformers tree is what | ||
# the later compile_server stage copies from this stage. | ||
RUN <<EOF | ||
git clone https://github.com/kvcache-ai/ktransformers.git && | ||
cd ktransformers/ktransformers/website/ && | ||
npm install @vue/cli && | ||
npm run build && | ||
rm -rf node_modules | ||
EOF | ||
|
||
|
||
|
||
# Stage "compile_server": build and install ktransformers from source on the | ||
# PyTorch 2.3.1 / CUDA 12.1 devel image. | ||
FROM pytorch/pytorch:2.3.1-cuda12.1-cudnn8-devel as compile_server | ||
WORKDIR /workspace | ||
# Reuse the repository cloned (and the website built) in the web_compile stage. | ||
COPY --from=web_compile /home/ktransformers /workspace/ktransformers | ||
# Install the build toolchain, fetch the git submodules, install the Python | ||
# build prerequisites and flash-attn, then build ktransformers with pip. | ||
# The whole heredoc is one "&&" chain, so the first failure aborts the step. | ||
# NOTE(review): CPU_INSTRUCT=NATIVE presumably targets the instruction set of | ||
# the machine running `docker build`, so the image may not run on older CPUs - | ||
# confirm against the project's build script. | ||
# NOTE(review): TORCH_CUDA_ARCH_LIST presumably restricts the CUDA build to the | ||
# listed compute capabilities (8.0, 8.6, 8.7, 8.9) - confirm. | ||
RUN <<EOF | ||
apt update -y && apt install -y --no-install-recommends \ | ||
git \ | ||
wget \ | ||
vim \ | ||
gcc \ | ||
g++ \ | ||
cmake && | ||
rm -rf /var/lib/apt/lists/* && | ||
cd ktransformers && | ||
git submodule init && | ||
git submodule update && | ||
pip install ninja pyproject numpy && | ||
pip install flash-attn && | ||
CPU_INSTRUCT=NATIVE KTRANSFORMERS_FORCE_BUILD=TRUE TORCH_CUDA_ARCH_LIST="8.0;8.6;8.7;8.9" pip install . --no-build-isolation --verbose && | ||
pip cache purge | ||
EOF | ||
|
||
# Exec-form entry point: arguments given to `docker run <image> ...` are | ||
# appended to this command. | ||
ENTRYPOINT [ "/opt/conda/bin/ktransformers" ] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
# Docker | ||
|
||
## Prerequisites | ||
* Docker must be installed and running on your system. | ||
* Create a folder to store large models and intermediate files (e.g. /mnt/models). | ||
|
||
## Images | ||
There are Docker images available for our project: | ||
|
||
**Uploading** | ||
|
||
## Building docker image locally | ||
- Download the [Dockerfile](../../Dockerfile). | ||
|
||
- Once the download has finished, run the following in the directory containing the Dockerfile: | ||
```bash | ||
docker build -t approachingai/ktransformers:v0.1.1 . | ||
``` | ||
|
||
## Usage | ||
|
||
Assuming you have the [nvidia-container-toolkit](https://github.com/NVIDIA/nvidia-container-toolkit) installed, so that the GPU can be used inside a Docker container, run: | ||
``` | ||
docker run --gpus all -v /path/to/models:/models -p 10002:10002 approachingai/ktransformers:v0.1.1 --port 10002 --gguf_path /models/path/to/gguf_path --model_path /models/path/to/model_path --web True | ||
``` | ||
|
||
For more options, see the [README](../../README.md). |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1 @@ | ||
__version__ = "0.1.0" | ||
__version__ = "0.1.1" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,100 @@ | ||
include(CheckCSourceRuns) | ||
|
||
# C test programs, one per instruction-set extension. check_sse(<TYPE> ...) | ||
# looks each one up by name as ${<TYPE>_CODE} and hands it to | ||
# check_c_source_runs(), so the variable names must keep the <TYPE>_CODE form. | ||
# | ||
# AVX probe: uses the 256-bit float intrinsic _mm256_set1_ps. | ||
set(AVX_CODE " | ||
#include <immintrin.h> | ||
int main() | ||
{ | ||
__m256 a; | ||
a = _mm256_set1_ps(0); | ||
return 0; | ||
} | ||
") | ||
|
||
# AVX512 probe: uses _mm512_set_epi8 and the byte-wise mask compare | ||
# _mm512_cmp_epi8_mask. | ||
# NOTE(review): the byte-wise compare presumably needs AVX-512BW, not just | ||
# AVX-512F - confirm that this is the intended requirement. | ||
set(AVX512_CODE " | ||
#include <immintrin.h> | ||
int main() | ||
{ | ||
__m512i a = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, | ||
0, 0, 0, 0, 0, 0, 0, 0, | ||
0, 0, 0, 0, 0, 0, 0, 0, | ||
0, 0, 0, 0, 0, 0, 0, 0, | ||
0, 0, 0, 0, 0, 0, 0, 0, | ||
0, 0, 0, 0, 0, 0, 0, 0, | ||
0, 0, 0, 0, 0, 0, 0, 0, | ||
0, 0, 0, 0, 0, 0, 0, 0); | ||
__m512i b = a; | ||
__mmask64 equality_mask = _mm512_cmp_epi8_mask(a, b, _MM_CMPINT_EQ); | ||
return 0; | ||
} | ||
") | ||
|
||
# AVX2 probe: uses _mm256_abs_epi16 and _mm256_extract_epi64. | ||
set(AVX2_CODE " | ||
#include <immintrin.h> | ||
int main() | ||
{ | ||
__m256i a = {0}; | ||
a = _mm256_abs_epi16(a); | ||
__m256i x; | ||
_mm256_extract_epi64(x, 0); // we rely on this in our AVX2 code | ||
return 0; | ||
} | ||
") | ||
|
||
# FMA probe: uses the fused multiply-add intrinsic _mm256_fmadd_ps. | ||
set(FMA_CODE " | ||
#include <immintrin.h> | ||
int main() | ||
{ | ||
__m256 acc = _mm256_setzero_ps(); | ||
const __m256 d = _mm256_setzero_ps(); | ||
const __m256 p = _mm256_setzero_ps(); | ||
acc = _mm256_fmadd_ps( d, p, acc ); | ||
return 0; | ||
} | ||
") | ||
|
||
# check_sse(<type> <flags>) | ||
# | ||
# Detects whether the test program stored in ${<type>_CODE} compiles AND runs | ||
# on the configuring machine with one of the candidate compiler flags. | ||
# | ||
#   type  - name of the extension (e.g. AVX); selects ${<type>_CODE} and is the | ||
#           prefix of the result variables. | ||
#   flags - ;-separated list of candidate flag strings, tried in order; the | ||
#           first one that works wins and the rest are skipped. | ||
# | ||
# Results (cache entries, marked advanced): | ||
#   <type>_FOUND - TRUE if some candidate worked, otherwise FALSE. | ||
#   <type>_FLAGS - the winning flag string, or "" if none worked. | ||
# Each attempt is also cached by check_c_source_runs() as HAS_<type>_<n>. | ||
# | ||
# This is a function rather than a macro, so the loop counter, the loop | ||
# variable and the temporary CMAKE_REQUIRED_FLAGS value stay local and the | ||
# caller's CMAKE_REQUIRED_FLAGS is left untouched without a manual save/restore. | ||
function(check_sse type flags) | ||
    set(__FLAG_I 1) | ||
    foreach (__FLAG ${flags}) | ||
        # Once a candidate has succeeded (or a previous configure run cached | ||
        # <type>_FOUND as TRUE), the remaining candidates are skipped. | ||
        if (NOT ${type}_FOUND) | ||
            set(CMAKE_REQUIRED_FLAGS "${__FLAG}") | ||
            check_c_source_runs("${${type}_CODE}" HAS_${type}_${__FLAG_I}) | ||
            if (HAS_${type}_${__FLAG_I}) | ||
                set(${type}_FOUND TRUE CACHE BOOL "${type} support") | ||
                set(${type}_FLAGS "${__FLAG}" CACHE STRING "${type} flags") | ||
            endif() | ||
            math(EXPR __FLAG_I "${__FLAG_I}+1") | ||
        endif() | ||
    endforeach() | ||
|
||
    # No candidate worked: record the negative result so callers can always | ||
    # read <type>_FOUND and <type>_FLAGS. | ||
    if (NOT ${type}_FOUND) | ||
        set(${type}_FOUND FALSE CACHE BOOL "${type} support") | ||
        set(${type}_FLAGS "" CACHE STRING "${type} flags") | ||
    endif() | ||
|
||
    mark_as_advanced(${type}_FOUND ${type}_FLAGS) | ||
endfunction() | ||
|
||
# flags are for MSVC only! | ||
# Each candidate list is " ;/arch:<X>": first a single space (i.e. no extra | ||
# flag), then the MSVC /arch option. | ||
# NOTE(review): nothing here restricts these checks to MSVC - presumably the | ||
# including file guards this module; confirm. | ||
# | ||
# The conditions name the variables directly instead of expanding them | ||
# (if (NOT ${AVX_FOUND})), so an empty value cannot produce a malformed | ||
# condition and a value is never dereferenced a second time. | ||
check_sse("AVX" " ;/arch:AVX") | ||
if (AVX_FOUND) | ||
    set(LLAMA_AVX ON) | ||
else() | ||
    set(LLAMA_AVX OFF) | ||
endif() | ||
|
||
# AVX2 code paths are enabled only when both the AVX2 and the FMA probes pass. | ||
check_sse("AVX2" " ;/arch:AVX2") | ||
check_sse("FMA" " ;/arch:AVX2") | ||
if (AVX2_FOUND AND FMA_FOUND) | ||
    set(LLAMA_AVX2 ON) | ||
else() | ||
    set(LLAMA_AVX2 OFF) | ||
endif() | ||
|
||
check_sse("AVX512" " ;/arch:AVX512") | ||
if (AVX512_FOUND) | ||
    set(LLAMA_AVX512 ON) | ||
else() | ||
    set(LLAMA_AVX512 OFF) | ||
endif() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,7 @@ | ||
[build-system] | ||
requires = [ | ||
"setuptools", | ||
"torch == 2.3.1", | ||
"torch >= 2.3.0", | ||
"ninja", | ||
"packaging" | ||
] | ||
|
@@ -29,7 +29,7 @@ dependencies = [ | |
"fire" | ||
] | ||
|
||
requires-python = ">=3.11" | ||
requires-python = ">=3.10" | ||
|
||
authors = [ | ||
{name = "KVCache.AI", email = "[email protected]"} | ||
|
@@ -50,6 +50,7 @@ keywords = ["ktransformers", "llm"] | |
|
||
classifiers = [ | ||
"Development Status :: 4 - Beta", | ||
"Programming Language :: Python :: 3.10", | ||
"Programming Language :: Python :: 3.11", | ||
"Programming Language :: Python :: 3.12" | ||
] | ||
|
Oops, something went wrong.