Skip to content

Commit

Permalink
[projects][vllm] Use pre-compiled vllm
Browse files Browse the repository at this point in the history
This commit installs a pre-compiled vllm wheel with pipx instead of compiling vllm from source.
  • Loading branch information
sasha0552 authored May 14, 2024
1 parent 9c877dc commit b3ea874
Show file tree
Hide file tree
Showing 14 changed files with 92 additions and 103 deletions.
8 changes: 7 additions & 1 deletion .ci/configure.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,12 @@ def main():
# revisions
options["revisions"] = dict(config.items("revisions"))

# files
options["files"] = dict(config.items("files"))

def gh_release(name):
    """Return the GitHub release download URL configured for ``name``.

    The URL is assembled from three entries of ``options`` (read from the
    surrounding scope), each looked up by ``name``: the repository URL,
    the revision, and the file name.

    Raises:
        KeyError: if ``name`` is missing from any of the three mappings.
    """
    repository = options["repositories"][name]
    revision = options["revisions"][name]
    filename = options["files"][name]
    return f"{repository}/releases/download/{revision}/{filename}"

# list of rendered files
rendered = []

Expand All @@ -55,7 +61,7 @@ def main():
rendered.sort()

# render file
render_template(filepath, platform=platform, type=type, rendered=rendered, **options)
render_template(filepath, platform=platform, type=type, rendered=rendered, gh_release=gh_release, **options)

# add output file to rendered list
rendered.append(filepath[:-7].replace("\\", "/"))
Expand Down
27 changes: 17 additions & 10 deletions .ci/options.ini
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
[repositories]
automatic = https://github.com/vladmandic/automatic.git
axolotl = https://github.com/OpenAccess-AI-Collective/axolotl.git
comfyui = https://github.com/comfyanonymous/ComfyUI.git
llamacpp = https://github.com/ggerganov/llama.cpp.git
sillytavern = https://github.com/SillyTavern/SillyTavern.git
sillytavern_extras = https://github.com/SillyTavern/SillyTavern-Extras.git
text_generation_webui = https://github.com/oobabooga/text-generation-webui.git
vllm = https://github.com/sasha0552/vllm.git
automatic = https://github.com/vladmandic/automatic
axolotl = https://github.com/OpenAccess-AI-Collective/axolotl
comfyui = https://github.com/comfyanonymous/ComfyUI
llamacpp = https://github.com/ggerganov/llama.cpp
sillytavern = https://github.com/SillyTavern/SillyTavern
sillytavern_extras = https://github.com/SillyTavern/SillyTavern-Extras
text_generation_webui = https://github.com/oobabooga/text-generation-webui
triton = https://github.com/sasha0552/triton
vllm = https://github.com/sasha0552/vllm

[revisions]
automatic = e081f232d2648d79659b9a53070cecb544b85b3d
Expand All @@ -16,12 +17,18 @@ llamacpp = b2862
sillytavern = 1.11.8
sillytavern_extras = 1d82f3a8607319d1e09a2f4749a09c564c18c320
text_generation_webui = snapshot-2024-04-28
vllm = prs
triton = 2024-05-11_06-27-29
vllm = 2024-05-14_08-11-08

[files]
triton = triton_nightly-3.0.0.post20240511053339-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl
vllm = vllm-0.4.2+cu124-cp311-cp311-linux_x86_64.whl

[strategy]
axolotl = release
llamacpp = release
sillytavern = release
text_generation_webui = release
vllm = branch
triton = release+first
vllm = release+first

54 changes: 45 additions & 9 deletions .ci/update.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,17 @@
import requests

def fetch_latest_revision(url, strategy):
# parse strategy
strategy = strategy.split("+", 2)
release_strategy = strategy[0]
file_strategy = strategy[1] if len(strategy) == 2 else None

# github
if url.startswith("https://github.com/") and url.endswith(".git"):
if url.startswith("https://github.com/"):
# extract repo path
path = url[19:-4]
path = url[19:]

match strategy:
match release_strategy:
case "commit":
# get commits from api
response = requests.get(f"https://api.github.com/repos/{path}/commits")
Expand All @@ -20,8 +25,8 @@ def fetch_latest_revision(url, strategy):
# parse json
data = response.json()

# return first commit id
return data[0]["sha"]
# return first commit id and no file
return data[0]["sha"], None

case "release":
# get releases from api
Expand All @@ -33,10 +38,33 @@ def fetch_latest_revision(url, strategy):
# parse json
data = response.json()

# return variables
ret_release = None
ret_file = None

# find first release
for release in data:
if not release["prerelease"]:
return release["tag_name"]
release_url = release["url"]
ret_release = release["tag_name"]
break

match file_strategy:
case "first":
# get release from api
response = requests.get(release_url)

# throw error if not success
response.raise_for_status()

# parse json
data = response.json()

# return first file
ret_file = data["assets"][0]["name"]

# return release and file
return ret_release, ret_file

raise ValueError(f"Unsupported url or strategy ({url}, {strategy})")

Expand All @@ -62,9 +90,17 @@ def main():
if key in config["strategy"]:
strategy = config["strategy"][key]

# if not locked, update
if strategy != "branch" and strategy != "locked":
config["revisions"][key] = fetch_latest_revision(value, strategy)
# if needed, update version
if not strategy.startswith("branch") and not strategy.startswith("locked"):
# fetch info
revision, file = fetch_latest_revision(value, strategy)

# update revision
config["revisions"][key] = revision

# if needed, update file
if file is not None:
config["files"][key] = file

# write config back
with open(".ci/options.ini", "w") as file:
Expand Down
25 changes: 4 additions & 21 deletions airootfs/root/customize_airootfs.sh.jinja2
Original file line number Diff line number Diff line change
Expand Up @@ -5,28 +5,11 @@ set -eu
# customize_airootfs temporary directory
export CUSTOMIZE_AIROOTFS=/root/customize_airootfs

# directory with patches
export TORI_PATCHES=/home/tori/.local/share/tori/patches

# disable package caching
export PIP_NO_CACHE_DIR=0

# limit the number of parallel jobs to avoid OOM
export MAX_JOBS=2
# home directory
export HOME=/home/tori

# define supported architectures
export TORCH_CUDA_ARCH_LIST="6.0;6.1;7.0;7.5;8.0;8.6;8.9;9.0"
export CMAKE_CUDA_ARCHITECTURES=$TORCH_CUDA_ARCH_LIST
export CUDA_ARCHITECTURES=$TORCH_CUDA_ARCH_LIST

# cuda home directory
export CUDA_HOME=/opt/cuda

# use gcc 12
export CC=gcc-12
export CXX=g++-12
export CUDACXX=$CUDA_HOME/bin/nvcc
export CUDAHOSTCXX=g++-12
# directory with patches
export TORI_PATCHES=$HOME/.local/share/tori/patches
##### ENVIRONMENT VARIABLES #####

# set user password
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
set -eu

# clone repository
git clone "{{ repositories.automatic }}"
git clone "{{ repositories.automatic }}.git"

# automatic patches
pushd "automatic"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ set -eu

{% if platform == "cuda" %}
# clone repository
git clone "{{ repositories.axolotl }}"
git clone "{{ repositories.axolotl }}.git"

# axolotl patches
pushd "axolotl"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
set -eu

# clone repository
git clone "{{ repositories.comfyui }}"
git clone "{{ repositories.comfyui }}.git"

# ComfyUI patches
pushd "ComfyUI"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
set -eu

# clone repository
git clone "{{ repositories.llamacpp }}"
git clone "{{ repositories.llamacpp }}.git"

# llama.cpp patches
pushd "llama.cpp"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
set -eu

# clone repository
git clone "{{ repositories.sillytavern_extras }}"
git clone "{{ repositories.sillytavern_extras }}.git"

# SillyTavern-Extras patches
pushd "SillyTavern-Extras"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
set -eu

# clone repository
git clone "{{ repositories.sillytavern }}"
git clone "{{ repositories.sillytavern }}.git"

# SillyTavern patches
pushd "SillyTavern"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
set -eu

# clone repository
git clone "{{ repositories.text_generation_webui }}"
git clone "{{ repositories.text_generation_webui }}.git"

# text-generation-webui patches
pushd "text-generation-webui"
Expand Down
63 changes: 11 additions & 52 deletions airootfs/root/customize_airootfs/scripts/1000-vllm.sh.jinja2
Original file line number Diff line number Diff line change
Expand Up @@ -2,62 +2,21 @@
set -eu

{% if platform == "cuda" %}
# clone repository
git clone "{{ repositories.vllm }}"
# install vllm
pipx install "{{ gh_release("vllm") }}"

# vllm patches
pushd "vllm"
# use specific revision
git checkout "{{ revisions.vllm }}"
# remove triton
pipx runpip vllm uninstall -y triton

# create branch
git checkout -b tori
# install triton with pascal patches
pipx inject vllm "{{ gh_release("triton") }}"

{% if platform == "cuda" %}
# install nvidia-pstate
pipx inject vllm nvidia-pstate

# apply patches
pushd "$HOME/.local/share/pipx/venvs/vllm/lib/python3.11/site-packages/vllm"
# drop pstate in idle
patch -p1 < "$TORI_PATCHES/0000-vllm-drop-pstate-in-idle.patch"
{% endif %}

# commit changes
git add .
git commit -m "Apply patches"
popd

# vllm dependencies
pushd "vllm"
# create venv
python3 -m venv venv

# activate venv
source venv/bin/activate
# install dependencies
pip3 install -r requirements-build.txt

# build native extension
python3 setup.py build_ext --inplace
deactivate

# remove venv
rm -fr venv

# create venv
python3 -m venv venv

# activate venv
source venv/bin/activate
{% if platform == "cuda" %}
# install nvidia-pstate
pip3 install nvidia-pstate
{% endif %}

# install dependencies
pip3 install -r requirements-cuda.txt

# remove triton
pip3 uninstall -y triton

# install triton nightly
pip3 install "https://github.com/sasha0552/triton/releases/download/2024-05-11_06-27-29/triton_nightly-3.0.0.post20240511053339-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl"
deactivate
popd
{% endif %}
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,6 @@ rm -fr /home/tori/.keras
# npm
rm -fr /home/tori/.npm

# triton
rm -fr /home/tori/.triton

# automatic installation config
rm -f /home/tori/automatic/config.json

Expand Down
1 change: 1 addition & 0 deletions packages.x86_64.jinja2
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ openssh
progress
pv
python-pip
python-pipx
qemu-guest-agent
rdfind
reptyr
Expand Down

0 comments on commit b3ea874

Please sign in to comment.