# Dockerfile.llm_server
# Forked from rayonlabs/vision-workers.
# Base image: CUDA 11.8.0 "devel" variant on Ubuntu 20.04.
FROM nvidia/cuda:11.8.0-devel-ubuntu20.04
# Relative paths in later instructions resolve against /app.
WORKDIR /app
# Installing necessary packages
# - wget downloads the Miniconda installer in a later step; git is presumably
#   needed by later install steps (TODO confirm against requirements.txt).
# - ca-certificates is listed explicitly because --no-install-recommends would
#   otherwise skip it, and HTTPS downloads need it.
# - DEBIAN_FRONTEND is set inline only, so it does not leak into the runtime env.
# - The apt package lists are removed in the same layer to keep the image small.
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        ca-certificates \
        git \
        wget \
    && rm -rf /var/lib/apt/lists/*
# Installing Miniconda
# MINICONDA_INSTALLER defaults to the rolling "latest" installer (unchanged
# behaviour). For reproducible builds, pin it at build time:
#   docker build --build-arg MINICONDA_INSTALLER=<pinned installer file name> ...
ARG MINICONDA_INSTALLER=Miniconda3-latest-Linux-x86_64.sh
# Download, run in batch mode (-b: no prompts), and delete the installer in the
# same layer so it does not persist in the image.
RUN wget "https://repo.anaconda.com/miniconda/${MINICONDA_INSTALLER}" \
    && chmod 700 "${MINICONDA_INSTALLER}" \
    && "./${MINICONDA_INSTALLER}" -b \
    && rm "${MINICONDA_INSTALLER}"
# Expose the conda executable. This path assumes the installer's default prefix
# for the root user (/root/miniconda3).
ENV PATH="/root/miniconda3/bin:${PATH}"
# Create the "venv" environment with a pinned Python, drop conda's download
# cache in the same layer, and make interactive bash shells activate the env.
RUN conda create -n venv python=3.10.13 -y \
    && conda clean --all --yes \
    && echo "source activate venv" > ~/.bashrc
# Put the environment's bin directory first so python/pip resolve to "venv"
# in every later RUN step and at container runtime.
ENV PATH="/root/miniconda3/envs/venv/bin:${PATH}"
# Installing CUDA toolkit matching the CUDA version from the base image
# CUDA_TOOLKIT_VERSION defaults to 11.8.0; keep it in sync with the FROM tag.
# A separate name is used so it cannot clash with any CUDA_VERSION variable
# the base image may define.
ARG CUDA_TOOLKIT_VERSION=11.8.0
# The version is normalised to "<major>.<minor>.0" to build the
# nvidia/label/cuda-X.Y.0 channel label. No -n/-p is passed, so conda installs
# into its default target environment (NOTE(review): confirm this is the
# intended environment). The package cache is cleaned in the same layer so the
# downloaded toolkit archives do not stay in the image.
RUN CUDA_VERSION="${CUDA_TOOLKIT_VERSION}" && \
    CUDA_VERSION_MAJOR="$(echo "${CUDA_VERSION}" | cut -d'.' -f1)" && \
    CUDA_VERSION_MINOR="$(echo "${CUDA_VERSION}" | cut -d'.' -f2)" && \
    CUDA_VERSION_SIMPLE="${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR}.0" && \
    conda install "nvidia/label/cuda-${CUDA_VERSION_SIMPLE}::cuda-toolkit" -y && \
    conda clean --all --yes
# llm server requirements
# Only requirements.txt is copied here, so this dependency layer stays cached
# until the requirements file itself changes.
COPY llm_server/requirements.txt /app/requirements.txt
# --no-cache-dir keeps pip's download/wheel cache out of the image layer.
# flash-attn is installed last with --no-build-isolation, so its build uses the
# packages already installed in the environment (presumably torch from
# requirements.txt — TODO confirm).
RUN pip install --no-cache-dir --upgrade pip \
    && pip install --no-cache-dir -U wheel \
    && pip install --no-cache-dir -r /app/requirements.txt \
    && pip install --no-cache-dir flash-attn --no-build-isolation
# Launcher script: copy it in and make sure it is executable.
COPY llm_server/entrypoint.sh /app/entrypoint.sh
RUN chmod +x /app/entrypoint.sh
# Application source tree.
COPY llm_server/app /app/app
# Create new cache dir
# mkdir -p creates /app/cache and /app/cache/hub in one step and tolerates
# directories that already exist; a single recursive chmod covers both.
# The world-writable mode is kept as-is (NOTE(review): presumably so the
# container can run under an arbitrary UID — tighten if that is not required).
RUN mkdir -p /app/cache/hub \
    && chmod -R 777 /app/cache
# Hugging Face settings:
# - HF_HUB_ENABLE_HF_TRANSFER=1 opts in to hf_transfer-based downloads
#   (assumes the hf_transfer package is provided by requirements.txt — verify).
# - Change HF cache dir: HF_HOME / HF_HUB_CACHE point at the directories
#   created above.
ENV HF_HUB_ENABLE_HF_TRANSFER=1 \
    HF_HOME=/app/cache \
    HF_HUB_CACHE=/app/cache/hub
# Exec-form entrypoint: the launcher script is started directly, without a shell.
ENTRYPOINT ["/app/entrypoint.sh"]