forked from modelscope/data-juicer
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Dockerfile
28 lines (21 loc) · 782 Bytes
/
Dockerfile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
# The data-juicer image includes all open-source contents of data-juicer,
# and it will be installed in editable mode.
# Base image: Python 3.8.18, pinned to an exact patch-release tag.
FROM python:3.8.18
# prepare the java env
WORKDIR /opt
# Download the Microsoft JDK 17.0.9 tarball and unpack it into /opt/jdk.
# --progress=dot:giga keeps wget's progress output from flooding the build log.
# --strip-components=1 drops the archive's versioned top-level directory, so
# the install path no longer depends on the exact build string of the release.
# The tarball is removed in the same layer so it never reaches the image.
RUN mkdir -p jdk && \
    wget --progress=dot:giga -O jdk.tar.gz \
        https://aka.ms/download-jdk/microsoft-jdk-17.0.9-linux-x64.tar.gz && \
    tar -xzf jdk.tar.gz -C jdk --strip-components=1 && \
    rm -f jdk.tar.gz
# point JAVA_HOME at the unpacked JDK
ENV JAVA_HOME=/opt/jdk
WORKDIR /data-juicer
# Install the requirement lists before copying the rest of the source, so this
# layer is rebuilt only when a file under environments/ changes.
COPY environments/ environments/
# Every whitespace-separated entry of every file in environments/ is passed to
# a single pip invocation. --no-cache-dir keeps pip's download/wheel cache out
# of the image layer; the long timeout tolerates slow package indexes.
RUN cat environments/* | xargs pip install --no-cache-dir --default-timeout 1000
# install data-juicer then
COPY . .
# Editable install of the copied source tree with the "all" extra.
# The requirement is quoted because an unquoted .[all] is a shell glob pattern
# and would be expanded if a matching dotfile existed in the working directory.
# --no-cache-dir keeps pip's cache out of the image layer.
RUN pip install --no-cache-dir -v -e ".[all]"
# install 3rd-party system dependencies
# update + install share one layer so the package index is never stale, and
# the apt lists are removed in that same layer so they do not bloat the image.
# --no-install-recommends limits the install to the listed packages and their
# hard dependencies.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        ffmpeg \
        libsm6 \
        libxext6 && \
    rm -rf /var/lib/apt/lists/*