forked from ArchiveBox/ArchiveBox
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathDockerfile
165 lines (145 loc) · 6.33 KB
/
Dockerfile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
# This is the Dockerfile for ArchiveBox, it bundles the following dependencies:
# python3, ArchiveBox, curl, wget, git, chromium, youtube-dl, yt-dlp, single-file
# Usage:
# git submodule update --init --recursive
# git pull --recurse-submodules
# docker build . -t archivebox --no-cache
# docker run -v "$PWD/data":/data archivebox init
# docker run -v "$PWD/data":/data archivebox add 'https://example.com'
# docker run -v "$PWD/data":/data -it archivebox manage createsuperuser
# docker run -v "$PWD/data":/data -p 8000:8000 archivebox server
# Multi-arch build:
# docker buildx create --use
# docker buildx build . --platform=linux/amd64,linux/arm64,linux/arm/v7 --push -t archivebox/archivebox:latest -t archivebox/archivebox:dev
#
# Read more about [developing
# ArchiveBox](https://github.com/ArchiveBox/ArchiveBox#archivebox-development).
# Build stage: dependencies are installed here, and the second FROM stage
# further down copies selected paths out of it with COPY --from=build.
FROM python:3.11-slim-bullseye AS build

# Image metadata attached to this stage.
# NOTE(review): the maintainer address reads like an e-mail-obfuscation
# placeholder rather than a real address — confirm against upstream.
LABEL name="archivebox" \
      maintainer="Nick Sweeting <[email protected]>" \
      description="All-in-one personal internet archiving container" \
      homepage="https://github.com/ArchiveBox/ArchiveBox" \
      documentation="https://github.com/ArchiveBox/ArchiveBox/wiki/Docker#docker"
# System-level base config: timezone, locale/encoding, unbuffered Python
# output, and non-interactive apt / apt-key behaviour.
# NOTE(review): POSIX-style TZ values invert the sign, so UTC-8 is read as a
# zone labelled "UTC" that is 8 hours ahead of UTC — confirm this is intended.
ENV DEBIAN_FRONTEND=noninteractive \
    APT_KEY_DONT_WARN_ON_DANGEROUS_USAGE=1 \
    TZ=UTC-8 \
    LANG=C.UTF-8 \
    LC_ALL=C.UTF-8 \
    LANGUAGE=en_US:en \
    PYTHONIOENCODING=UTF-8 \
    PYTHONUNBUFFERED=1
# Application-level base config: directory layout and the name of the
# service account that the following instructions refer to.
ENV ARCHIVEBOX_USER="archivebox" \
    CODE_DIR=/app \
    DATA_DIR=/data \
    NODE_DIR=/node \
    VENV_PATH=/venv
# Create the non-privileged system group and user for archivebox and chrome.
# The account gets a home directory and joins the audio and video groups.
RUN groupadd --system "${ARCHIVEBOX_USER}" \
    && useradd --system --create-home \
        --gid "${ARCHIVEBOX_USER}" \
        --groups audio,video \
        "${ARCHIVEBOX_USER}"
# Install system dependencies (one package per line, sorted, so diffs stay
# small). The apt package index is removed in the same layer.
RUN apt-get update -qq \
    && apt-get install -qq -y --no-install-recommends \
        apt-transport-https \
        ca-certificates \
        cron \
        curl \
        dumb-init \
        gnupg2 \
        gosu \
        unzip \
        zlib1g-dev \
    && rm -rf /var/lib/apt/lists/*
# Install apt dependencies: the browser, media/VCS/search tools and fonts.
# /usr/bin/chromium-browser is created as a symlink to /usr/bin/chromium.
RUN apt-get update -qq \
    && apt-get install -qq -y --no-install-recommends \
        chromium \
        ffmpeg \
        fontconfig \
        fonts-freefont-ttf \
        fonts-ipafont-gothic \
        fonts-kacst \
        fonts-noto \
        fonts-symbola \
        fonts-thai-tlwg \
        fonts-wqy-zenhei \
        git \
        ripgrep \
        youtube-dl \
    && ln -s /usr/bin/chromium /usr/bin/chromium-browser \
    && rm -rf /var/lib/apt/lists/*
# Install Node environment
# The NodeSource signing key is downloaded to a file instead of being piped
# into apt-key: with the default /bin/sh a failed download on the left side of
# a pipe would not fail the RUN. `curl -f` turns HTTP errors into a non-zero
# exit. The repository suite is `bullseye` to match the bullseye base image.
RUN curl -fsSL https://deb.nodesource.com/gpgkey/nodesource.gpg.key -o /tmp/nodesource.gpg.key \
    && apt-key add /tmp/nodesource.gpg.key \
    && rm -f /tmp/nodesource.gpg.key \
    && echo 'deb https://deb.nodesource.com/node_18.x bullseye main' >> /etc/apt/sources.list \
    && apt-get update -qq \
    && apt-get install -qq -y --no-install-recommends \
        nodejs \
    # && npm install -g npm \
    && rm -rf /var/lib/apt/lists/*
# Install Node dependencies
# Only the manifest and lockfile are copied before `npm ci`, so this layer is
# rebuilt only when one of those two files changes.
WORKDIR "$NODE_DIR"
# Make binaries installed under node_modules/.bin callable by name and keep
# npm output limited to errors.
ENV PATH="${PATH}:$NODE_DIR/node_modules/.bin" \
    npm_config_loglevel=error
# COPY instead of ADD: these are plain local files, so ADD's archive
# extraction / URL fetching behaviour is not wanted.
COPY package.json package-lock.json ./
RUN npm ci
# Install Python dependencies
WORKDIR "$CODE_DIR"
# NOTE(review): the venv's bin directory is appended to PATH, so any `pip` or
# `python` found earlier on PATH takes precedence over the venv's — confirm
# that this is the intended resolution order.
ENV PATH="${PATH}:$VENV_PATH/bin"
# Create the virtualenv, upgrade pip and setuptools, and pre-create the package
# directory that the files copied below are placed into.
RUN python -m venv --clear --symlinks "$VENV_PATH" \
    && pip install --upgrade --quiet pip setuptools \
    && mkdir -p "$CODE_DIR/archivebox"
# COPY instead of ADD: plain local files need none of ADD's extra behaviour.
COPY "./setup.py" "$CODE_DIR/"
COPY "./package.json" "$CODE_DIR/archivebox/"
# Resolve and install the Python requirements in a single layer:
#   1. install the compiler toolchain and Python headers,
#   2. write a placeholder README.md for setup.py to use,
#   3. load setup.py (stopping after init) and dump install_requires plus the
#      "sonic" extra into /tmp/requirements.txt,
#   4. pip-install that list, then upgrade youtube-dl and yt-dlp,
#   5. purge the toolchain again and drop the apt package index.
RUN apt-get update -qq \
    && apt-get install -qq -y --no-install-recommends \
        build-essential \
        python-dev \
        python3-dev \
    && echo 'empty placeholder for setup.py to use' > "$CODE_DIR/archivebox/README.md" \
    && python3 -c 'from distutils.core import run_setup; result = run_setup("./setup.py", stop_after="init"); print("\n".join(result.install_requires + result.extras_require["sonic"]))' > /tmp/requirements.txt \
    && pip install -r /tmp/requirements.txt \
    && pip install --upgrade youtube-dl yt-dlp \
    && apt-get purge -y \
        build-essential \
        python-dev \
        python3-dev \
    && apt-get autoremove -y \
    && rm -rf /var/lib/apt/lists/*
# Install apt development dependencies
# RUN apt-get install -qq \
# && apt-get install -qq -y --no-install-recommends \
# python3 python3-dev python3-pip python3-venv python3-all \
# dh-python debhelper devscripts dput software-properties-common \
# python3-distutils python3-setuptools python3-wheel python3-stdeb
# RUN python3 -c 'from distutils.core import run_setup; result = run_setup("./setup.py", stop_after="init"); print("\n".join(result.extras_require["dev"]))' > /tmp/dev_requirements.txt \
# && pip install --quiet -r /tmp/dev_requirements.txt
# Install ArchiveBox Python package and its dependencies
# The full build context is copied only at this point, after the dependency
# layers above, so source edits do not invalidate those layers.
WORKDIR "$CODE_DIR"
# COPY instead of ADD: the build context is a plain directory tree, so ADD's
# archive extraction / URL fetching behaviour is not wanted.
COPY . "$CODE_DIR"
# Editable install: the package is installed in place from $CODE_DIR.
RUN pip install -e .
# Setup ArchiveBox runtime config
# Open up the interfaces to the outside world
# Optional:
# HEALTHCHECK --interval=30s --timeout=20s --retries=15 \
# CMD curl --silent 'http://localhost:8000/admin/login/' || exit 1
# Final stage: the image that is actually shipped. It starts again from the
# same base image as the build stage.
FROM python:3.11-slim-bullseye

# Image metadata. LABELs set on an earlier stage are not inherited by this one,
# so they are repeated here with the same values as on the build stage.
# NOTE(review): the maintainer address reads like an e-mail-obfuscation
# placeholder rather than a real address — confirm against upstream.
LABEL name="archivebox" \
      maintainer="Nick Sweeting <[email protected]>" \
      description="All-in-one personal internet archiving container" \
      homepage="https://github.com/ArchiveBox/ArchiveBox" \
      documentation="https://github.com/ArchiveBox/ArchiveBox/wiki/Docker#docker"

# System-level base config, same values as the build stage.
# NOTE(review): POSIX-style TZ values invert the sign, so UTC-8 is read as a
# zone labelled "UTC" that is 8 hours ahead of UTC — confirm this is intended.
ENV TZ=UTC-8 \
    LANGUAGE=en_US:en \
    LC_ALL=C.UTF-8 \
    LANG=C.UTF-8 \
    PYTHONIOENCODING=UTF-8 \
    PYTHONUNBUFFERED=1 \
    DEBIAN_FRONTEND=noninteractive \
    APT_KEY_DONT_WARN_ON_DANGEROUS_USAGE=1
# Application-level base config for the final stage: directory layout and the
# service account name (same values as the build stage).
ENV ARCHIVEBOX_USER="archivebox" \
    CODE_DIR=/app \
    DATA_DIR=/data \
    NODE_DIR=/node \
    VENV_PATH=/venv
# Create the non-privileged system group and user for archivebox and chrome
# in the final image. The account gets a home directory and joins the audio
# and video groups.
RUN groupadd --system "${ARCHIVEBOX_USER}" \
    && useradd --system --create-home \
        --gid "${ARCHIVEBOX_USER}" \
        --groups audio,video \
        "${ARCHIVEBOX_USER}"
# Pull the built artifacts out of the build stage: the application code, the
# contents of /usr/local/bin and /usr/local/lib, everything in /usr/bin, the
# gosu binary, and the Node tree.
# NOTE(review): /usr/bin is copied without /usr/lib or /usr/share, so binaries
# that need shared libraries or data files from those paths may not run in
# this stage — verify each copied tool that is used at runtime.
COPY --from=build "$CODE_DIR" "$CODE_DIR"
COPY --from=build /usr/local/bin/ /usr/local/bin/
COPY --from=build /usr/local/lib/ /usr/local/lib/
COPY --from=build /usr/bin /usr/bin
COPY --from=build /usr/sbin/gosu /usr/sbin/gosu
COPY --from=build "$NODE_DIR" "$NODE_DIR"
# Runtime configuration: commands run from the data directory, and the
# environment below sets the Docker flag, the Chrome and youtube-dl binaries,
# and the three Node-based extractors installed under $NODE_DIR/node_modules.
WORKDIR "$DATA_DIR"
ENV IN_DOCKER=True \
    CHROME_BINARY="chromium" \
    CHROME_SANDBOX=False \
    YOUTUBEDL_BINARY="yt-dlp" \
    USE_SINGLEFILE=True \
    SINGLEFILE_BINARY="$NODE_DIR/node_modules/.bin/single-file" \
    USE_READABILITY=True \
    READABILITY_BINARY="$NODE_DIR/node_modules/.bin/readability-extractor" \
    USE_MERCURY=True \
    MERCURY_BINARY="$NODE_DIR/node_modules/.bin/mercury-parser"
# Install curl and wget in the final image. Recommended packages are skipped
# and the apt package index is removed in the same layer, so neither ends up
# in the shipped image. ca-certificates is listed explicitly so HTTPS keeps
# working with recommended packages disabled.
RUN apt-get update -qq \
    && apt-get install -qq -y --no-install-recommends \
        ca-certificates \
        curl \
        wget \
    && rm -rf /var/lib/apt/lists/*
# Print the version at the end of the build, once directly and once through
# the entrypoint script; the build fails if either invocation exits non-zero.
RUN archivebox version \
    && /app/bin/docker_entrypoint.sh archivebox version
# Declare the data directory as a volume; it is declared after all build steps
# so nothing written during the build is affected by it.
VOLUME "$DATA_DIR"
# Documents the port that the default server command below binds to.
EXPOSE 8000
# dumb-init is the container's first process and launches the entrypoint
# script, which receives the CMD (or user-supplied arguments) as its arguments.
ENTRYPOINT ["dumb-init", "--", "/app/bin/docker_entrypoint.sh"]
# Default command: start the ArchiveBox server on all interfaces, port 8000.
CMD ["archivebox", "server", "--quick-init", "0.0.0.0:8000"]