Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Create a multistage docker image from a debian-bookworm base #3520

Merged
merged 7 commits into from
Jan 11, 2024
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 60 additions & 6 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,7 +1,42 @@
# NODE_VERSION is set by default in the config.ts, the following value will only
# be used if you build images by default with docker build
ARG NODE_VERSION=18.18.2
FROM terascope/node-base:${NODE_VERSION}
FROM node:${NODE_VERSION}-bookworm as base

ENV NPM_CONFIG_LOGLEVEL error
# Do not use SASL authentication with kafka
ENV WITH_SASL 0

RUN node --version
RUN yarn --version
RUN npm --version

RUN mkdir -p /app/source

# Install bunyan
RUN yarn global add \
--ignore-optional \
--no-progress \
--no-emoji \
--no-cache \
bunyan

# Install any built-in connectors in /app/
# use npm because there isn't a package.json
WORKDIR /app

RUN npm init --yes &> /dev/null \
&& npm install \
--build \
--no-package-lock \
--no-optional \
'terafoundation_kafka_connector@~0.11.1' \
&& npm cache clean --force

WORKDIR /app/source

# verify node-rdkafka is installed right
RUN node --print --eval "require('node-rdkafka')"

ENV NODE_ENV production

Expand All @@ -24,19 +59,38 @@ RUN yarn --prod=false --frozen-lockfile \
--ignore-scripts \
&& yarn cache clean


COPY service.js /app/source/

# verify node-rdkafka is installed right
RUN node -e "require('node-rdkafka')"

# verify teraslice is installed right
RUN node -e "require('teraslice')"
FROM node:${NODE_VERSION}-bookworm-slim

# Affects garbage collection. This default gets overwritten by the memory setting in kubernetes
ENV NODE_OPTIONS "--max-old-space-size=2048"
ENV NODE_ENV production

EXPOSE 5678

# set up the volumes
VOLUME /app/config /app/logs /app/assets
ENV TERAFOUNDATION_CONFIG /app/config/teraslice.yaml

# Use tini to handle sigterm and zombie processes
ENTRYPOINT ["/usr/bin/tini", "--"]

CMD ["node", "service.js"]

RUN apt-get update && \
apt-get install -y libcurl4 tini

# this can most likely be removed. Looks to be related to node10->12 transition.
COPY scripts/docker-pkg-fix.js /usr/local/bin/docker-pkg-fix
COPY scripts/wait-for-it.sh /usr/local/bin/wait-for-it
COPY --from=base /app /app

WORKDIR /app/source

# verify node-rdkafka is installed right
RUN node -e "require('node-rdkafka')"

# verify teraslice is installed right
RUN node -e "require('teraslice')"
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"name": "teraslice-workspace",
"displayName": "Teraslice",
"version": "0.90.0",
"version": "0.91.0",
"private": true,
"homepage": "https://github.com/terascope/teraslice",
"bugs": {
Expand Down
2 changes: 1 addition & 1 deletion packages/teraslice/package.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"name": "teraslice",
"displayName": "Teraslice",
"version": "0.90.0",
"version": "0.91.0",
"description": "Distributed computing platform for processing JSON data",
"homepage": "https://github.com/terascope/teraslice#readme",
"bugs": {
Expand Down
23 changes: 23 additions & 0 deletions scripts/docker-pkg-fix.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
#!/usr/bin/env node

'use strict';

const fs = require('fs');

const tmpPkgJSONPath = '/tmp/package.json';
const pkgJSONPath = '/app/source/package.json';

const arg = process.argv[2];

const pkgJSON = JSON.parse(fs.readFileSync(pkgJSONPath));
if (arg === 'pre') {
fs.renameSync(pkgJSONPath, tmpPkgJSONPath);
delete pkgJSON.workspaces;
fs.writeFileSync(pkgJSONPath, JSON.stringify(pkgJSON, null, 4));
} else if (arg === 'post') {
fs.unlinkSync(pkgJSONPath);
fs.renameSync(tmpPkgJSONPath, pkgJSONPath);
} else {
console.error('Expected first arg to be either "pre" or "post"');
process.exit(1);
}
173 changes: 173 additions & 0 deletions scripts/wait-for-it.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,173 @@
#!/usr/bin/env bash
# Use this script to test if a given TCP host/port are available

cmdname=$(basename $0)

echoerr() { if [[ $QUIET -ne 1 ]]; then echo "$@" 1>&2; fi; }

usage() {
cat <<USAGE >&2
Usage:
$cmdname host:port [-s] [-t timeout] [-- command args]
-h HOST | --host=HOST Host or IP under test
-p PORT | --port=PORT TCP port under test
Alternatively, you specify the host and port as host:port
-s | --strict Only execute subcommand if the test succeeds
-q | --quiet Don't output any status messages
-t TIMEOUT | --timeout=TIMEOUT
Timeout in seconds, zero for no timeout
-- COMMAND ARGS Execute command with args after the test finishes
USAGE
exit 1
}

wait_for() {
if [[ $TIMEOUT -gt 0 ]]; then
echoerr "$cmdname: waiting $TIMEOUT seconds for $HOST:$PORT"
else
echoerr "$cmdname: waiting for $HOST:$PORT without a timeout"
fi
start_ts=$(date +%s)
while :; do
if [[ $ISBUSY -eq 1 ]]; then
nc -z $HOST $PORT
result=$?
else
(echo >/dev/tcp/$HOST/$PORT) >/dev/null 2>&1
result=$?
fi
if [[ $result -eq 0 ]]; then
end_ts=$(date +%s)
echoerr "$cmdname: $HOST:$PORT is available after $((end_ts - start_ts)) seconds"
break
fi
sleep 1
done
return $result
}

wait_for_wrapper() {
# In order to support SIGINT during timeout: http://unix.stackexchange.com/a/57692
if [[ $QUIET -eq 1 ]]; then
timeout $BUSYTIMEFLAG $TIMEOUT $0 --quiet --child --host=$HOST --port=$PORT --timeout=$TIMEOUT &
else
timeout $BUSYTIMEFLAG $TIMEOUT $0 --child --host=$HOST --port=$PORT --timeout=$TIMEOUT &
fi
PID=$!
trap "kill -INT -$PID" INT
wait $PID
RESULT=$?
if [[ $RESULT -ne 0 ]]; then
echoerr "$cmdname: timeout occurred after waiting $TIMEOUT seconds for $HOST:$PORT"
fi
return $RESULT
}

# process arguments
while [[ $# -gt 0 ]]; do
case "$1" in
*:*)
hostport=(${1//:/ })
HOST=${hostport[0]}
PORT=${hostport[1]}
shift 1
;;
--child)
CHILD=1
shift 1
;;
-q | --quiet)
QUIET=1
shift 1
;;
-s | --strict)
STRICT=1
shift 1
;;
-h)
HOST="$2"
if [[ $HOST == "" ]]; then break; fi
shift 2
;;
--host=*)
HOST="${1#*=}"
shift 1
;;
-p)
PORT="$2"
if [[ $PORT == "" ]]; then break; fi
shift 2
;;
--port=*)
PORT="${1#*=}"
shift 1
;;
-t)
TIMEOUT="$2"
if [[ $TIMEOUT == "" ]]; then break; fi
shift 2
;;
--timeout=*)
TIMEOUT="${1#*=}"
shift 1
;;
--)
shift
CLI=("$@")
break
;;
--help)
usage
;;
*)
echoerr "Unknown argument: $1"
usage
;;
esac
done

if [[ "$HOST" == "" || "$PORT" == "" ]]; then
echoerr "Error: you need to provide a host and port to test."
usage
fi

TIMEOUT=${TIMEOUT:-15}
STRICT=${STRICT:-0}
CHILD=${CHILD:-0}
QUIET=${QUIET:-0}

# check to see if timeout is from busybox?
TIMEOUT_PATH=$(realpath $(which timeout))
if [[ $TIMEOUT_PATH =~ "busybox" ]]; then
ISBUSY=1
# We can comment this out because -t doesn't exist
# in our version of busybox.
# BUSYTIMEFLAG="-t"
else
ISBUSY=0
BUSYTIMEFLAG=""
fi

if [[ $CHILD -gt 0 ]]; then
wait_for
RESULT=$?
exit $RESULT
else
if [[ $TIMEOUT -gt 0 ]]; then
wait_for_wrapper
RESULT=$?
else
wait_for
RESULT=$?
fi
fi

if [[ $CLI != "" ]]; then
if [[ $RESULT -ne 0 && $STRICT -eq 1 ]]; then
echoerr "$cmdname: strict mode, refusing to execute subprocess"
exit $RESULT
fi
exec "${CLI[@]}"
else
exit $RESULT
fi
Loading