diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml
index a9ff8760e763..9d32cb119d41 100644
--- a/.github/workflows/docker.yml
+++ b/.github/workflows/docker.yml
@@ -15,6 +15,7 @@ on:
permissions:
contents: read # to fetch code (actions/checkout)
+ packages: write # to write images to GitHub Container Registry (GHCR)
jobs:
####################################################
@@ -147,4 +148,102 @@ jobs:
tags_flavor: suffix=-loadsql
secrets:
DOCKER_USERNAME: ${{ secrets.DOCKER_USERNAME }}
- DOCKER_ACCESS_TOKEN: ${{ secrets.DOCKER_ACCESS_TOKEN }}
\ No newline at end of file
+ DOCKER_ACCESS_TOKEN: ${{ secrets.DOCKER_ACCESS_TOKEN }}
+
+ #################################################################################
+ # Test Deployment via Docker to ensure newly built images are working properly
+ #################################################################################
+ docker-deploy:
+ # Ensure this job never runs on forked repos. It's only executed for 'dspace/dspace'
+ if: github.repository == 'dspace/dspace'
+ runs-on: ubuntu-latest
+ # Must run after all major images are built
+ needs: [dspace, dspace-test, dspace-cli, dspace-postgres-pgcrypto, dspace-solr]
+ env:
+ # Override the default dspace.server.url because the backend starts at http://127.0.0.1:8080
+ dspace__P__server__P__url: http://127.0.0.1:8080/server
+ # Enable all optional modules / controllers for this test deployment.
+ # This helps check for errors in deploying these modules via Spring Boot
+ iiif__P__enabled: true
+ ldn__P__enabled: true
+ oai__P__enabled: true
+ rdf__P__enabled: true
+ signposting__P__enabled: true
+ sword__D__server__P__enabled: true
+ swordv2__D__server__P__enabled: true
+ # If this is a PR against main (default branch), use "latest".
+ # Else if this is a PR against a different branch, use the base branch name.
+ # Else if this is a commit on main (default branch), use the "latest" tag.
+ # Else, just use the branch name.
+ # NOTE: DSPACE_VER is used because our docker compose scripts default to using the "-test" image.
+ DSPACE_VER: ${{ (github.event_name == 'pull_request' && github.event.pull_request.base.ref == github.event.repository.default_branch && 'latest') || (github.event_name == 'pull_request' && github.event.pull_request.base.ref) || (github.ref_name == github.event.repository.default_branch && 'latest') || github.ref_name }}
+ # Docker Registry to use for Docker compose scripts below.
+ # We use GitHub's Container Registry to avoid aggressive rate limits at DockerHub.
+ DOCKER_REGISTRY: ghcr.io
+ steps:
+ # Checkout our codebase (to get access to Docker Compose scripts)
+ - name: Checkout codebase
+ uses: actions/checkout@v4
+ # Download Docker image artifacts (which were just built by reusable-docker-build.yml)
+ - name: Download Docker image artifacts
+ uses: actions/download-artifact@v4
+ with:
+ # Download all amd64 Docker images (TAR files) into the /tmp/docker directory
+ pattern: docker-image-*-linux-amd64
+ path: /tmp/docker
+ merge-multiple: true
+ # Load each of the images into Docker by calling "docker image load" for each.
+ # This ensures we are using the images just built & not any prior versions on DockerHub
+ - name: Load all downloaded Docker images
+ run: |
+ find /tmp/docker -type f -name "*.tar" -exec docker image load --input "{}" \;
+ docker image ls -a
+ # Start backend using our compose script in the codebase.
+ - name: Start backend in Docker
+ run: |
+ docker compose -f docker-compose.yml up -d
+ sleep 10
+ docker container ls
+ # Create a test admin account. Load test data from a simple set of AIPs as defined in cli.ingest.yml
+ - name: Load test data into Backend
+ run: |
+ docker compose -f docker-compose-cli.yml run --rm dspace-cli create-administrator -e test@test.edu -f admin -l user -p admin -c en
+ docker compose -f docker-compose-cli.yml -f dspace/src/main/docker-compose/cli.ingest.yml run --rm dspace-cli
+ # Verify backend started successfully.
+ # 1. Make sure root endpoint is responding (check for dspace.name defined in docker-compose.yml)
+ # 2. Also check /collections endpoint to ensure the test data loaded properly (check for a collection name in AIPs)
+ - name: Verify backend is responding properly
+ run: |
+ result=$(wget -O- -q http://127.0.0.1:8080/server/api)
+ echo "$result"
+ echo "$result" | grep -oE "\"DSpace Started with Docker Compose\","
+ result=$(wget -O- -q http://127.0.0.1:8080/server/api/core/collections)
+ echo "$result"
+ echo "$result" | grep -oE "\"Dog in Yard\","
+ # Verify Handle Server can be started and is working properly
+ # 1. First generate the "[dspace]/handle-server" folder with the sitebndl.zip
+ # 2. Start the Handle Server (and wait 20 seconds to let it start up)
+ # 3. Verify logs do NOT include "Exception" in the text (as that means an error occurred)
+ # 4. Check that Handle Proxy HTML page is responding on default port (8000)
+ - name: Verify Handle Server is working properly
+ run: |
+ docker exec -i dspace /dspace/bin/make-handle-config
+ echo "Starting Handle Server..."
+ docker exec -i dspace /dspace/bin/start-handle-server
+ sleep 20
+ echo "Checking for errors in error.log"
+ result=$(docker exec -i dspace sh -c "cat /dspace/handle-server/logs/error.log* || echo ''")
+ echo "$result"
+ echo "$result" | grep -vqz "Exception"
+ echo "Checking for errors in handle-server.log..."
+ result=$(docker exec -i dspace cat /dspace/log/handle-server.log)
+ echo "$result"
+ echo "$result" | grep -vqz "Exception"
+ echo "Checking to see if Handle Proxy webpage is available..."
+ result=$(wget -O- -q http://127.0.0.1:8000/)
+ echo "$result"
+ echo "$result" | grep -oE "Handle Proxy"
+ # Shutdown our containers
+ - name: Shutdown Docker containers
+ run: |
+ docker compose -f docker-compose.yml down
diff --git a/.github/workflows/reusable-docker-build.yml b/.github/workflows/reusable-docker-build.yml
index 7a8de661fa68..7a8abda3e106 100644
--- a/.github/workflows/reusable-docker-build.yml
+++ b/.github/workflows/reusable-docker-build.yml
@@ -54,10 +54,13 @@ env:
# For a new commit on default branch (main), use the literal tag 'latest' on Docker image.
# For a new commit on other branches, use the branch name as the tag for Docker image.
# For a new tag, copy that tag name as the tag for Docker image.
+ # For a pull request, use the name of the base branch that the PR was created against or "latest" (for main).
+ # e.g. PR against 'main' will use "latest". a PR against 'dspace-7_x' will use 'dspace-7_x'.
IMAGE_TAGS: |
type=raw,value=latest,enable=${{ github.ref_name == github.event.repository.default_branch }}
type=ref,event=branch,enable=${{ github.ref_name != github.event.repository.default_branch }}
type=ref,event=tag
+ type=raw,value=${{ (github.event.pull_request.base.ref == github.event.repository.default_branch && 'latest') || github.event.pull_request.base.ref }},enable=${{ github.event_name == 'pull_request' }}
# Define default tag "flavor" for docker/metadata-action per
# https://github.com/docker/metadata-action#flavor-input
# We manage the 'latest' tag ourselves to the 'main' branch (see settings above)
@@ -72,6 +75,9 @@ env:
DEPLOY_DEMO_BRANCH: 'dspace-8_x'
DEPLOY_SANDBOX_BRANCH: 'main'
DEPLOY_ARCH: 'linux/amd64'
+ # Registry used during building of Docker images. (All images are later copied to docker.io registry)
+ # We use GitHub's Container Registry to avoid aggressive rate limits at DockerHub.
+ DOCKER_BUILD_REGISTRY: ghcr.io
jobs:
docker-build:
@@ -96,6 +102,7 @@ jobs:
# This step converts the slashes in the "arch" matrix values above into dashes & saves to env.ARCH_NAME
# E.g. "linux/amd64" becomes "linux-amd64"
# This is necessary because all upload artifacts CANNOT have special chars (like slashes)
+ # NOTE: The regex-like syntax below is Bash Parameter Substitution
- name: Prepare
run: |
platform=${{ matrix.arch }}
@@ -105,35 +112,45 @@ jobs:
- name: Checkout codebase
uses: actions/checkout@v4
- # https://github.com/docker/setup-buildx-action
- - name: Setup Docker Buildx
- uses: docker/setup-buildx-action@v3
+ # https://github.com/docker/login-action
+ # NOTE: This login occurs for BOTH non-PRs and PRs. PRs *must* also login to access private images from GHCR
+ # during the build process
+ - name: Login to ${{ env.DOCKER_BUILD_REGISTRY }}
+ uses: docker/login-action@v3
+ with:
+ registry: ${{ env.DOCKER_BUILD_REGISTRY }}
+ username: ${{ github.repository_owner }}
+ password: ${{ secrets.GITHUB_TOKEN }}
# https://github.com/docker/setup-qemu-action
- name: Set up QEMU emulation to build for multiple architectures
uses: docker/setup-qemu-action@v3
- # https://github.com/docker/login-action
- - name: Login to DockerHub
- # Only login if not a PR, as PRs only trigger a Docker build and not a push
- if: ${{ ! matrix.isPr }}
- uses: docker/login-action@v3
- with:
- username: ${{ secrets.DOCKER_USERNAME }}
- password: ${{ secrets.DOCKER_ACCESS_TOKEN }}
+ # https://github.com/docker/setup-buildx-action
+ - name: Setup Docker Buildx
+ uses: docker/setup-buildx-action@v3
# https://github.com/docker/metadata-action
- # Get Metadata for docker_build_deps step below
- - name: Sync metadata (tags, labels) from GitHub to Docker for image
+ # Extract metadata used for Docker images in all build steps below
+ - name: Extract metadata (tags, labels) from GitHub for Docker image
id: meta_build
uses: docker/metadata-action@v5
with:
- images: ${{ env.IMAGE_NAME }}
+ images: ${{ env.DOCKER_BUILD_REGISTRY }}/${{ env.IMAGE_NAME }}
tags: ${{ env.IMAGE_TAGS }}
flavor: ${{ env.TAGS_FLAVOR }}
+ #--------------------------------------------------------------------
+ # First, for all branch commits (non-PRs) we build the image & upload
+ # to GitHub Container Registry (GHCR). After uploading the image
+ # to GHCR, we store the image digest in an artifact, so we can
+ # create a merged manifest later (see 'docker-build_manifest' job).
+ #
+ # NOTE: We use GHCR in order to avoid aggressive rate limits at DockerHub.
+ #--------------------------------------------------------------------
# https://github.com/docker/build-push-action
- - name: Build and push image
+ - name: Build and push image to ${{ env.DOCKER_BUILD_REGISTRY }}
+ if: ${{ ! matrix.isPr }}
id: docker_build
uses: docker/build-push-action@v5
with:
@@ -141,15 +158,20 @@ jobs:
${{ inputs.dockerfile_additional_contexts }}
context: ${{ inputs.dockerfile_context }}
file: ${{ inputs.dockerfile_path }}
+ # Tell DSpace's Docker files to use the build registry instead of DockerHub
+ build-args:
+ DOCKER_REGISTRY=${{ env.DOCKER_BUILD_REGISTRY }}
platforms: ${{ matrix.arch }}
- # For pull requests, we run the Docker build (to ensure no PR changes break the build),
- # but we ONLY do an image push to DockerHub if it's NOT a PR
- push: ${{ ! matrix.isPr }}
+ push: true
# Use tags / labels provided by 'docker/metadata-action' above
tags: ${{ steps.meta_build.outputs.tags }}
labels: ${{ steps.meta_build.outputs.labels }}
+ # Use GitHub cache to load cached Docker images and cache the results of this build
+ # This decreases the number of images we need to fetch from DockerHub
+ cache-from: type=gha,scope=${{ inputs.build_id }}
+ cache-to: type=gha,scope=${{ inputs.build_id }},mode=max
- # Export the digest of Docker build locally (for non PRs only)
+ # Export the digest of Docker build locally
- name: Export Docker build digest
if: ${{ ! matrix.isPr }}
run: |
@@ -157,7 +179,8 @@ jobs:
digest="${{ steps.docker_build.outputs.digest }}"
touch "/tmp/digests/${digest#sha256:}"
- # Upload digest to an artifact, so that it can be used in manifest below
+ # Upload digest to an artifact, so that it can be used in combined manifest below
+ # (The purpose of the combined manifest is to list both amd64 and arm64 builds under same tag)
- name: Upload Docker build digest to artifact
if: ${{ ! matrix.isPr }}
uses: actions/upload-artifact@v4
@@ -167,33 +190,60 @@ jobs:
if-no-files-found: error
retention-days: 1
- # If this build is NOT a PR and passed in a REDEPLOY_SANDBOX_URL secret,
- # Then redeploy https://sandbox.dspace.org if this build is for our deployment architecture and 'main' branch.
- - name: Redeploy sandbox.dspace.org (based on main branch)
- if: |
- !matrix.isPR &&
- env.REDEPLOY_SANDBOX_URL != '' &&
- matrix.arch == env.DEPLOY_ARCH &&
- github.ref_name == env.DEPLOY_SANDBOX_BRANCH
- run: |
- curl -X POST $REDEPLOY_SANDBOX_URL
+ #------------------------------------------------------------------------------
+ # Second, we build the image again in order to store it in a local TAR file.
+ # This TAR of the image is cached/saved as an artifact, so that it can be used
+ # by later jobs to install the brand-new images for automated testing.
+ # This TAR build is performed BOTH for PRs and for branch commits (non-PRs).
+ #
+ # (This approach has the advantage of avoiding having to download the newly built
+ # image from DockerHub or GHCR during automated testing.)
+ #
+ # See the 'docker-deploy' job in docker.yml as an example of where this TAR is used.
+ #-------------------------------------------------------------------------------
+ # Build local image (again) and store in a TAR file in /tmp directory
+ # This step is only done for AMD64, as that's the only image we use in our automated testing (at this time).
+ # NOTE: This step cannot be combined with the build above as it's a different type of output.
+ - name: Build and push image to local TAR file
+ if: ${{ matrix.arch == 'linux/amd64'}}
+ uses: docker/build-push-action@v5
+ with:
+ build-contexts: |
+ ${{ inputs.dockerfile_additional_contexts }}
+ context: ${{ inputs.dockerfile_context }}
+ file: ${{ inputs.dockerfile_path }}
+ # Tell DSpace's Docker files to use the build registry instead of DockerHub
+ build-args:
+ DOCKER_REGISTRY=${{ env.DOCKER_BUILD_REGISTRY }}
+ platforms: ${{ matrix.arch }}
+ tags: ${{ steps.meta_build.outputs.tags }}
+ labels: ${{ steps.meta_build.outputs.labels }}
+ # Use GitHub cache to load cached Docker images and cache the results of this build
+ # This decreases the number of images we need to fetch from DockerHub
+ cache-from: type=gha,scope=${{ inputs.build_id }}
+ cache-to: type=gha,scope=${{ inputs.build_id }},mode=max
+ # Export image to a local TAR file
+ outputs: type=docker,dest=/tmp/${{ inputs.build_id }}.tar
- # If this build is NOT a PR and passed in a REDEPLOY_DEMO_URL secret,
- # Then redeploy https://demo.dspace.org if this build is for our deployment architecture and demo branch.
- - name: Redeploy demo.dspace.org (based on maintenance branch)
- if: |
- !matrix.isPR &&
- env.REDEPLOY_DEMO_URL != '' &&
- matrix.arch == env.DEPLOY_ARCH &&
- github.ref_name == env.DEPLOY_DEMO_BRANCH
- run: |
- curl -X POST $REDEPLOY_DEMO_URL
+ # Upload the local docker image (in TAR file) to a build Artifact
+ # This step is only done for AMD64, as that's the only image we use in our automated testing (at this time).
+ - name: Upload local image TAR to artifact
+ if: ${{ matrix.arch == 'linux/amd64'}}
+ uses: actions/upload-artifact@v4
+ with:
+ name: docker-image-${{ inputs.build_id }}-${{ env.ARCH_NAME }}
+ path: /tmp/${{ inputs.build_id }}.tar
+ if-no-files-found: error
+ retention-days: 1
- # Merge Docker digests (from various architectures) into a manifest.
- # This runs after all Docker builds complete above, and it tells hub.docker.com
- # that these builds should be all included in the manifest for this tag.
- # (e.g. AMD64 and ARM64 should be listed as options under the same tagged Docker image)
+ ##########################################################################################
+ # Merge Docker digests (from various architectures) into a single manifest.
+ # This runs after all Docker builds complete above. The purpose is to include all builds
+ # under a single manifest for this tag.
+ # (e.g. both linux/amd64 and linux/arm64 should be listed under the same tagged Docker image)
+ ##########################################################################################
docker-build_manifest:
+ # Only run if this is NOT a PR
if: ${{ github.event_name != 'pull_request' }}
runs-on: ubuntu-latest
needs:
@@ -207,29 +257,102 @@ jobs:
pattern: digests-${{ inputs.build_id }}-*
merge-multiple: true
+ - name: Login to ${{ env.DOCKER_BUILD_REGISTRY }}
+ uses: docker/login-action@v3
+ with:
+ registry: ${{ env.DOCKER_BUILD_REGISTRY }}
+ username: ${{ github.repository_owner }}
+ password: ${{ secrets.GITHUB_TOKEN }}
+
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Add Docker metadata for image
id: meta
uses: docker/metadata-action@v5
+ with:
+ images: ${{ env.DOCKER_BUILD_REGISTRY }}/${{ env.IMAGE_NAME }}
+ tags: ${{ env.IMAGE_TAGS }}
+ flavor: ${{ env.TAGS_FLAVOR }}
+
+ - name: Create manifest list from digests and push to ${{ env.DOCKER_BUILD_REGISTRY }}
+ working-directory: /tmp/digests
+ run: |
+ docker buildx imagetools create $(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \
+ $(printf '${{ env.DOCKER_BUILD_REGISTRY }}/${{ env.IMAGE_NAME }}@sha256:%s ' *)
+
+ - name: Inspect manifest in ${{ env.DOCKER_BUILD_REGISTRY }}
+ run: |
+ docker buildx imagetools inspect ${{ env.DOCKER_BUILD_REGISTRY }}/${{ env.IMAGE_NAME }}:${{ steps.meta.outputs.version }}
+
+ ##########################################################################################
+ # Copy images / manifest to DockerHub.
+ # This MUST run after *both* images (AMD64 and ARM64) are built and uploaded to GitHub
+ # Container Registry (GHCR). Attempting to run this in parallel to GHCR builds can result
+ # in a race condition...i.e. the copy to DockerHub may fail if GHCR image has been updated
+ # at the moment when the copy occurs.
+ ##########################################################################################
+ docker-copy_to_dockerhub:
+ # Only run if this is NOT a PR
+ if: ${{ github.event_name != 'pull_request' }}
+ runs-on: ubuntu-latest
+ needs:
+ - docker-build_manifest
+
+ steps:
+ # 'regctl' is used to more easily copy the image to DockerHub and obtain the digest from DockerHub
+ # See https://github.com/regclient/regclient/blob/main/docs/regctl.md
+ - name: Install regctl for Docker registry tools
+ uses: regclient/actions/regctl-installer@main
+ with:
+ release: 'v0.8.0'
+
+ # This recreates Docker tags for DockerHub
+ - name: Add Docker metadata for image
+ id: meta_dockerhub
+ uses: docker/metadata-action@v5
with:
images: ${{ env.IMAGE_NAME }}
tags: ${{ env.IMAGE_TAGS }}
flavor: ${{ env.TAGS_FLAVOR }}
- - name: Login to Docker Hub
+ # Login to source registry first, as this is where we are copying *from*
+ - name: Login to ${{ env.DOCKER_BUILD_REGISTRY }}
+ uses: docker/login-action@v3
+ with:
+ registry: ${{ env.DOCKER_BUILD_REGISTRY }}
+ username: ${{ github.repository_owner }}
+ password: ${{ secrets.GITHUB_TOKEN }}
+
+ # Login to DockerHub, since this is where we are copying *to*
+ - name: Login to DockerHub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_ACCESS_TOKEN }}
- - name: Create manifest list from digests and push
- working-directory: /tmp/digests
+ # Copy the image from source to DockerHub
+ - name: Copy image from ${{ env.DOCKER_BUILD_REGISTRY }} to docker.io
run: |
- docker buildx imagetools create $(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \
- $(printf '${{ env.IMAGE_NAME }}@sha256:%s ' *)
+ regctl image copy ${{ env.DOCKER_BUILD_REGISTRY }}/${{ env.IMAGE_NAME }}:${{ steps.meta_dockerhub.outputs.version }} docker.io/${{ env.IMAGE_NAME }}:${{ steps.meta_dockerhub.outputs.version }}
- - name: Inspect image
+ #--------------------------------------------------------------------
+ # Finally, check whether demo.dspace.org or sandbox.dspace.org need
+ # to be redeployed based on these new DockerHub images.
+ #--------------------------------------------------------------------
+ # If this build is for the branch that Sandbox uses and passed in a REDEPLOY_SANDBOX_URL secret,
+ # Then redeploy https://sandbox.dspace.org
+ - name: Redeploy sandbox.dspace.org (based on main branch)
+ if: |
+ env.REDEPLOY_SANDBOX_URL != '' &&
+ github.ref_name == env.DEPLOY_SANDBOX_BRANCH
+ run: |
+ curl -X POST $REDEPLOY_SANDBOX_URL
+ # If this build is for the branch that Demo uses and passed in a REDEPLOY_DEMO_URL secret,
+ # Then redeploy https://demo.dspace.org
+ - name: Redeploy demo.dspace.org (based on maintenance branch)
+ if: |
+ env.REDEPLOY_DEMO_URL != '' &&
+ github.ref_name == env.DEPLOY_DEMO_BRANCH
run: |
- docker buildx imagetools inspect ${{ env.IMAGE_NAME }}:${{ steps.meta.outputs.version }}
+ curl -X POST $REDEPLOY_DEMO_URL
\ No newline at end of file
diff --git a/Dockerfile b/Dockerfile
index 75817980379c..5aece8b7d37e 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -6,10 +6,14 @@
# This Dockerfile uses JDK17 by default.
# To build with other versions, use "--build-arg JDK_VERSION=[value]"
ARG JDK_VERSION=17
+# The Docker version tag to build from
ARG DSPACE_VERSION=latest
+# The Docker registry to use for DSpace images. Defaults to "docker.io"
+# NOTE: non-DSpace images are hardcoded to use "docker.io" and are not impacted by this build argument
+ARG DOCKER_REGISTRY=docker.io
# Step 1 - Run Maven Build
-FROM dspace/dspace-dependencies:${DSPACE_VERSION} AS build
+FROM ${DOCKER_REGISTRY}/dspace/dspace-dependencies:${DSPACE_VERSION} AS build
ARG TARGET_DIR=dspace-installer
WORKDIR /app
# The dspace-installer directory will be written to /install
@@ -31,35 +35,38 @@ RUN mvn --no-transfer-progress package ${MAVEN_FLAGS} && \
RUN rm -rf /install/webapps/server/
# Step 2 - Run Ant Deploy
-FROM eclipse-temurin:${JDK_VERSION} AS ant_build
+FROM docker.io/eclipse-temurin:${JDK_VERSION} AS ant_build
ARG TARGET_DIR=dspace-installer
# COPY the /install directory from 'build' container to /dspace-src in this container
COPY --from=build /install /dspace-src
WORKDIR /dspace-src
# Create the initial install deployment using ANT
-ENV ANT_VERSION 1.10.13
-ENV ANT_HOME /tmp/ant-$ANT_VERSION
-ENV PATH $ANT_HOME/bin:$PATH
-# Need wget to install ant
-RUN apt-get update \
- && apt-get install -y --no-install-recommends wget \
- && apt-get purge -y --auto-remove \
- && rm -rf /var/lib/apt/lists/*
+ENV ANT_VERSION=1.10.13
+ENV ANT_HOME=/tmp/ant-$ANT_VERSION
+ENV PATH=$ANT_HOME/bin:$PATH
# Download and install 'ant'
RUN mkdir $ANT_HOME && \
- wget -qO- "https://archive.apache.org/dist/ant/binaries/apache-ant-$ANT_VERSION-bin.tar.gz" | tar -zx --strip-components=1 -C $ANT_HOME
+ curl --silent --show-error --location --fail --retry 5 --output /tmp/apache-ant.tar.gz \
+ https://archive.apache.org/dist/ant/binaries/apache-ant-${ANT_VERSION}-bin.tar.gz && \
+ tar -zx --strip-components=1 -f /tmp/apache-ant.tar.gz -C $ANT_HOME && \
+ rm /tmp/apache-ant.tar.gz
# Run necessary 'ant' deploy scripts
RUN ant init_installation update_configs update_code update_webapps
# Step 3 - Start up DSpace via Runnable JAR
-FROM eclipse-temurin:${JDK_VERSION}
+FROM docker.io/eclipse-temurin:${JDK_VERSION}
# NOTE: DSPACE_INSTALL must align with the "dspace.dir" default configuration.
ENV DSPACE_INSTALL=/dspace
# Copy the /dspace directory from 'ant_build' container to /dspace in this container
COPY --from=ant_build /dspace $DSPACE_INSTALL
WORKDIR $DSPACE_INSTALL
-# Expose Tomcat port
-EXPOSE 8080
+# Need host command for "[dspace]/bin/make-handle-config"
+RUN apt-get update \
+ && apt-get install -y --no-install-recommends host \
+ && apt-get purge -y --auto-remove \
+ && rm -rf /var/lib/apt/lists/*
+# Expose Tomcat port (8080) & Handle Server HTTP port (8000)
+EXPOSE 8080 8000
# Give java extra memory (2GB)
ENV JAVA_OPTS=-Xmx2000m
# On startup, run DSpace Runnable JAR
diff --git a/Dockerfile.cli b/Dockerfile.cli
index 5254d1eb4d69..e43c8eb95dd6 100644
--- a/Dockerfile.cli
+++ b/Dockerfile.cli
@@ -6,10 +6,14 @@
# This Dockerfile uses JDK17 by default.
# To build with other versions, use "--build-arg JDK_VERSION=[value]"
ARG JDK_VERSION=17
+# The Docker version tag to build from
ARG DSPACE_VERSION=latest
+# The Docker registry to use for DSpace images. Defaults to "docker.io"
+# NOTE: non-DSpace images are hardcoded to use "docker.io" and are not impacted by this build argument
+ARG DOCKER_REGISTRY=docker.io
# Step 1 - Run Maven Build
-FROM dspace/dspace-dependencies:${DSPACE_VERSION} AS build
+FROM ${DOCKER_REGISTRY}/dspace/dspace-dependencies:${DSPACE_VERSION} AS build
ARG TARGET_DIR=dspace-installer
WORKDIR /app
# The dspace-installer directory will be written to /install
@@ -25,28 +29,26 @@ RUN mvn --no-transfer-progress package && \
mvn clean
# Step 2 - Run Ant Deploy
-FROM eclipse-temurin:${JDK_VERSION} AS ant_build
+FROM docker.io/eclipse-temurin:${JDK_VERSION} AS ant_build
ARG TARGET_DIR=dspace-installer
# COPY the /install directory from 'build' container to /dspace-src in this container
COPY --from=build /install /dspace-src
WORKDIR /dspace-src
# Create the initial install deployment using ANT
-ENV ANT_VERSION 1.10.13
-ENV ANT_HOME /tmp/ant-$ANT_VERSION
-ENV PATH $ANT_HOME/bin:$PATH
-# Need wget to install ant
-RUN apt-get update \
- && apt-get install -y --no-install-recommends wget \
- && apt-get purge -y --auto-remove \
- && rm -rf /var/lib/apt/lists/*
+ENV ANT_VERSION=1.10.13
+ENV ANT_HOME=/tmp/ant-$ANT_VERSION
+ENV PATH=$ANT_HOME/bin:$PATH
# Download and install 'ant'
RUN mkdir $ANT_HOME && \
- wget -qO- "https://archive.apache.org/dist/ant/binaries/apache-ant-$ANT_VERSION-bin.tar.gz" | tar -zx --strip-components=1 -C $ANT_HOME
+ curl --silent --show-error --location --fail --retry 5 --output /tmp/apache-ant.tar.gz \
+ https://archive.apache.org/dist/ant/binaries/apache-ant-${ANT_VERSION}-bin.tar.gz && \
+ tar -zx --strip-components=1 -f /tmp/apache-ant.tar.gz -C $ANT_HOME && \
+ rm /tmp/apache-ant.tar.gz
# Run necessary 'ant' deploy scripts
RUN ant init_installation update_configs update_code
# Step 3 - Run jdk
-FROM eclipse-temurin:${JDK_VERSION}
+FROM docker.io/eclipse-temurin:${JDK_VERSION}
# NOTE: DSPACE_INSTALL must align with the "dspace.dir" default configuration.
ENV DSPACE_INSTALL=/dspace
# Copy the /dspace directory from 'ant_build' container to /dspace in this container
diff --git a/Dockerfile.dependencies b/Dockerfile.dependencies
index f3bf1f833205..04233cd415fa 100644
--- a/Dockerfile.dependencies
+++ b/Dockerfile.dependencies
@@ -6,8 +6,8 @@
# To build with other versions, use "--build-arg JDK_VERSION=[value]"
ARG JDK_VERSION=17
-# Step 1 - Run Maven Build
-FROM maven:3-eclipse-temurin-${JDK_VERSION} AS build
+# Step 1 - Download all Dependencies
+FROM docker.io/maven:3-eclipse-temurin-${JDK_VERSION} AS build
ARG TARGET_DIR=dspace-installer
WORKDIR /app
# Create the 'dspace' user account & home directory
@@ -19,16 +19,60 @@ RUN chown -Rv dspace: /app
# Switch to dspace user & run below commands as that user
USER dspace
-# Copy the DSpace source code (from local machine) into the workdir (excluding .dockerignore contents)
-ADD --chown=dspace . /app/
+# This next part may look odd, but it speeds up the build of this image *significantly*.
+# Copy ONLY the POMs to this image (from local machine). This will allow us to download all dependencies *without*
+# performing any code compilation steps.
+
+# Parent POM
+ADD --chown=dspace pom.xml /app/
+RUN mkdir -p /app/dspace
+
+# 'dspace' module POM. Includes 'additions' ONLY, as it's the only submodule that is required to exist.
+ADD --chown=dspace dspace/pom.xml /app/dspace/
+RUN mkdir -p /app/dspace/modules/
+ADD --chown=dspace dspace/modules/pom.xml /app/dspace/modules/
+RUN mkdir -p /app/dspace/modules/additions
+ADD --chown=dspace dspace/modules/additions/pom.xml /app/dspace/modules/additions/
+
+# 'dspace-api' module POM
+RUN mkdir -p /app/dspace-api
+ADD --chown=dspace dspace-api/pom.xml /app/dspace-api/
+
+# 'dspace-iiif' module POM
+RUN mkdir -p /app/dspace-iiif
+ADD --chown=dspace dspace-iiif/pom.xml /app/dspace-iiif/
+
+# 'dspace-oai' module POM
+RUN mkdir -p /app/dspace-oai
+ADD --chown=dspace dspace-oai/pom.xml /app/dspace-oai/
+
+# 'dspace-rdf' module POM
+RUN mkdir -p /app/dspace-rdf
+ADD --chown=dspace dspace-rdf/pom.xml /app/dspace-rdf/
+
+# 'dspace-server-webapp' module POM
+RUN mkdir -p /app/dspace-server-webapp
+ADD --chown=dspace dspace-server-webapp/pom.xml /app/dspace-server-webapp/
+
+# 'dspace-services' module POM
+RUN mkdir -p /app/dspace-services
+ADD --chown=dspace dspace-services/pom.xml /app/dspace-services/
+
+# 'dspace-sword' module POM
+RUN mkdir -p /app/dspace-sword
+ADD --chown=dspace dspace-sword/pom.xml /app/dspace-sword/
+
+# 'dspace-swordv2' module POM
+RUN mkdir -p /app/dspace-swordv2
+ADD --chown=dspace dspace-swordv2/pom.xml /app/dspace-swordv2/
# Trigger the installation of all maven dependencies (hide download progress messages)
# Maven flags here ensure that we skip final assembly, skip building test environment and skip all code verification checks.
-# These flags speed up this installation as much as reasonably possible.
-ENV MAVEN_FLAGS="-P-assembly -P-test-environment -Denforcer.skip=true -Dcheckstyle.skip=true -Dlicense.skip=true -Dxml.skip=true"
-RUN mvn --no-transfer-progress install ${MAVEN_FLAGS}
+# These flags speed up this installation and skip tasks we cannot perform as we don't have the full source code.
+ENV MAVEN_FLAGS="-P-assembly -P-test-environment -Denforcer.skip=true -Dcheckstyle.skip=true -Dlicense.skip=true -Dxjc.skip=true -Dxml.skip=true"
+RUN mvn --no-transfer-progress verify ${MAVEN_FLAGS}
-# Clear the contents of the /app directory (including all maven builds), so no artifacts remain.
+# Clear the contents of the /app directory (including all maven target folders), so no artifacts remain.
# This ensures when dspace:dspace is built, it will use the Maven local cache (~/.m2) for dependencies
USER root
RUN rm -rf /app/*
diff --git a/Dockerfile.test b/Dockerfile.test
index f3acef00e825..90266101dbf0 100644
--- a/Dockerfile.test
+++ b/Dockerfile.test
@@ -8,10 +8,14 @@
# This Dockerfile uses JDK17 by default.
# To build with other versions, use "--build-arg JDK_VERSION=[value]"
ARG JDK_VERSION=17
+# The Docker version tag to build from
ARG DSPACE_VERSION=latest
+# The Docker registry to use for DSpace images. Defaults to "docker.io"
+# NOTE: non-DSpace images are hardcoded to use "docker.io" and are not impacted by this build argument
+ARG DOCKER_REGISTRY=docker.io
# Step 1 - Run Maven Build
-FROM dspace/dspace-dependencies:${DSPACE_VERSION} AS build
+FROM ${DOCKER_REGISTRY}/dspace/dspace-dependencies:${DSPACE_VERSION} AS build
ARG TARGET_DIR=dspace-installer
WORKDIR /app
# The dspace-installer directory will be written to /install
@@ -30,33 +34,36 @@ RUN mvn --no-transfer-progress package && \
RUN rm -rf /install/webapps/server/
# Step 2 - Run Ant Deploy
-FROM eclipse-temurin:${JDK_VERSION} AS ant_build
+FROM docker.io/eclipse-temurin:${JDK_VERSION} AS ant_build
ARG TARGET_DIR=dspace-installer
# COPY the /install directory from 'build' container to /dspace-src in this container
COPY --from=build /install /dspace-src
WORKDIR /dspace-src
# Create the initial install deployment using ANT
-ENV ANT_VERSION 1.10.12
-ENV ANT_HOME /tmp/ant-$ANT_VERSION
-ENV PATH $ANT_HOME/bin:$PATH
-# Need wget to install ant
-RUN apt-get update \
- && apt-get install -y --no-install-recommends wget \
- && apt-get purge -y --auto-remove \
- && rm -rf /var/lib/apt/lists/*
+ENV ANT_VERSION=1.10.12
+ENV ANT_HOME=/tmp/ant-$ANT_VERSION
+ENV PATH=$ANT_HOME/bin:$PATH
# Download and install 'ant'
RUN mkdir $ANT_HOME && \
- wget -qO- "https://archive.apache.org/dist/ant/binaries/apache-ant-$ANT_VERSION-bin.tar.gz" | tar -zx --strip-components=1 -C $ANT_HOME
+ curl --silent --show-error --location --fail --retry 5 --output /tmp/apache-ant.tar.gz \
+ https://archive.apache.org/dist/ant/binaries/apache-ant-${ANT_VERSION}-bin.tar.gz && \
+ tar -zx --strip-components=1 -f /tmp/apache-ant.tar.gz -C $ANT_HOME && \
+ rm /tmp/apache-ant.tar.gz
# Run necessary 'ant' deploy scripts
RUN ant init_installation update_configs update_code update_webapps
# Step 3 - Start up DSpace via Runnable JAR
-FROM eclipse-temurin:${JDK_VERSION}
+FROM docker.io/eclipse-temurin:${JDK_VERSION}
# NOTE: DSPACE_INSTALL must align with the "dspace.dir" default configuration.
ENV DSPACE_INSTALL=/dspace
# Copy the /dspace directory from 'ant_build' container to /dspace in this container
COPY --from=ant_build /dspace $DSPACE_INSTALL
WORKDIR $DSPACE_INSTALL
+# Need host command for "[dspace]/bin/make-handle-config"
+RUN apt-get update \
+ && apt-get install -y --no-install-recommends host \
+ && apt-get purge -y --auto-remove \
+ && rm -rf /var/lib/apt/lists/*
# Expose Tomcat port and debugging port
EXPOSE 8080 8000
# Give java extra memory (2GB)
diff --git a/docker-compose-cli.yml b/docker-compose-cli.yml
index 91f89916d208..3e2c9ba6a50a 100644
--- a/docker-compose-cli.yml
+++ b/docker-compose-cli.yml
@@ -6,7 +6,7 @@ networks:
external: true
services:
dspace-cli:
- image: "${DOCKER_OWNER:-dspace}/dspace-cli:${DSPACE_VER:-latest}"
+ image: "${DOCKER_REGISTRY:-docker.io}/${DOCKER_OWNER:-dspace}/dspace-cli:${DSPACE_VER:-latest}"
container_name: dspace-cli
build:
context: .
diff --git a/docker-compose.yml b/docker-compose.yml
index 6a930a8d31ec..ab4f8adc98c0 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -28,7 +28,7 @@ services:
# from the host machine. This IP range MUST correspond to the 'dspacenet' subnet defined above.
proxies__P__trusted__P__ipranges: '172.23.0'
LOGGING_CONFIG: /dspace/config/log4j2-container.xml
- image: "${DOCKER_OWNER:-dspace}/dspace:${DSPACE_VER:-latest-test}"
+ image: "${DOCKER_REGISTRY:-docker.io}/${DOCKER_OWNER:-dspace}/dspace:${DSPACE_VER:-latest-test}"
build:
context: .
dockerfile: Dockerfile.test
@@ -64,7 +64,7 @@ services:
dspacedb:
container_name: dspacedb
# Uses a custom Postgres image with pgcrypto installed
- image: "${DOCKER_OWNER:-dspace}/dspace-postgres-pgcrypto:${DSPACE_VER:-latest}"
+ image: "${DOCKER_REGISTRY:-docker.io}/${DOCKER_OWNER:-dspace}/dspace-postgres-pgcrypto:${DSPACE_VER:-latest}"
build:
# Must build out of subdirectory to have access to install script for pgcrypto
context: ./dspace/src/main/docker/dspace-postgres-pgcrypto/
@@ -84,7 +84,7 @@ services:
# DSpace Solr container
dspacesolr:
container_name: dspacesolr
- image: "${DOCKER_OWNER:-dspace}/dspace-solr:${DSPACE_VER:-latest}"
+ image: "${DOCKER_REGISTRY:-docker.io}/${DOCKER_OWNER:-dspace}/dspace-solr:${DSPACE_VER:-latest}"
build:
context: ./dspace/src/main/docker/dspace-solr/
# Provide path to Solr configs necessary to build Docker image
diff --git a/dspace-api/pom.xml b/dspace-api/pom.xml
index a80f27bc22be..599303275f9f 100644
--- a/dspace-api/pom.xml
+++ b/dspace-api/pom.xml
@@ -341,6 +341,14 @@
org.apache.logging.log4j
log4j-api
+
+ org.apache.logging.log4j
+ log4j-core
+
+
+ org.apache.logging.log4j
+ log4j-slf4j2-impl
+
org.hibernate.orm
hibernate-core
@@ -388,6 +396,13 @@
org.springframework
spring-orm
+
+
+
+ org.springframework
+ spring-jcl
+
+
@@ -406,6 +421,16 @@
org.mortbay.jasper
apache-jsp
+
+
+ org.bouncycastle
+ bcpkix-jdk15on
+
+
+ org.bouncycastle
+ bcprov-jdk15on
+
@@ -623,7 +648,7 @@
dnsjava
dnsjava
- 3.6.0
+ 3.6.2
@@ -672,22 +697,6 @@
com.google.apis
google-api-services-analytics
-
- com.google.api-client
- google-api-client
-
-
- com.google.http-client
- google-http-client
-
-
- com.google.http-client
- google-http-client-jackson2
-
-
- com.google.oauth-client
- google-oauth-client
-
@@ -702,7 +711,6 @@
jakarta.inject
jakarta.inject-api
- 2.0.1
@@ -733,7 +741,7 @@
com.amazonaws
aws-java-sdk-s3
- 1.12.261
+ 1.12.779
-
-
- io.netty
- netty-buffer
- 4.1.114.Final
-
-
- io.netty
- netty-transport
- 4.1.114.Final
-
-
- io.netty
- netty-transport-native-unix-common
- 4.1.114.Final
-
-
- io.netty
- netty-common
- 4.1.114.Final
-
-
- io.netty
- netty-handler
- 4.1.114.Final
-
-
- io.netty
- netty-codec
- 4.1.114.Final
-
-
- org.apache.velocity
- velocity-engine-core
- 2.3
-
-
- org.xmlunit
- xmlunit-core
- 2.10.0
- test
-
-
- com.github.java-json-tools
- json-schema-validator
- 2.2.14
-
-
- jakarta.validation
- jakarta.validation-api
- 3.1.0
-
-
- io.swagger
- swagger-core
- 1.6.2
-
-
- org.scala-lang
- scala-library
- 2.13.11
- test
-
-
-
-
diff --git a/dspace-api/src/main/java/org/dspace/app/mediafilter/TikaTextExtractionFilter.java b/dspace-api/src/main/java/org/dspace/app/mediafilter/TikaTextExtractionFilter.java
index e83bf706ed02..17e7b85e9bfc 100644
--- a/dspace-api/src/main/java/org/dspace/app/mediafilter/TikaTextExtractionFilter.java
+++ b/dspace-api/src/main/java/org/dspace/app/mediafilter/TikaTextExtractionFilter.java
@@ -18,6 +18,7 @@
import org.apache.commons.lang.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
+import org.apache.poi.util.IOUtils;
import org.apache.tika.Tika;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
@@ -72,21 +73,23 @@ public InputStream getDestinationStream(Item currentItem, InputStream source, bo
// Not using temporary file. We'll use Tika's default in-memory parsing.
// Get maximum characters to extract. Default is 100,000 chars, which is also Tika's default setting.
String extractedText;
- int maxChars = configurationService.getIntProperty("textextractor.max-chars", 100000);
+ int maxChars = configurationService.getIntProperty("textextractor.max-chars", 100_000);
try {
// Use Tika to extract text from input. Tika will automatically detect the file type.
Tika tika = new Tika();
tika.setMaxStringLength(maxChars); // Tell Tika the maximum number of characters to extract
+ IOUtils.setByteArrayMaxOverride(
+ configurationService.getIntProperty("textextractor.max-array", 100_000_000));
extractedText = tika.parseToString(source);
} catch (IOException e) {
System.err.format("Unable to extract text from bitstream in Item %s%n", currentItem.getID().toString());
- e.printStackTrace();
+ e.printStackTrace(System.err);
log.error("Unable to extract text from bitstream in Item {}", currentItem.getID().toString(), e);
throw e;
} catch (OutOfMemoryError oe) {
System.err.format("OutOfMemoryError occurred when extracting text from bitstream in Item %s. " +
"You may wish to enable 'textextractor.use-temp-file'.%n", currentItem.getID().toString());
- oe.printStackTrace();
+ oe.printStackTrace(System.err);
log.error("OutOfMemoryError occurred when extracting text from bitstream in Item {}. " +
"You may wish to enable 'textextractor.use-temp-file'.", currentItem.getID().toString(), oe);
throw oe;
diff --git a/dspace-api/src/main/java/org/dspace/app/statistics/package.html b/dspace-api/src/main/java/org/dspace/app/statistics/package.html
index a6d8d8699cf7..931a7039080d 100644
--- a/dspace-api/src/main/java/org/dspace/app/statistics/package.html
+++ b/dspace-api/src/main/java/org/dspace/app/statistics/package.html
@@ -46,8 +46,6 @@
writes event records to the Java logger.
{@link org.dspace.statistics.SolrLoggerUsageEventListener SolrLoggerUsageEventListener}
writes event records to Solr.
- {@link org.dspace.google.GoogleRecorderEventListener GoogleRecorderEventListener}<.dt>
- writes event records to Google Analytics.