From aa0aba060f1dcec15e155db56ee37fc667ec5f81 Mon Sep 17 00:00:00 2001 From: Simon Date: Wed, 30 Oct 2024 16:35:04 +0100 Subject: [PATCH] Add documentation for Blazegraph backups (#5210) Co-authored-by: Simon Dumas --- .../running-nexus/blazegraph.md | 24 ++ kubernetes/README.md | 2 + kubernetes/blazegraph/backup-cronjob.yaml | 42 +++ kubernetes/blazegraph/backup-script.yaml | 243 ++++++++++++++++++ kubernetes/blazegraph/deleter-cronjob.yaml | 32 +++ 5 files changed, 343 insertions(+) create mode 100644 kubernetes/README.md create mode 100644 kubernetes/blazegraph/backup-cronjob.yaml create mode 100644 kubernetes/blazegraph/backup-script.yaml create mode 100644 kubernetes/blazegraph/deleter-cronjob.yaml diff --git a/docs/src/main/paradox/docs/getting-started/running-nexus/blazegraph.md b/docs/src/main/paradox/docs/getting-started/running-nexus/blazegraph.md index b0d91954e8..eeba9b3d52 100644 --- a/docs/src/main/paradox/docs/getting-started/running-nexus/blazegraph.md +++ b/docs/src/main/paradox/docs/getting-started/running-nexus/blazegraph.md @@ -90,3 +90,27 @@ The Nexus repository gives also: for tests shows how to configure those files via system properties * @link:[A python script](https://github.com/BlueBrain/nexus/blob/master/blazegraph/prometheus-exporter/prometheus-blazegraph-exporter.py) allowing to scrape Blazegraph metrics so as to push them to a Prometheus instance + +## Backups + +Pushing data to Blazegraph is time-consuming especially via the API as triples are verbose and Blazegraph +does not allow large payloads. + +So even if it is possible to repopulate a Blazegraph instance from the primary store, it is better to perform backup using +@link:[the online backup api endpoint](https://github.com/blazegraph/database/wiki/REST_API#online-backup). 
+
+@link:[Here](https://github.com/BlueBrain/nexus/blob/$git.branch$/kubernetes/blazegraph/backup-script.yaml) is an example of a backup script
+using this endpoint, compressing the resulting file and creating a checksum out of it.
+
+The Nexus repository also provides:
+
+* @link:[A Kubernetes cronjob to run it](https://github.com/BlueBrain/nexus/blob/$git.branch$/kubernetes/blazegraph/backup-cronjob.yaml).
+* @link:[A Kubernetes cronjob allowing to delete old backups](https://github.com/BlueBrain/nexus/blob/$git.branch$/kubernetes/blazegraph/deleter-cronjob.yaml).
+
+@@@ note
+
+This script is meant to be run in Kubernetes as a cronjob but should be easily adaptable to other contexts by replacing the `kubectl` commands.
+
+@@@
+
+To restore a backup, the journal file related to the instance needs to be replaced by the one generated by the backup operation (decompressed first if the backup was created with compression enabled).
\ No newline at end of file
diff --git a/kubernetes/README.md b/kubernetes/README.md
new file mode 100644
index 0000000000..aa1dc3b34f
--- /dev/null
+++ b/kubernetes/README.md
@@ -0,0 +1,2 @@
+This folder gives examples of manifests and files allowing to
+run Nexus in a Kubernetes environment as it is run in BBP.
\ No newline at end of file
diff --git a/kubernetes/blazegraph/backup-cronjob.yaml b/kubernetes/blazegraph/backup-cronjob.yaml
new file mode 100644
index 0000000000..9da20299f1
--- /dev/null
+++ b/kubernetes/blazegraph/backup-cronjob.yaml
@@ -0,0 +1,42 @@
+apiVersion: batch/v1
+kind: CronJob
+metadata:
+  name: backups-blazegraph
+  namespace: nexus
+spec:
+  successfulJobsHistoryLimit: 1  # keep only the most recent successful Job
+  failedJobsHistoryLimit: 1  # keep only the most recent failed Job
+  # Every day at 01:00
+  schedule: "00 01 * * *"
+  jobTemplate:
+    spec:
+      backoffLimit: 2  # retry a failing backup at most twice
+      template:
+        metadata:
+          namespace: nexus
+        spec:
+          serviceAccountName: nexus  # NOTE(review): presumably needs permission to `exec` into pods of this namespace - confirm
+          containers:
+            - name: backups-blazegraph
+              image: bitnami/kubectl:latest  # the script drives the backup through `kubectl exec`
+              imagePullPolicy: IfNotPresent
+              # blazegraph-0 is the name of the pod for the blazegraph instance; -p is the backup directory as checked from inside that pod
+              command:
+                - "/bin/bash"
+                - "-c"
+                - "/mnt/backups/bin/blazegraph-backup.sh -n blazegraph-0 -p /mnt/backups --compress --block"
+              volumeMounts:
+                - name: backups
+                  mountPath: /mnt/backups
+                - name: blazegraph-backup-script
+                  mountPath: /mnt/backups/bin/blazegraph-backup.sh
+                  subPath: blazegraph-backup.sh  # mount only this ConfigMap key, as a single file
+          volumes:
+            - name: backups
+              persistentVolumeClaim:
+                claimName: backups-blazegraph
+            - name: blazegraph-backup-script
+              configMap:
+                name: blazegraph-backup-script
+                defaultMode: 0555  # read + execute for everyone so the script can be run directly
+          restartPolicy: Never
diff --git a/kubernetes/blazegraph/backup-script.yaml b/kubernetes/blazegraph/backup-script.yaml
new file mode 100644
index 0000000000..ac8744ad1e
--- /dev/null
+++ b/kubernetes/blazegraph/backup-script.yaml
@@ -0,0 +1,243 @@
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: blazegraph-backup-script
+  namespace: nexus
+data:
+  blazegraph-backup.sh: |
+    #!/bin/bash
+    ######################################
+    # Name: blazegraph-backup.sh
+    # Author: Carolina Lindqvist
+    # Date: 30.05.2022
+    #
+    # Description: This script can back up a Blazegraph node.
+    # Example usage: $ ./blazegraph-backup.sh -n blazegraph-0 -p /mnt/backups --compress --tag pre-update-backup
+    #######################################
+
+    #######################################
+    # Print usage information for the script.
+    # Outputs:
+    #   Writes the usage line, the option list and an example to stdout.
+    #######################################
+    print_help() {
+      echo "Usage: $(basename "$0") [option] ... [-b block| -c compress | -n node | -p path | -t tag] "
+      echo
+      echo "  -b, --block       block REST API calls to Blazegraph while backing up"
+      echo "  -c, --compress    ask Blazegraph to compress the backup"
+      echo "  -d, --debug       print commands as they execute"
+      echo "  -h, --help        print help and usage information"
+      echo "  -n, --node        name of node to backup (blazegraph-0)"
+      echo "  -p, --path        path to storage for backups"
+      echo "  -t, --tag         name of tag for backup (optional)"
+      echo
+      echo "Example: $(basename "$0") -n blazegraph-0 -p /mnt/backups --compress -t pre-update"
+    }
+
+    #######################################
+    # Abort when an option that requires a value was given without one.
+    # Arguments:
+    #   $1 the option as typed, $2 the word that follows it (its would-be value).
+    # Outputs:
+    #   Prints an error to stdout and exits with status 1 if the value is missing.
+    #######################################
+    has_parameter() {
+      if [ -z "$2" ] || [[ "$2" == -* ]]; then  # -* must stay unquoted to act as a glob: the next word is another option
+        echo "ERROR: Missing parameter for argument" "$1"
+        echo "Exiting..."
+        exit 1
+      fi
+    }
+
+    #######################################
+    # Abort when a flag that takes no value is followed by one.
+    # Arguments:
+    #   $1 the flag as typed, $2 the word that follows it (may be empty).
+    # Outputs:
+    #   Prints an error to stdout and exits with status 1 if a stray value follows the flag.
+    #######################################
+    has_no_parameter() {
+      if [[ "$1" == -* ]] && [[ -n "$2" ]] && [[ "$2" != -* ]]; then  # empty $2 means the flag was the last word: valid
+        echo "ERROR: Argument $1 does not take any parameter ($2)"
+        echo "Exiting..."
+        exit 1
+      fi
+    }
+
+    #######################################
+    # Parses the arguments and parameters given to the script.
+    # Globals:
+    #   BLOCK
+    #   COMPRESS
+    #   NODE
+    #   STORAGE_PATH
+    #   TAG
+    # Arguments:
+    #   $@ (the arguments given to the script)
+    #######################################
+    parse_arguments(){
+      while [[ $# -gt 0 ]]; do
+        case "$1" in
+          -b|--block)
+            BLOCK="true"
+            has_no_parameter "$1" "$2"
+            shift
+            ;;
+          -c|--compress)
+            COMPRESS="true"  # same literal as BLOCK and the "false" default; presumably parsed as a boolean by Blazegraph - confirm
+            has_no_parameter "$1" "$2"
+            shift
+            ;;
+          -d|--debug)
+            has_no_parameter "$1" "$2"
+            set -x # Print commands
+            shift
+            ;;
+          -h|--help)
+            has_no_parameter "$1" "$2"
+            print_help
+            exit 0
+            ;;
+          -n|--node)
+            has_parameter "$1" "$2"
+            NODE="$2"
+            shift
+            shift
+            ;;
+          -p|--path)
+            has_parameter "$1" "$2"
+            STORAGE_PATH="$2"
+            shift
+            shift
+            ;;
+          -t|--tag)
+            has_parameter "$1" "$2"
+            TAG="$2"
+            shift
+            shift
+            ;;
+          *)
+            echo "ERROR: Unknown option: " "$1"
+            echo "Exiting..."
+            exit 1
+            ;;
+        esac
+      done
+    }
+
+    #######################################
+    # Check that the mandatory arguments were given and that the backup path exists on the node.
+    # Globals:
+    #   NODE
+    #   STORAGE_PATH
+    # Outputs:
+    #   Error messages to stdout in case of errors; exits with status 1 on the first failed check.
+    #######################################
+    check_arguments() {
+
+      # Check that node name is given
+      if [ -z "$NODE" ]; then
+        echo "ERROR: Please use the option -n to provide a name of a node."
+        print_help
+        exit 1
+      fi
+
+      # Check that path to backup storage is given
+      if [ -z "$STORAGE_PATH" ]; then
+        echo "ERROR: Please provide a path indicating where to store backups"
+        print_help
+        exit 1
+      fi
+
+      # Check that backup storage path exists
+      # (the test runs inside the pod named by NODE; the path is single-quoted for the remote shell)
+      kubectl -n nexus exec "$NODE" -- /bin/bash -c "[ -d '$STORAGE_PATH' ]"
+
+      if [ $? -ne 0 ]; then
+        echo "ERROR: The specified path $STORAGE_PATH does not seem to exist on $NODE"
+        exit 1
+      fi
+    }
+
+    #######################################
+    # Verify the given checksum file.
+    # Arguments:
+    #   Checksum file
+    # Outputs:
+    #   Writes errors to stdout.
+    #######################################
+    verify_checksum() {
+      echo "Verifying checksum..."
+      kubectl -n nexus exec "$NODE" -- /bin/bash -c "sha1sum -c '$1'"  # path single-quoted for the remote shell
+
+      if [ $? -ne 0 ]; then
+        echo "ERROR: Copy failed, could not verify checksum."
+        echo "Exiting...."
+        exit 1
+      else
+        echo "Copy and checksum check: OK"
+      fi
+    }
+
+    #######################################
+    # Ask Blazegraph for an online backup into STORAGE_PATH, then create and verify a SHA-1 checksum of it.
+    # Globals:
+    #   BLOCK, COMPRESS (both default to false when unset)
+    #   NODE, TAG (TAG defaults to <NODE>_<timestamp> when unset)
+    #   STORAGE_PATH
+    # Arguments:
+    #   None
+    # Outputs:
+    #   Writes information and errors to stdout; exits with status 1 if the backup request fails.
+    #######################################
+    back_up() {
+
+      if [ -z "$TAG" ]; then
+        TIMESTAMP=$(date +%Y-%m-%d-%H:%M:%S)
+        TAG="${NODE}_${TIMESTAMP}"
+      fi
+
+      # If not specified, do not compress backups
+      if [ -z "$COMPRESS" ]; then
+        echo $(date -u) Backing up without compressing
+        COMPRESS=false
+      fi
+
+      # If not specified, do not block REST API calls while backing up
+      if [ -z "$BLOCK" ]; then
+        echo $(date -u) Backing up without blocking REST API calls to Blazegraph
+        BLOCK=false
+      fi
+
+      # Launch backup via Blazegraph API (values are expanded here, then single-quoted for the shell inside the pod)
+      echo $(date -u) Sending backup request to $NODE
+
+      RESPONSE=$(kubectl -n nexus exec "$NODE" -- /bin/bash -c "curl -s --write-out 'STATUS %{http_code}\n' --data-urlencode 'file=$STORAGE_PATH/$TAG.jnl' --data-urlencode 'compress=$COMPRESS' --data-urlencode 'block=$BLOCK' http://localhost:9999/blazegraph/backup")
+
+      if [ "${RESPONSE:0-3}" != "200" ]; then  # offset 0-3 = -3: the last three characters, i.e. the status code appended by --write-out
+        echo $(date -u) Blazegraph backup failed due to $RESPONSE
+        exit 1
+      fi
+
+      echo $(date -u) Blazegraph backup successful
+      echo $(date -u) $RESPONSE
+
+      echo $(date -u) Creating checksum...
+      kubectl -n nexus exec "$NODE" -- /bin/bash -c "sha1sum '$STORAGE_PATH/$TAG.jnl' > '$STORAGE_PATH/$TAG.sha1'"
+      verify_checksum "$STORAGE_PATH/$TAG.sha1"
+
+      echo $(date -u) Done.
+    }
+
+    # Check if no arguments given
+    if [ $# -eq 0 ]; then
+      echo "ERROR: Please provide arguments."
+      print_help
+      exit 1
+    fi
+
+    parse_arguments "$@"  # quoted so that arguments containing spaces reach the parser intact
+    check_arguments
+    back_up
+
+    exit 0
diff --git a/kubernetes/blazegraph/deleter-cronjob.yaml b/kubernetes/blazegraph/deleter-cronjob.yaml
new file mode 100644
index 0000000000..bad30807aa
--- /dev/null
+++ b/kubernetes/blazegraph/deleter-cronjob.yaml
@@ -0,0 +1,32 @@
+apiVersion: batch/v1
+kind: CronJob
+metadata:
+  name: backups-deleter-blazegraph
+  namespace: nexus
+spec:
+  successfulJobsHistoryLimit: 3
+  failedJobsHistoryLimit: 1
+  # Every day at 01:30: delete backup journals and checksums that are at least two days old
+  schedule: "30 01 * * *"
+  jobTemplate:
+    spec:
+      template:
+        metadata:
+          namespace: nexus
+        spec:
+          serviceAccountName: nexus
+          containers:
+            - name: backups-deleter-blazegraph
+              image: bitnami/kubectl:latest
+              command:
+                - "/bin/bash"
+                - "-c"
+                - "find /mnt/backups/ -mindepth 1 -maxdepth 1 -type f -mtime +1 \\( -name '*.sha1' -o -name '*.jnl' \\) -delete"  # +1 = 48h or older; a bare 2 would skip files older than 72h
+              volumeMounts:
+                - name: backups
+                  mountPath: /mnt/backups
+          volumes:
+            - name: backups
+              persistentVolumeClaim:
+                claimName: backups-blazegraph
+          restartPolicy: Never