From 217fdd45df85c8a15a797f511e6c7a4bc0183024 Mon Sep 17 00:00:00 2001 From: Felix Delattre Date: Mon, 4 Nov 2024 15:32:15 +0100 Subject: [PATCH] Added ingest.sh script. Co-authored-by: Jonas --- Makefile | 8 ++++- README.md | 1 + docs/manage-data.md | 34 +++++++++++++++++++ ingest.sh | 79 +++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 121 insertions(+), 1 deletion(-) create mode 100644 docs/manage-data.md create mode 100755 ingest.sh diff --git a/Makefile b/Makefile index 7f8df88..0af18a9 100755 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ HELM_REPO_URL=https://devseed.com/eoapi-k8s/ HELM_CHART_NAME=eoapi/eoapi PGO_CHART_VERSION=5.7.0 -.PHONY: all deploy minikube help +.PHONY: all deploy minikube ingest help # Default target all: deploy @@ -31,8 +31,14 @@ minikube: @echo "eoAPI is now available at:" @minikube service ingress-nginx-controller -n ingress-nginx --url | head -n 1 +ingest: + @echo "Ingesting STAC collections and items into the database." + @command -v bash >/dev/null 2>&1 || { echo "bash is required but not installed"; exit 1; } + @./ingest.sh || { echo "Ingestion failed."; exit 1; } + help: @echo "Makefile commands:" @echo " make deploy - Install eoAPI on a cluster kubectl is connected to." @echo " make minikube - Install eoAPI on minikube." + @echo " make ingest - Ingest STAC collections and items into the database." @echo " make help - Show this help message." 
diff --git a/README.md b/README.md index 8d260ba..9715b11 100644 --- a/README.md +++ b/README.md @@ -56,4 +56,5 @@ Instead of using the `make` commands above you can also [manually `helm install` * Read about [Default Configuration](./docs/configuration.md#default-configuration) and other [Configuration Options](./docs/configuration.md#additional-options) +* [Manage your data](./docs/manage-data.md) in eoAPI * Learn about [Autoscaling / Monitoring / Observability](./docs/autoscaling.md) diff --git a/docs/manage-data.md b/docs/manage-data.md new file mode 100644 index 0000000..7a3a8fa --- /dev/null +++ b/docs/manage-data.md @@ -0,0 +1,34 @@ +# Data management + +eoAPI-k8s provides a basic data ingestion process that consists of manual operations on the components of the stack. + +# Load data + +You will have to have STAC records for the collection and items you wish to load (e.g., `collections.json` and `items.json`). +[This repo](https://github.com/vincentsarago/MAXAR_opendata_to_pgstac) contains a few scripts that may help you to generate sample input data. + +## Preshipped bash script + +Execute `make ingest` to load data into the eoAPI service - it expects `collections.json` and `items.json` in the current directory. + +## Manual steps + +In order to add raster data to eoAPI you can load STAC collections and items into the PostgreSQL database using pgSTAC and the tool `pypgstac`. + +First, ensure your Kubernetes cluster is running and `kubectl` is configured to access and modify it. + +In a second step, you'll have to upload the data into the pod running the raster eoAPI service.
You can use the following commands to copy the data:
+
+```bash
+kubectl cp collections.json "$NAMESPACE/$EOAPI_POD_RASTER":/tmp/collections.json
+kubectl cp items.json "$NAMESPACE/$EOAPI_POD_RASTER":/tmp/items.json
+```
+Then open a shell in the pod or server running the raster eoAPI service and run the following commands to load the data:
+
+```bash
+#!/bin/bash
+apt update -y && apt install python3 python3-pip -y && pip install "pypgstac[psycopg]"
+pypgstac pgready --dsn $PGADMIN_URI
+pypgstac load collections /tmp/collections.json --dsn $PGADMIN_URI --method insert_ignore
+pypgstac load items /tmp/items.json --dsn $PGADMIN_URI --method insert_ignore
+```
diff --git a/ingest.sh b/ingest.sh
new file mode 100755
index 0000000..7b5e0fa
--- /dev/null
+++ b/ingest.sh
@@ -0,0 +1,105 @@
+#!/bin/bash
+#
+# Ingest STAC collections and items into the eoAPI pgSTAC database.
+#
+# Usage: ./ingest.sh [COLLECTIONS_FILE ITEMS_FILE]
+#
+# Input files are taken from the two positional arguments when both are given,
+# otherwise from the EOAPI_COLLECTIONS_FILE / EOAPI_ITEMS_FILE environment
+# variables, otherwise from ./collections.json and ./items.json.
+# Requires kubectl access to the cluster; PGADMIN_URI is read inside the pod.
+
+# Default files
+DEFAULT_COLLECTIONS_FILE="./collections.json"
+DEFAULT_ITEMS_FILE="./items.json"
+
+# Print an error message to stderr and abort with status 1.
+die() {
+    echo "$*" >&2
+    exit 1
+}
+
+# Check for provided parameters, then environment variables, then defaults
+if [ "$#" -eq 2 ]; then
+    EOAPI_COLLECTIONS_FILE="$1"
+    EOAPI_ITEMS_FILE="$2"
+else
+    EOAPI_COLLECTIONS_FILE="${EOAPI_COLLECTIONS_FILE:-$DEFAULT_COLLECTIONS_FILE}"
+    EOAPI_ITEMS_FILE="${EOAPI_ITEMS_FILE:-$DEFAULT_ITEMS_FILE}"
+    echo "No files passed as arguments. Using:"
+    echo " Collections file: $EOAPI_COLLECTIONS_FILE"
+    echo " Items file: $EOAPI_ITEMS_FILE"
+fi
+
+# Namespaces searched for the raster pod, in order. Array elements are
+# separated by whitespace only; a comma would become part of the name.
+NAMESPACES=("default" "eoapi" "data-access")
+EOAPI_POD_RASTER=""
+FOUND_NAMESPACE=""
+
+# Discover the pod name in the first namespace that has a matching pod
+for NS in "${NAMESPACES[@]}"; do
+    # kubectl errors are silenced on purpose: a failed lookup just means
+    # "try the next namespace".
+    EOAPI_POD_RASTER=$(kubectl get pods -n "$NS" -l app=raster-eoapi -o jsonpath="{.items[0].metadata.name}" 2>/dev/null)
+    if [ -n "$EOAPI_POD_RASTER" ]; then
+        FOUND_NAMESPACE="$NS"
+        echo "Found raster-eoapi pod: $EOAPI_POD_RASTER in namespace: $FOUND_NAMESPACE"
+        break
+    fi
+done
+
+# Check if the pod was found
+if [ -z "$EOAPI_POD_RASTER" ]; then
+    die "Could not determine raster-eoapi pod."
+fi
+
+# Check if input files exist
+for FILE in "$EOAPI_COLLECTIONS_FILE" "$EOAPI_ITEMS_FILE"; do
+    if [ ! -f "$FILE" ]; then
+        die "File not found: $FILE. Pass both files as arguments or set them via the EOAPI_COLLECTIONS_FILE and EOAPI_ITEMS_FILE environment variables."
+    fi
+done
+
+# Install required packages
+echo "Installing required packages in pod $EOAPI_POD_RASTER in namespace $FOUND_NAMESPACE..."
+# The extras spec is quoted so the pod's shell cannot glob-expand the brackets.
+if ! kubectl exec -n "$FOUND_NAMESPACE" "$EOAPI_POD_RASTER" -- bash -c 'apt update -y && apt install python3 python3-pip -y && pip install "pypgstac[psycopg]"'; then
+    die "Failed to install packages."
+fi
+
+# Remove the uploaded files from the pod. Registered as an EXIT trap so the
+# files are also removed when a later step aborts the script.
+cleanup() {
+    echo "Cleaning temporary files..."
+    kubectl exec -n "$FOUND_NAMESPACE" "$EOAPI_POD_RASTER" -- bash -c 'rm -f /tmp/collections.json /tmp/items.json'
+}
+trap cleanup EXIT
+
+# Copy files to pod
+echo "Copying files to pod..."
+echo "Using collections file: $EOAPI_COLLECTIONS_FILE"
+echo "Using items file: $EOAPI_ITEMS_FILE"
+if ! kubectl cp "$EOAPI_COLLECTIONS_FILE" "$FOUND_NAMESPACE/$EOAPI_POD_RASTER":/tmp/collections.json; then
+    die "Failed to copy collections file to pod."
+fi
+if ! kubectl cp "$EOAPI_ITEMS_FILE" "$FOUND_NAMESPACE/$EOAPI_POD_RASTER":/tmp/items.json; then
+    die "Failed to copy items file to pod."
+fi
+
+# Load collections and items
+echo "Loading collections..."
+if ! kubectl exec -n "$FOUND_NAMESPACE" "$EOAPI_POD_RASTER" -- bash -c 'pypgstac load collections /tmp/collections.json --dsn "$PGADMIN_URI" --method insert_ignore'; then
+    die "Failed to load collections."
+fi
+
+echo "Loading items..."
+if ! kubectl exec -n "$FOUND_NAMESPACE" "$EOAPI_POD_RASTER" -- bash -c 'pypgstac load items /tmp/items.json --dsn "$PGADMIN_URI" --method insert_ignore'; then
+    die "Failed to load items."
+fi
+
+# Clean temporary files (run explicitly so the message precedes the final one)
+trap - EXIT
+cleanup
+
+echo "Ingestion complete."