Skip to content

Commit

Permalink
Refactoring script for tiler cache
Browse files Browse the repository at this point in the history
  • Loading branch information
Rub21 committed Nov 25, 2024
1 parent 745f775 commit ac6a597
Show file tree
Hide file tree
Showing 12 changed files with 81 additions and 56 deletions.
7 changes: 2 additions & 5 deletions images/tiler-cache/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,6 @@ WORKDIR /app
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

COPY tiler-purge ./tiler-purge
COPY tiler-seed ./tiler-seed
COPY start.sh .
RUN chmod +x ./start.sh
COPY *.py .

ENTRYPOINT ["/app/start.sh"]
# Exec-form ENTRYPOINT takes one JSON array element per argument. A single
# "python purge.py" element would be treated as the name of the executable.
ENTRYPOINT ["python", "purge.py"]
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# Tiler seeding
# Tiler seed script

Tiler seeding is a group of scripts that generate the tile cache for a specific range of zoom levels, for example, from 1 to 7. The script receives a GeoJSON file covering all the areas where tile cache generation is required for OHM tiles. This approach aims to reduce latency when a user starts interacting with OHM tiles.


# Tiler purge script

Script that reads an AWS SQS queue and creates a container to purge and seed the tiler cache for specific imposm expired files.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@
import json
from datetime import datetime, timezone, timedelta
import logging

from utils import (
check_tiler_db_postgres_status
)
# Configure logging
logging.basicConfig(
format="%(asctime)s - %(levelname)s - %(message)s",
Expand All @@ -27,6 +29,11 @@
# Zoom range for the purge/seed work.
# NOTE(review): os.getenv returns a str when the variable is set, so these are
# ints only when the defaults are used -- confirm how callers consume them.
MIN_ZOOM = os.getenv("MIN_ZOOM", 8)
MAX_ZOOM = os.getenv("MAX_ZOOM", 16)
# Prefix built from ENVIRONMENT, which is defined outside this excerpt.
JOB_NAME_PREFIX = f"{ENVIRONMENT}-tiler-cache-purge-seed"
# PostgreSQL connection settings read from the environment, with local defaults.
POSTGRES_HOST = os.getenv("POSTGRES_HOST", "localhost")
# Cast to int; a non-numeric POSTGRES_PORT value raises ValueError at import time.
POSTGRES_PORT = int(os.getenv("POSTGRES_PORT", 5432))
POSTGRES_DB = os.getenv("POSTGRES_DB", "postgres")
POSTGRES_USER = os.getenv("POSTGRES_USER", "postgres")
POSTGRES_PASSWORD = os.getenv("POSTGRES_PASSWORD", "password")

# Initialize Kubernetes and AWS clients
sqs = boto3.client("sqs", region_name=REGION_NAME)
Expand Down Expand Up @@ -54,6 +61,7 @@ def get_active_jobs_count():
"PodInitializing",
"ContainerCreating",
"Running",
"Error",
]:
logging.debug(f"Job '{job.metadata.name}' has a pod in {pod.status.phase} state.")
active_jobs_count += 1
Expand Down Expand Up @@ -122,6 +130,12 @@ def process_sqs_messages():

for message in messages:
try:
# Check PostgreSQL status
if not check_tiler_db_postgres_status():
logging.error("PostgreSQL database is down. Retrying in 1 minute...")
time.sleep(60)
continue

# Check active job count before processing
while get_active_jobs_count() >= MAX_ACTIVE_JOBS:
logging.warning(
Expand Down Expand Up @@ -163,4 +177,4 @@ def process_sqs_messages():

if __name__ == "__main__":
logging.info("Starting SQS message processing...")
process_sqs_messages()
process_sqs_messages()
1 change: 1 addition & 0 deletions images/tiler-cache/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,4 @@ geopandas
pyproj
boto3
kubernetes
psycopg2-binary
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@
save_geojson_boundary,
read_geojson_boundary,
boundary_to_tiles,
check_tiler_db_postgres_status,
)

# Configure logging
logging.basicConfig(
format="%(asctime)s - %(levelname)s - %(message)s",
level=logging.INFO,
Expand All @@ -27,7 +27,7 @@
"--feature-type",
required=True,
help="Type of objects in the GeoJSON file",
default="polygon",
default="Polygon",
)
@click.option(
"--zoom-levels",
Expand All @@ -52,10 +52,17 @@
)
def main(geojson_url, feature_type, zoom_levels, concurrency, log_file, s3_bucket):
"""
Main function to process and seed tiles, with results uploaded to S3.
Main function to process and seed tiles
"""
logging.info("Starting the tile seeding process.")

# Check PostgreSQL status
logging.info("Checking PostgreSQL database status...")
if not check_tiler_db_postgres_status():
logging.error("PostgreSQL database is not running or unreachable. Exiting.")
return
logging.info("PostgreSQL database is running and reachable.")

# Extract base name from the GeoJSON URL
parsed_url = urlparse(geojson_url)
base_name = os.path.splitext(os.path.basename(parsed_url.path))[0]
Expand All @@ -73,14 +80,8 @@ def main(geojson_url, feature_type, zoom_levels, concurrency, log_file, s3_bucke
logging.error("No valid boundary geometry found.")
return

# # Save the boundary geometry to a GeoJSON file with the base name
# geojson_file = f"{base_name}_boundary.geojson"
# save_geojson_boundary(boundary_geometry, geojson_file)
# upload_to_s3(geojson_file, s3_bucket, f"tiler/logs/{geojson_file}")
# logging.info(f"Boundary GeoJSON saved and uploaded as {geojson_file}")

# Generate tiles based on boundary geometry and zoom levels
tiles = boundary_to_tiles(boundary_geometry, min_zoom)
tiles = boundary_to_tiles(boundary_geometry, min_zoom, max_zoom)
logging.info(f"Generated {len(tiles)} tiles for seeding.")

# Use base name for skipped tiles and log files
Expand Down
28 changes: 0 additions & 28 deletions images/tiler-cache/start.sh

This file was deleted.

3 changes: 0 additions & 3 deletions images/tiler-cache/tiler-purge/README.md

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,33 @@
import subprocess
import json
from smart_open import open as s3_open


import psycopg2
from psycopg2 import OperationalError

def check_tiler_db_postgres_status():
    """Report whether the tiler PostgreSQL database accepts connections.

    Connection settings are read from the POSTGRES_HOST, POSTGRES_PORT,
    POSTGRES_DB, POSTGRES_USER and POSTGRES_PASSWORD environment variables,
    falling back to local defaults. A connection is opened with a 5-second
    timeout and closed again immediately.

    Returns:
        True if the connection was opened and closed without error, False if
        psycopg2 raised OperationalError.
    """
    logging.info("Checking PostgreSQL database status...")
    # Settings are read on every call rather than cached at import time.
    conn_settings = {
        "host": os.getenv("POSTGRES_HOST", "localhost"),
        "port": int(os.getenv("POSTGRES_PORT", 5432)),
        "database": os.getenv("POSTGRES_DB", "postgres"),
        "user": os.getenv("POSTGRES_USER", "postgres"),
        "password": os.getenv("POSTGRES_PASSWORD", "password"),
        "connect_timeout": 5,  # seconds
    }
    try:
        # Only reachability matters, so the connection is closed right away.
        psycopg2.connect(**conn_settings).close()
    except OperationalError as e:
        logging.error(f"PostgreSQL database is not reachable: {e}")
        return False
    logging.info("PostgreSQL database is running and reachable.")
    return True

def read_geojson_boundary(geojson_url, feature_type, buffer_distance_km=0.01):
"""Fetches and processes GeoJSON boundary data."""
try:
Expand Down
6 changes: 4 additions & 2 deletions ohm/templates/tiler-cache-purge/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ spec:
- -c
- |
set -x
python /app/tiler-purge/main.py
python purge.py
env:
- name: REGION_NAME
value: {{ .Values.ohm.tilerCachePurge.env.REGION_NAME | quote }}
Expand All @@ -50,7 +50,9 @@ spec:
value: {{ .Values.ohm.tilerCachePurge.env.MIN_ZOOM | quote }}
- name: MAX_ZOOM
value: {{ .Values.ohm.tilerCachePurge.env.MAX_ZOOM | quote }}

envFrom:
- configMapRef:
name: {{ .Release.Name }}-tiler-server-cm
{{- if .Values.ohm.tilerCachePurge.resources.enabled }}
resources:
requests:
Expand Down
15 changes: 13 additions & 2 deletions ohm/templates/tiler-cache-seed/cronjob.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,19 @@ spec:
- -c
- |
set -x
cd /app/tiler-seed && \
python main.py --geojson-url "$GEOJSON_URL" \
echo "Seeding tiler from 0 to 4 zoom levels"
tegola cache seed \
--config=/opt/tegola_config/config.toml \
--map=osm \
--min-zoom=0 \
--max-zoom=4 \
--bounds=-180,-85.05112878,180,85.05112878 \
--concurrency=4 \
--overwrite=true
echo "Seeding tiler for $ZOOM_LEVELS zoom levels"
python seed.py --geojson-url "$GEOJSON_URL" \
--feature-type "$FEATURE_TYPE" \
--zoom-levels "$ZOOM_LEVELS" \
--concurrency "$CONCURRENCY" \
Expand Down
2 changes: 1 addition & 1 deletion values.staging.template.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -988,7 +988,7 @@ ohm:
env:
GEOJSON_URL: https://gist.githubusercontent.com/Rub21/9aaf349d74d974c0393700af8eeeb43d/raw/9f8fa0dde911705208141a4ff941e2e5b51e245b/eu.geojson
FEATURE_TYPE: Polygon
ZOOM_LEVELS: '4,5,6,7'
ZOOM_LEVELS: '5,6,7'
CONCURRENCY: 32
S3_BUCKET: osmseed-dev
OUTPUT_FILE: /logs/tiler_benchmark.log
Expand Down

0 comments on commit ac6a597

Please sign in to comment.