From 367f6bf5af460c1e53a85b0be0a2c562e5167ad4 Mon Sep 17 00:00:00 2001 From: Evening Date: Thu, 13 Jun 2024 17:24:32 +0800 Subject: [PATCH] Add spin up dependencies and script for replica --- .../label-studio-replica/default_config.xml | 121 ++++++++++++++++++ .../label-studio-replica/docker-compose.yml | 78 +++++++++++ .../initialize_replica.py | 99 ++++++++++++++ 3 files changed, 298 insertions(+) create mode 100644 src/label-studio/label-studio-replica/default_config.xml create mode 100644 src/label-studio/label-studio-replica/docker-compose.yml create mode 100644 src/label-studio/label-studio-replica/initialize_replica.py diff --git a/src/label-studio/label-studio-replica/default_config.xml b/src/label-studio/label-studio-replica/default_config.xml new file mode 100644 index 00000000..051211f4 --- /dev/null +++ b/src/label-studio/label-studio-replica/default_config.xml @@ -0,0 +1,121 @@ + +
Replica FRDC Server
+
+ This is a replica server. All changes will NOT be reflected onto the Machine Learning Pipeline. +
+ +
Select Species
+ + + + + + + + +
Select Quality
+ + + + + + + + + + + + + + + + +
Submitted By (Team):
+ + + + +
UserID (Submit):
+ + + + + + + + + +
+ +
Checked By (Team):
+ + + + +
UserID (Check):
+ +
+ +
+ +
\ No newline at end of file
diff --git a/src/label-studio/label-studio-replica/docker-compose.yml b/src/label-studio/label-studio-replica/docker-compose.yml
new file mode 100644
index 00000000..80bc06cc
--- /dev/null
+++ b/src/label-studio/label-studio-replica/docker-compose.yml
@@ -0,0 +1,80 @@
+version: "3.9"
+services:
+  nginx:
+    build: .
+    image: heartexlabs/label-studio:latest
+    restart: unless-stopped
+    ports:
+      - "8082:8085"
+      - "8083:8086"
+    depends_on:
+      - app
+    environment:
+      - LABEL_STUDIO_HOST=${LABEL_STUDIO_HOST:-}
+    #   Optional: Specify SSL termination certificate & key
+    #   Just drop your cert.pem and cert.key into folder 'deploy/nginx/certs'
+    #      - NGINX_SSL_CERT=/certs/cert.pem
+    #      - NGINX_SSL_CERT_KEY=/certs/cert.key
+    volumes:
+      - ./mydata:/label-studio/data:rw
+      - ./deploy/nginx/certs:/certs:ro
+    #   Optional: Override nginx default conf
+    #      - ./deploy/my.conf:/etc/nginx/nginx.conf
+    command: nginx
+    networks:
+      - label-studio-dev
+
+  app:
+    stdin_open: true
+    tty: true
+    build: .
+    image: heartexlabs/label-studio:latest
+    restart: unless-stopped
+    expose:
+      - "8000"
+    depends_on:
+      - db-dev
+    environment:
+      - DJANGO_DB=default
+      - POSTGRE_NAME=postgres
+      - POSTGRE_USER=postgres
+      - POSTGRE_PASSWORD=
+      - POSTGRE_PORT=5432
+      - POSTGRE_HOST=db-dev
+      - LABEL_STUDIO_HOST=${LABEL_STUDIO_HOST:-}
+      - JSON_LOG=1
+    #      - LOG_LEVEL=DEBUG
+    volumes:
+      - ./mydata:/label-studio/data:rw
+    command: label-studio-uwsgi
+    networks:
+      - label-studio-dev
+
+  db-dev:
+    image: postgres:11.5
+    hostname: db-dev
+    restart: unless-stopped
+    # Optional: Enable TLS on PostgreSQL
+    # Just drop your server.crt and server.key into folder 'deploy/pgsql/certs'
+    # NOTE: Both files must have permissions u=rw (0600) or less
+    #    command: >
+    #      -c ssl=on
+    #      -c ssl_cert_file=/var/lib/postgresql/certs/server.crt
+    #      -c ssl_key_file=/var/lib/postgresql/certs/server.key
+    ports:
+      - "5435:5432"
+    # WARNING: "trust" disables password checks and the port above is
+    # published on the host. Use this for local development only.
+    environment:
+      - POSTGRES_HOST_AUTH_METHOD=trust
+    volumes:
+      - ${POSTGRES_DATA_DIR:-./postgres-data}:/var/lib/postgresql/data
+      - ${POSTGRES_BACKUP_DIR:-./postgres-backups}:/var/lib/postgresql/backups
+      - ./deploy/pgsql/certs:/var/lib/postgresql/certs:ro
+    networks:
+      - label-studio-dev
+
+networks:
+  label-studio-dev:
+    name: label-studio-dev
+    driver: bridge
diff --git a/src/label-studio/label-studio-replica/initialize_replica.py b/src/label-studio/label-studio-replica/initialize_replica.py
new file mode 100644
index 00000000..71fc30bb
--- /dev/null
+++ b/src/label-studio/label-studio-replica/initialize_replica.py
@@ -0,0 +1,137 @@
+"""Spin up a replica Label Studio project seeded from production.
+
+Creates the "FRDC Replica" project on the replica server, attaches the GCS
+bucket as an import source, syncs it, then copies one annotator's production
+annotations over as predictions.
+
+The ``# %%`` markers allow the script to be run cell-by-cell in an IDE, which
+is why the steps live at module level rather than inside a ``main()``.
+"""
+
+import os
+import time
+from collections import defaultdict
+from pathlib import Path
+
+import label_studio_sdk
+
+THIS_DIR = Path(__file__).parent
+
+# Only production annotations whose "created_username" contains this string
+# are copied to the replica.
+PRD_ANNOTATOR = "dev_evening"
+
+
+def _require_env(name):
+    """Return environment variable *name*, exiting with a message if unset."""
+    value = os.getenv(name)
+    if not value:
+        raise SystemExit(f"Environment variable {name} must be set.")
+    return value
+
+
+# API tokens are read from the environment so that they are never committed.
+# Fail early with a clear message rather than with an auth error later on.
+dev_api_key = _require_env("REPLICA_LABEL_STUDIO_API_KEY")
+prd_api_key = _require_env("LABEL_STUDIO_API_KEY")
+dev_url = "http://localhost:8082"
+prd_url = "http://localhost:8080"
+
+# We can initialize the SDK as follows.
+# The client is like the middleman between you as a programmer, and the
+# Label Studio (LS) server.
+dev_client = label_studio_sdk.Client(url=dev_url, api_key=dev_api_key)
+prd_client = label_studio_sdk.Client(url=prd_url, api_key=prd_api_key)
+
+# This is the labelling interface configuration, stored next to this script.
+dev_config = (THIS_DIR / "default_config.xml").read_text(encoding="utf-8")
+
+# %%
+print("Creating Development Project...")
+# Creates the project, note to set the config here
+dev_proj = dev_client.create_project(
+    title="FRDC Replica",
+    description="This is the replica project of FRDC. It's ok to break this.",
+    label_config=dev_config,
+    color="#FF0025",
+)
+# %%
+print("Adding Import Source...")
+# This links to our GCS as an import source
+dev_storage = dev_proj.connect_google_import_storage(
+    bucket="frdc-ds",
+    # The dot is escaped so that it matches a literal "." before "jpg"
+    # instead of any character.
+    regex_filter=r".*\.jpg",
+    google_application_credentials=(
+        THIS_DIR / "frmodel-943e4feae446.json"
+    ).read_text(encoding="utf-8"),
+    presign=False,
+    title="Source",
+)
+# NOTE(review): fixed pause, presumably to let the server register the
+# storage before the next call - confirm whether it is still needed.
+time.sleep(5)
+# %%
+print("Syncing Storage...")
+# Then, we sync it so that all the images appear as annotation targets
+dev_proj.sync_storage(
+    storage_type=dev_storage["type"],
+    storage_id=dev_storage["id"],
+)
+time.sleep(5)
+# %%
+print("Retrieving Tasks...")
+prd_proj = prd_client.get_project(id=1)
+prd_tasks = prd_proj.get_tasks()
+dev_tasks = dev_proj.get_tasks()
+
+# Index the dev tasks by file name once, so that each production task is
+# matched with a dictionary lookup instead of a scan over every dev task.
+dev_tasks_by_fn = defaultdict(list)
+for task in dev_tasks:
+    dev_tasks_by_fn[task["storage_filename"]].append(task)
+# %%
+# This step copies over the annotations from the production to the development
+# This creates it as a "prediction"
+print("Copying Annotations...")
+for prd_task in prd_tasks:
+    # For each prod task, we find the corresponding (image) file name
+    prd_fn = prd_task["storage_filename"]
+
+    # Then, we find the corresponding task(s) in the development project
+    dev_tasks_matched = dev_tasks_by_fn.get(prd_fn, [])
+
+    # Skip files that are missing from, or ambiguous in, the dev project
+    if not dev_tasks_matched:
+        print(f"File not found in dev: {prd_fn}")
+        continue
+    if len(dev_tasks_matched) > 1:
+        print(f"Too many matches found in dev: {prd_fn}")
+        continue
+
+    # Exactly one match remains
+    dev_task = dev_tasks_matched[0]
+
+    # Take the first annotation made by the chosen annotator, if any.
+    # Tasks without one are skipped instead of raising an IndexError.
+    prd_ann = next(
+        (
+            ann
+            for ann in prd_task["annotations"]
+            if PRD_ANNOTATOR in (ann.get("created_username") or "")
+        ),
+        None,
+    )
+    if prd_ann is None:
+        print(f"No annotation by {PRD_ANNOTATOR} found in prd: {prd_fn}")
+        continue
+
+    # Create the prediction using the result from production
+    dev_proj.create_prediction(
+        task_id=dev_task["id"],
+        result=prd_ann["result"],
+        model_version="API Testing Prediction",
+    )
+
+print("Done!")