Skip to content

Commit

Permalink
Merge pull request #73 from FR-DC/frml-154
Browse files Browse the repository at this point in the history
Adds scripts to spin up local replica Label Studio Server
  • Loading branch information
Eve-ning authored Jun 18, 2024
2 parents aa5292e + 367f6bf commit e9cb00a
Show file tree
Hide file tree
Showing 4 changed files with 333 additions and 16 deletions.
51 changes: 35 additions & 16 deletions src/frdc/load/label_studio.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,39 +16,58 @@ def get_bounds_and_labels(self) -> tuple[list[tuple[int, int]], list[str]]:
bounds = []
labels = []

ann = self["annotations"][0]
results = ann["result"]
for r_ix, r in enumerate(results):
r: dict
# Each annotation is an entire image labelled by a single person.
# By selecting the 0th annotation, we are usually selecting the main
# annotation.
annotation = self["annotations"][0]

# There is some metadata in `annotation`, but we only want the results
results = annotation["result"]

for bbox_ix, bbox in enumerate(results):
# 'id' = {str} 'jr4EXAKAV8'
# 'type' = {str} 'polygonlabels'
# 'value' = {dict: 3} {
# 'closed': True,
# 'points': [[x0, y0], [x1, y1], ... [xn, yn]],
# 'polygonlabels': ['label']
# }
# 'origin' = {str} 'manual'
# 'to_name' = {str} 'image'
# 'from_name' = {str} 'label'
# 'image_rotation' = {int} 0
# 'original_width' = {int} 450
# 'original_height' = {int} 600
bbox: dict

# See Issue FRML-78: Somehow some labels are actually just metadata
if r["from_name"] != "label":
if bbox["from_name"] != "label":
continue

# We flatten the value dict into the result dict
v = r.pop("value")
r = {**r, **v}
v = bbox.pop("value")
bbox = {**bbox, **v}

# Points are in percentage, we need to convert them to pixels
r["points"] = [
bbox["points"] = [
(
int(x * r["original_width"] / 100),
int(y * r["original_height"] / 100),
int(x * bbox["original_width"] / 100),
int(y * bbox["original_height"] / 100),
)
for x, y in r["points"]
for x, y in bbox["points"]
]

# Only take the first label as this is not a multi-label task
r["label"] = r.pop("polygonlabels")[0]
if not r["closed"]:
bbox["label"] = bbox.pop("polygonlabels")[0]
if not bbox["closed"]:
logger.warning(
f"Label for {r['label']} @ {r['points']} not closed. "
f"Label for {bbox['label']} @ {bbox['points']} not closed. "
f"Skipping"
)
continue

bounds.append(r["points"])
labels.append(r["label"])
bounds.append(bbox["points"])
labels.append(bbox["label"])

return bounds, labels

Expand Down
121 changes: 121 additions & 0 deletions src/label-studio/label-studio-replica/default_config.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
<View>
<!-- Labelling interface of the replica project: one image, species polygons drawn on it, and review fields recorded per region. initialize_replica.py reads this file and passes it as the project's label_config. -->
<Header size="2">Replica FRDC Server</Header>
<Header size="6" style="color:red">
This is a replica server. All changes will NOT be reflected onto the Machine Learning Pipeline.
</Header>
<!-- NOTE(review): value="$img" presumably binds the image to the "img" field of each task; confirm against the imported task data. -->
<Image name="image" value="$img" zoom="true" zoomControl="true"/>
<Header size="8">Select Species</Header>
<!-- Species picker in a 200px-high scrollable box; the filter (hotkey shift+f) targets the "label" control below. -->
<View style="height:200px; overflow:auto;">
<Filter name="filterSpecies" toName="label" hotkey="shift+f"
minlength="0" placeholder="Type Species Name"/>
<!-- name="label" matters: the loader in src/frdc/load/label_studio.py only keeps results whose from_name is "label" and skips the rest. -->
<PolygonLabels name="label" toName="image" strokeWidth="3"
pointSize="small" opacity="0.1" required="True">

<Label value="Alstonia Angulstiloba" background="#FFA39E"/>
<Label value="Bridelia Sp." background="#D4380D"/>
<Label value="Calophyllum" background="#FFC069"/>
<Label value="Campnosperma Auriculatum" background="#AD8B00"/>
<Label value="Cinnamomum Iners" background="#D3F261"/>
<Label value="Claoxylon Indicum" background="#389E0D"/>
<Label value="Clausena Excavata" background="#5CDBD3"/>
<Label value="Cratoxylum Formosum" background="#096DD9"/>
<Label value="Dillenia Suffruticosa" background="#ADC6FF"/>
<Label value="Fagraea fragans" background="#389E0D"/>
<Label value="Falcataria Moluccana" background="#9254DE"/>
<Label value="Ficus Variegata" background="#F759AB"/>
<Label value="Ixonanthes reticulata" background="#008BAD"/>
<Label value="Koompassia malaccensis" background="#FFA39E"/>
<Label value="Leea Indica" background="#FFA39E"/>
<Label value="Litsea firma" background="#F8FE00"/>
<Label value="Macaranga Gigantea" background="#D4380D"/>
<Label value="Palm Tree" background="#FFC069"/>
<Label value="Pennisetum Purpureum" background="#AD8B00"/>
<Label value="Pometia Pinnata" background="#D3F261"/>
<Label value="Prunus polystachya" background="#F3A201"/>
<Label value="Sandoricum Koetjape" background="#389E0D"/>
<Label value="Shorea Leprosula" background="#5CDBD3"/>
<Label value="Spathodea Campanulatum" background="#096DD9"/>
<Label value="Sterculia Parviflora" background="#E178C5"/>
<Label value="Syzygium grande" background="#5C0BD3"/>
<Label value="Syzygium Polyanthum" background="#9254DE"/>
<Label value="Terminalia Catappa" background="#F759AB"/>
<Label value="_Unknown" background="#676767"/>
</PolygonLabels>
</View>

<!-- Review section: only visible while a region is selected; its controls are marked perRegion, so values are stored against the selected region. -->
<View visibleWhen="region-selected">
<Header>Select Quality</Header>
<Choices name="Quality" toName="image" perRegion="true"
choice="single-radio" required="false">
<Choice value="Good" background="green"/>
<Choice value="Bad" background="red"/>
</Choices>

<!-- Reasons are only offered once Quality is set to "Bad"; choosing "Others (write in comment)" reveals a free-text field. -->
<Choices name="BadRatingReason" toName="image" choice="multiple"
showInLine="true" visibleWhen="choice-selected"
whenTagName="Quality" whenChoiceValue="Bad" perRegion="true">

<Choice value="Artefacts Present"/>
<Choice value="Brightness Problem"/>
<Choice value="Others (write in comment)"/>

<View visibleWhen="choice-selected" whenTagName="BadRatingReason"
whenChoiceValue="Others (write in comment)">
<TextArea name="BadRatingReasonComment" toName="image"
perRegion="true" placeholder="Write the reason">
</TextArea>
</View>

</Choices>
<!-- Who submitted the region: a team label plus a fixed list of user IDs. -->
<Header>Submitted By (Team):</Header>
<View>
<!--Filter name="SubmitTeamFilter" toName="Team" hotkey="shift+f" minlength="0" placeholder="type team name"/ -->
<Labels required="false" name="Team" toName="image"
showInline="false" perRegion="true">
<Label value="EngineeringSubmit"/>
<Label value="EcologySubmit"/>
</Labels>
<Header size="7">UserID (Submit):</Header>
<!--TextArea required="false" name="SubmitUserId" toName="image" placeholder="userID" perRegion="true"/ -->
<!-- NOTE(review): this control points at toName="Team" (another control), while every other control here targets "image"; confirm this is intentional. -->
<Choices name="SubmitUserIdChoice" toName="Team" perRegion="true"
choice="single-radio" visibleWhen="region-selected">
<Choice value="jjsit"/>
<Choice value="shyap"/>
<Choice value="jchang"/>
<Choice value="bchew"/>
<Choice value="yksim"/>
<Choice value="ahassan"/>
</Choices>
</View>

<!-- Who checked the region and whether they agree; the checker's user ID is free text. -->
<Header>Checked By (Team):</Header>
<View>
<!--Filter name="CheckTeamFilter" toName="CheckTeam" hotkey="shift+f" minlength="0" placeholder="type team name"/ -->
<Labels required="false" name="CheckTeam" toName="image"
showInline="false" perRegion="true">
<Label value="EngineeringChecked_Agree"/>
<Label value="EcologyChecked_Agree"/>
<Label value="EngineeringChecked_Disagree"/>
<Label value="EcologyChecked_Disagree"/>


</Labels>
<Header size="7">UserID (Check):</Header>
<TextArea name="CheckUserId" toName="image" placeholder="userID"
perRegion="true"/>
</View>
<!-- Free-form comment on the region, tagged as new or resolved. -->
<Header>General Comment</Header>
<View>
<Labels required="false" name="CommentType" toName="image"
showInline="false" perRegion="true">
<Label value="New Comment"/>
<Label value="Resolved Comment"/>
</Labels>
<TextArea name="GeneralComment" toName="image" perRegion="true"
placeholder="General Comment">
</TextArea>
</View>

</View>

</View>
78 changes: 78 additions & 0 deletions src/label-studio/label-studio-replica/docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
# Local replica of the Label Studio stack: an nginx front end, the Label
# Studio application and a PostgreSQL database, all attached to the
# "label-studio-dev" bridge network.
version: "3.9"
services:
  nginx:
    build: .
    image: heartexlabs/label-studio:latest
    restart: unless-stopped
    # Host port 8082 is the replica URL used by initialize_replica.py
    # (http://localhost:8082).
    ports:
      - "8082:8085"
      - "8083:8086"
    depends_on:
      - app
    environment:
      - LABEL_STUDIO_HOST=${LABEL_STUDIO_HOST:-}
      # Optional: Specify SSL termination certificate & key
      # Just drop your cert.pem and cert.key into folder 'deploy/nginx/certs'
      # - NGINX_SSL_CERT=/certs/cert.pem
      # - NGINX_SSL_CERT_KEY=/certs/cert.key
    volumes:
      - ./mydata:/label-studio/data:rw
      - ./deploy/nginx/certs:/certs:ro
      # Optional: Override nginx default conf
      # - ./deploy/my.conf:/etc/nginx/nginx.conf
    command: nginx
    networks:
      - label-studio-dev

  app:
    stdin_open: true
    tty: true
    build: .
    image: heartexlabs/label-studio:latest
    restart: unless-stopped
    # Only exposed to other services on the network; nginx is the entry point
    # published to the host.
    expose:
      - "8000"
    depends_on:
      - db-dev
    environment:
      - DJANGO_DB=default
      - POSTGRE_NAME=postgres
      - POSTGRE_USER=postgres
      - POSTGRE_PASSWORD=
      - POSTGRE_PORT=5432
      - POSTGRE_HOST=db-dev
      - LABEL_STUDIO_HOST=${LABEL_STUDIO_HOST:-}
      - JSON_LOG=1
      # - LOG_LEVEL=DEBUG
    volumes:
      - ./mydata:/label-studio/data:rw
    command: label-studio-uwsgi
    networks:
      - label-studio-dev

  db-dev:
    image: postgres:11.5
    hostname: db-dev
    restart: unless-stopped
    # Optional: Enable TLS on PostgreSQL
    # Just drop your server.crt and server.key into folder 'deploy/pgsql/certs'
    # NOTE: Both files must have permissions u=rw (0600) or less
    # command: >
    #   -c ssl=on
    #   -c ssl_cert_file=/var/lib/postgresql/certs/server.crt
    #   -c ssl_key_file=/var/lib/postgresql/certs/server.key
    ports:
      - "5435:5432"
    environment:
      # NOTE(review): "trust" disables password checks while the port above
      # is published to the host. Only acceptable for a throwaway local
      # replica; do not reuse this for a shared deployment.
      - POSTGRES_HOST_AUTH_METHOD=trust
    volumes:
      - ${POSTGRES_DATA_DIR:-./postgres-data}:/var/lib/postgresql/data
      # Backups get their own variable: reusing POSTGRES_DATA_DIR here would
      # mount the live data directory as the backups directory as well.
      - ${POSTGRES_BACKUP_DIR:-./postgres-backups}:/var/lib/postgresql/backups
      - ./deploy/pgsql/certs:/var/lib/postgresql/certs:ro
    networks:
      - label-studio-dev

networks:
  label-studio-dev:
    name: label-studio-dev
    driver: bridge
99 changes: 99 additions & 0 deletions src/label-studio/label-studio-replica/initialize_replica.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
import os
import time
from pathlib import Path

import label_studio_sdk

THIS_DIR = Path(__file__).parent

# API tokens are read from the environment so they never end up in the
# repository: REPLICA_LABEL_STUDIO_API_KEY for the replica (dev) server and
# LABEL_STUDIO_API_KEY for the production server.
dev_api_key = os.getenv("REPLICA_LABEL_STUDIO_API_KEY")
prd_api_key = os.getenv("LABEL_STUDIO_API_KEY")
dev_url = "http://localhost:8082"
prd_url = "http://localhost:8080"

# The client is the middleman between this script and a Label Studio (LS)
# server; one client per server.
dev_client = label_studio_sdk.Client(url=dev_url, api_key=dev_api_key)
prd_client = label_studio_sdk.Client(url=prd_url, api_key=prd_api_key)

# The labelling interface configuration, kept next to this script as XML.
dev_config = (THIS_DIR / "default_config.xml").read_text()

# %%
print("Creating Development Project...")
# Creates the project; the labelling config has to be set here.
dev_proj = dev_client.create_project(
    title="FRDC Replica",
    description="This is the replica project of FRDC. It's ok to break this.",
    label_config=dev_config,
    color="#FF0025",
)
# %%
print("Adding Import Source...")
# Link the GCS bucket as an import source. The service-account JSON is read
# from a file next to this script and passed as text.
dev_storage = dev_proj.connect_google_import_storage(
    bucket="frdc-ds",
    regex_filter=".*.jpg",
    google_application_credentials=(
        THIS_DIR / "frmodel-943e4feae446.json"
    ).read_text(),
    presign=False,
    title="Source",
)
# Pause before the next request to the server.
time.sleep(5)
# %%
print("Syncing Storage...")
# Sync the storage so that all the images appear as annotation targets.
dev_proj.sync_storage(
    storage_type=dev_storage["type"],
    storage_id=dev_storage["id"],
)
# Pause again before the tasks are listed.
time.sleep(5)
# %%
print("Retrieving Tasks...")
prd_proj = prd_client.get_project(id=1)
prd_tasks = prd_proj.get_tasks()
dev_tasks = dev_proj.get_tasks()
# %%
# Copy the annotations from production to the replica, where each one is
# created as a "prediction".
print("Copying Annotations...")

# Index the dev tasks by file name once, so that every production task is
# matched with a dictionary lookup instead of a scan over all dev tasks.
dev_tasks_by_fn = {}
for dev_task in dev_tasks:
    dev_tasks_by_fn.setdefault(dev_task["storage_filename"], []).append(
        dev_task
    )

# Only annotations whose creator username contains this text are copied.
annotator = "dev_evening"

for prd_task in prd_tasks:
    # Tasks are paired across servers by their (image) file name.
    prd_fn = prd_task["storage_filename"]
    dev_tasks_matched = dev_tasks_by_fn.get(prd_fn, [])

    # An ambiguous or missing match is reported and skipped.
    if not dev_tasks_matched:
        print(f"File not found in dev: {prd_fn}")
        continue
    if len(dev_tasks_matched) > 1:
        print(f"Too many matches found in dev: {prd_fn}")
        continue

    dev_task = dev_tasks_matched[0]

    # Take the first annotation by the chosen annotator. A task without one
    # is skipped instead of aborting the whole copy with an IndexError.
    prd_ann = next(
        (
            ann
            for ann in prd_task["annotations"]
            if annotator in ann["created_username"]
        ),
        None,
    )
    if prd_ann is None:
        print(f"No annotation by {annotator} found in prod: {prd_fn}")
        continue

    # Create the prediction using the result from production.
    dev_proj.create_prediction(
        task_id=dev_task["id"],
        result=prd_ann["result"],
        model_version="API Testing Prediction",
    )

print("Done!")

0 comments on commit e9cb00a

Please sign in to comment.