From 5ed63c861097a6c4dc4096bea84570696af88992 Mon Sep 17 00:00:00 2001 From: shondle Date: Mon, 18 Sep 2023 12:40:38 -0500 Subject: [PATCH 01/21] Add project folder --- label_studio_ml/examples/yolov8/README.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 label_studio_ml/examples/yolov8/README.md diff --git a/label_studio_ml/examples/yolov8/README.md b/label_studio_ml/examples/yolov8/README.md new file mode 100644 index 000000000..1aaac3fda --- /dev/null +++ b/label_studio_ml/examples/yolov8/README.md @@ -0,0 +1 @@ +Here is the project that integrates YOLOV8 with Label Studio. \ No newline at end of file From f14005f08d48d477dde4af76b970043e8f838fb9 Mon Sep 17 00:00:00 2001 From: shondle Date: Mon, 18 Sep 2023 12:51:34 -0500 Subject: [PATCH 02/21] Add boilerplate --- label_studio_ml/examples/yolov8/Dockerfile | 19 +++ label_studio_ml/examples/yolov8/_wsgi.py | 114 ++++++++++++++++++ .../examples/yolov8/docker-compose.yml | 13 ++ label_studio_ml/examples/yolov8/model.py | 62 ++++++++++ .../examples/yolov8/requirements.txt | 2 + 5 files changed, 210 insertions(+) create mode 100644 label_studio_ml/examples/yolov8/Dockerfile create mode 100644 label_studio_ml/examples/yolov8/_wsgi.py create mode 100644 label_studio_ml/examples/yolov8/docker-compose.yml create mode 100644 label_studio_ml/examples/yolov8/model.py create mode 100644 label_studio_ml/examples/yolov8/requirements.txt diff --git a/label_studio_ml/examples/yolov8/Dockerfile b/label_studio_ml/examples/yolov8/Dockerfile new file mode 100644 index 000000000..d4ca437a2 --- /dev/null +++ b/label_studio_ml/examples/yolov8/Dockerfile @@ -0,0 +1,19 @@ +FROM python:3.8-slim + +RUN apt-get update && \ + apt-get upgrade -y && \ + apt-get install -y git + +ENV PYTHONUNBUFFERED=True \ + PORT=9090 \ + WORKERS=2 \ + THREADS=4 + +WORKDIR /app +COPY requirements.txt . + +RUN pip install --no-cache-dir -r requirements.txt + +COPY . 
./ + +CMD exec gunicorn --preload --bind :$PORT --workers $WORKERS --threads $THREADS --timeout 0 _wsgi:app diff --git a/label_studio_ml/examples/yolov8/_wsgi.py b/label_studio_ml/examples/yolov8/_wsgi.py new file mode 100644 index 000000000..9ab9ef564 --- /dev/null +++ b/label_studio_ml/examples/yolov8/_wsgi.py @@ -0,0 +1,114 @@ +import os +import argparse +import json +import logging +import logging.config + +logging.config.dictConfig({ + "version": 1, + "formatters": { + "standard": { + "format": "[%(asctime)s] [%(levelname)s] [%(name)s::%(funcName)s::%(lineno)d] %(message)s" + } + }, + "handlers": { + "console": { + "class": "logging.StreamHandler", + "level": os.getenv('LOG_LEVEL'), + "stream": "ext://sys.stdout", + "formatter": "standard" + } + }, + "root": { + "level": os.getenv('LOG_LEVEL'), + "handlers": [ + "console" + ], + "propagate": True + } +}) + +from label_studio_ml.api import init_app +from model import NewModel + + +_DEFAULT_CONFIG_PATH = os.path.join(os.path.dirname(__file__), 'config.json') + + +def get_kwargs_from_config(config_path=_DEFAULT_CONFIG_PATH): + if not os.path.exists(config_path): + return dict() + with open(config_path) as f: + config = json.load(f) + assert isinstance(config, dict) + return config + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description='Label studio') + parser.add_argument( + '-p', '--port', dest='port', type=int, default=9090, + help='Server port') + parser.add_argument( + '--host', dest='host', type=str, default='0.0.0.0', + help='Server host') + parser.add_argument( + '--kwargs', '--with', dest='kwargs', metavar='KEY=VAL', nargs='+', type=lambda kv: kv.split('='), + help='Additional LabelStudioMLBase model initialization kwargs') + parser.add_argument( + '-d', '--debug', dest='debug', action='store_true', + help='Switch debug mode') + parser.add_argument( + '--log-level', dest='log_level', choices=['DEBUG', 'INFO', 'WARNING', 'ERROR'], default=None, + help='Logging level') + 
parser.add_argument( + '--model-dir', dest='model_dir', default=os.path.dirname(__file__), + help='Directory where models are stored (relative to the project directory)') + parser.add_argument( + '--check', dest='check', action='store_true', + help='Validate model instance before launching server') + + args = parser.parse_args() + + # setup logging level + if args.log_level: + logging.root.setLevel(args.log_level) + + def isfloat(value): + try: + float(value) + return True + except ValueError: + return False + + def parse_kwargs(): + param = dict() + for k, v in args.kwargs: + if v.isdigit(): + param[k] = int(v) + elif v == 'True' or v == 'true': + param[k] = True + elif v == 'False' or v == 'false': + param[k] = False + elif isfloat(v): + param[k] = float(v) + else: + param[k] = v + return param + + kwargs = get_kwargs_from_config() + + if args.kwargs: + kwargs.update(parse_kwargs()) + + if args.check: + print('Check "' + NewModel.__name__ + '" instance creation..') + model = NewModel(**kwargs) + + app = init_app(model_class=NewModel) + + app.run(host=args.host, port=args.port, debug=args.debug) + +else: + # for uWSGI use + app = init_app(model_class=NewModel) diff --git a/label_studio_ml/examples/yolov8/docker-compose.yml b/label_studio_ml/examples/yolov8/docker-compose.yml new file mode 100644 index 000000000..050e7f189 --- /dev/null +++ b/label_studio_ml/examples/yolov8/docker-compose.yml @@ -0,0 +1,13 @@ +version: "3.8" + +services: + ml-backend: + container_name: ml-backend + build: . 
+ environment: + - MODEL_DIR=/data/models + - WORKERS=2 + - THREADS=4 + - LOG_LEVEL=DEBUG + ports: + - "9090:9090" diff --git a/label_studio_ml/examples/yolov8/model.py b/label_studio_ml/examples/yolov8/model.py new file mode 100644 index 000000000..81569c85e --- /dev/null +++ b/label_studio_ml/examples/yolov8/model.py @@ -0,0 +1,62 @@ +from typing import List, Dict, Optional +from label_studio_ml.model import LabelStudioMLBase + +from ultralytics import YOLO + + +class NewModel(LabelStudioMLBase): + + def predict(self, tasks: List[Dict], context: Optional[Dict] = None, **kwargs) -> List[Dict]: + """ Write your inference logic here + :param tasks: [Label Studio tasks in JSON format](https://labelstud.io/guide/task_format.html) + :param context: [Label Studio context in JSON format](https://labelstud.io/guide/ml.html#Passing-data-to-ML-backend) + :return predictions: [Predictions array in JSON format](https://labelstud.io/guide/export.html#Raw-JSON-format-of-completed-tasks) + """ + print(f'''\ + Run prediction on {tasks} + Received context: {context} + Project ID: {self.project_id} + Label config: {self.label_config} + Parsed JSON Label config: {self.parsed_label_config}''') + + # TODO: here figure out what type of prediction we are looking for -> classification, segmentation, bounding boxes, etc. 
+ context = "classification" + + if context=="classification": + model = YOLO('yolov8n-cls.pt') + + + img = 'https://ultralytics.com/images/bus.jpg' + + + results = model(img) + + + + + return [] + + def fit(self, event, data, **kwargs): + """ + This method is called each time an annotation is created or updated + You can run your logic here to update the model and persist it to the cache + It is not recommended to perform long-running operations here, as it will block the main thread + Instead, consider running a separate process or a thread (like RQ worker) to perform the training + :param event: event type can be ('ANNOTATION_CREATED', 'ANNOTATION_UPDATED') + :param data: the payload received from the event (check [Webhook event reference](https://labelstud.io/guide/webhook_reference.html)) + """ + + # use cache to retrieve the data from the previous fit() runs + old_data = self.get('my_data') + old_model_version = self.get('model_version') + print(f'Old data: {old_data}') + print(f'Old model version: {old_model_version}') + + # store new data to the cache + self.set('my_data', 'my_new_data_value') + self.set('model_version', 'my_new_model_version') + print(f'New data: {self.get("my_data")}') + print(f'New model version: {self.get("model_version")}') + + print('fit() completed successfully.') + diff --git a/label_studio_ml/examples/yolov8/requirements.txt b/label_studio_ml/examples/yolov8/requirements.txt new file mode 100644 index 000000000..2bf45e086 --- /dev/null +++ b/label_studio_ml/examples/yolov8/requirements.txt @@ -0,0 +1,2 @@ +gunicorn==20.1.0 +label-studio-ml @ git+https://github.com/HumanSignal/label-studio-ml-backend.git \ No newline at end of file From 666cf854bdec9e2ddec9f691878d83ee298a6fcf Mon Sep 17 00:00:00 2001 From: shondle Date: Fri, 3 Nov 2023 12:01:10 -0500 Subject: [PATCH 03/21] Add model loading --- label_studio_ml/examples/yolov8/model.py | 325 ++++++++++++++++++++++- 1 file changed, 311 insertions(+), 14 deletions(-) diff --git 
a/label_studio_ml/examples/yolov8/model.py b/label_studio_ml/examples/yolov8/model.py index 81569c85e..925af37cc 100644 --- a/label_studio_ml/examples/yolov8/model.py +++ b/label_studio_ml/examples/yolov8/model.py @@ -1,40 +1,199 @@ from typing import List, Dict, Optional from label_studio_ml.model import LabelStudioMLBase +from label_studio_ml.utils import get_image_local_path + +import os +from PIL import Image +from uuid import uuid4 from ultralytics import YOLO +import torch + +import os +import yaml + + +LABEL_STUDIO_ACCESS_TOKEN = os.environ.get("LABEL_STUDIO_ACCESS_TOKEN") +LABEL_STUDIO_HOST = os.environ.get("LABEL_STUDIO_HOST") + + +# change config file depending on how many classes there are in the saved model + +NEW_START = False + +if NEW_START: + model = YOLO('yolov8n.pt') +else: + model = YOLO('./yolov8n.yml') + # model = torch.hub.load('ultralytics/yolov8', 'yolov8n', classes=2) + model.load_state_dict(torch.load('yolov8n(testing).pt')) + # model.eval() + +# TODO: +# figure out how to integrate class names for things not predicted + + +class YOLO(LabelStudioMLBase): + def __init__(self, project_id, **kwargs): + super(YOLO, self).__init__(**kwargs) + self.device = "cuda" if torch.cuda.is_available else "cpu" # can to mps -class NewModel(LabelStudioMLBase): + # read this from the config file + self.class_to_name = { + 0: "cats", + 1: "dog" + } + + # print(self.label_config) + # print(self.parsed_label_config) + + + # TODO: this should all be done before loading the model + + + # create a new YAML file for training + parsed = self.parsed_label_config + classes = parsed['label']['labels'] + + self.class_to_name = {i:v for i,v in enumerate(classes)} + self.name_to_class = {v:k for k, v in self.class_to_name.items()} + + input_file = "train_config.yml" + with open(input_file, "r") as file: + data = yaml.safe_load(file) + + data["names"] = self.class_to_name + + with open(input_file, "w") as file: + yaml.dump(data, file, default_flow_style=False) + + # TODO: 
adjust num_classes in the yolov8 yaml file as well + weights_file = "yolov8.yml" + with open(weights_file, "r") as file: + weights = yaml.safe_load(file) + + weights["nc"] = len(self.class_to_name) + + with open(weights_file, "w") as file: + yaml.dump(weights, file, default_flow_style=False) + + # model = YOLO('./yolov8n.yml') + # # model = torch.hub.load('ultralytics/yolov8', 'yolov8n', classes=2) + # model.load_state_dict(torch.load('yolov8n(testing).pt')) + + + + print(classes) def predict(self, tasks: List[Dict], context: Optional[Dict] = None, **kwargs) -> List[Dict]: """ Write your inference logic here :param tasks: [Label Studio tasks in JSON format](https://labelstud.io/guide/task_format.html) :param context: [Label Studio context in JSON format](https://labelstud.io/guide/ml.html#Passing-data-to-ML-backend) :return predictions: [Predictions array in JSON format](https://labelstud.io/guide/export.html#Raw-JSON-format-of-completed-tasks) """ - print(f'''\ - Run prediction on {tasks} - Received context: {context} - Project ID: {self.project_id} - Label config: {self.label_config} - Parsed JSON Label config: {self.parsed_label_config}''') + # print(f'''\ + # Run prediction on {tasks} + # Received context: {context} + # Label config: {self.label_config} + # Parsed JSON Label config: {self.parsed_label_config}''') + + # Project ID: {self.project_id} + # model.eval() + + self.from_name, self.to_name, self.value = self.get_first_tag_occurence('RectangleLabels', 'Image') + + imgs = [] + lengths = [] + + # loading all images into lists + for task in tasks: + + raw_img_path = task['data']['image'] + + try: + img_path = get_image_local_path( + raw_img_path, + label_studio_access_token=LABEL_STUDIO_ACCESS_TOKEN, + label_studio_host=LABEL_STUDIO_HOST + ) + print(f"the real image path is {img_path}") + except: + img_path = raw_img_path + + img = Image.open(img_path) - # TODO: here figure out what type of prediction we are looking for -> classification, segmentation, 
bounding boxes, etc. - context = "classification" + imgs.append(img) - if context=="classification": - model = YOLO('yolov8n-cls.pt') + W, H = img.size + lengths.append((H, W)) + # predicting from PIL loaded images + results = model.predict(source=imgs) # define model earlier - img = 'https://ultralytics.com/images/bus.jpg' + # each item will be the predictions for a task + predictions = [] + # basically, running this loop for each task + for (result, len) in zip(results, lengths): + boxes = result.boxes.cpu().numpy() + + print(result.names) + + print(f"the confidences are {boxes.conf}") + + predictions.append(self.get_results(boxes.xywh, boxes.cls, len, boxes.conf, result.names)) - results = model(img) + # # TODO: here figure out what type of prediction we are looking for -> classification, segmentation, bounding boxes, etc. + # context = "classification" + + # if context=="classification": + # model = YOLO('yolov8n-cls.pt') + # img = 'https://ultralytics.com/images/bus.jpg' + # results = model(img) + + # find how to get images and labels from different places YOLO + + return predictions + + def get_results(self, boxes, classes, length, confidences, names_dict): + results = [] + + for box, name, conf in zip(boxes, classes, confidences): + + label_id = str(uuid4())[:9] + + x, y, w, h = box + height, width = length - return [] + + results.append({ + 'id': label_id, + 'from_name': self.from_name, + 'to_name': self.to_name, + 'original_width': int(width), + 'original_height': int(height), + 'image_rotation': 0, + 'value': { + 'rotation': 0, + # 'rectanglelabels': [self.class_to_name[f"{int(name)}"]], + 'rectanglelabels': [names_dict[int(name)]], + 'width': w / width * 100, # this is correcrt + 'height': h / height * 100, # this is also correct + 'x': (x - 0.5*w) / width * 100, + 'y': (y-0.5*h) / height * 100 + }, + 'score': conf.item(), + 'type': 'rectanglelabels', + 'readonly': False + }) + + return { + 'result': results + } def fit(self, event, data, **kwargs): """ 
@@ -46,6 +205,135 @@ def fit(self, event, data, **kwargs): :param data: the payload received from the event (check [Webhook event reference](https://labelstud.io/guide/webhook_reference.html)) """ + # model.train() + + print(f"the fit is {data}") + # results = data["annotation"]["result"] + # ^ this will be a list of all the rectangles you are fine tuning + + # figure out how to do this with multiple images at once + + results = data['annotation']['result'] + data = data['task']['data'] + image_path = data['image'] + image_paths = [image_path] + + true_img_paths = [] + for raw_img_path in image_paths: + try: + img_path = get_image_local_path( + raw_img_path, + label_studio_access_token=LABEL_STUDIO_ACCESS_TOKEN, + label_studio_host=LABEL_STUDIO_HOST + ) + except: + img_path = raw_img_path + + img = Image.open(img_path) + + name = raw_img_path.split("/")[-1] + + true_img_paths.append(img_path) + + # im_save = img.save(f"dataset/images/{name}") + + sample_img_path = true_img_paths[0] + + img = Image.open(sample_img_path) + + project_path = sample_img_path.split("/")[:-1] + image_name = sample_img_path.split("/")[-1] + + print(f"image name is {image_name}") + + img1 = img.save(f"./datasets/temp/images/{image_name}") + img2 = img.save(f"./datasets/temp/images/(2){image_name}") + + # these rename the directories for label studio format + # os.rename(project_path.split("/"), project_path.replace((f"/{project_path.split('/')[-2]}/"), "images")) + + # # making the labels directory + # os.mkdir(img_path.split("/")[:-1], "labels") + + # now saving text file labels + txt_name = (image_path.split('/')[-1]).split('.')[0] + + with open(f'./datasets/temp/labels/{txt_name}.txt', 'w') as f: + f.write("") + with open(f'./datasets/temp/labels/(2){txt_name}.txt', 'w') as f: + f.write("") + + + for result in results: + value = result['value'] + label = value['rectanglelabels'][0] + + # these are out of 100, so you need to convert them back + x = value['x'] + y = value['y'] + width = 
value['width'] + height = value['height'] + + orig_width = result['original_width'] + orig_height = result['original_height'] + + + # doing the inverse of these operation, but keeping it normalized + # 'width': w / width * 100, # this is correcrt + # 'height': h / height * 100, # this is also correct + # 'x': (x - 0.5*w) / width * 100, + # 'y': (y-0.5*h) / height * 100 + + # so, in YOLO format, we just need to to be normalize to 1 + + w = width / 100 + h = height / 100 + trans_x = (x / 100) + 0.5 * w + trans_y = (y / 100) + 0.5 * h + + # now getting the class label + label = self.name_to_class.get(label) + + with open(f'./datasets/temp/labels/{txt_name}.txt', 'a') as f: + f.write(f"{label} {trans_x} {trans_y} {w} {h}\n") + with open(f'./datasets/temp/labels/(2){txt_name}.txt', 'a') as f: + f.write(f"{label} {trans_x} {trans_y} {w} {h}\n") + + + results = model.train(data='train_config.yml', epochs = 1, imgsz=640) + # indexing error if there is only one image + # do two images or more images for no error + + + + # you can send a list of images into the YAML file + # so we can just save thelabels int eh data upload directory? + # for now let's just work on saving all the images in a new directory and then using that? 
+ + # this is assuming all images are in a list + + + # TODO: make sure this rewrites whatever images were already there + # having so many images rewritten is a time consuming process - think of a way to mitigate this + + """Here is the process + + - whatever project you are in, rename that project to images + - create a labels directory there as well + - make sure the above doesn't break label studio + - put the label text files in there + - create a images.txt file that contains only the paths of the images that have been chosen by the user + - remove the txt files when done and labels and RENAME back the labels directory + """ + + # setting the new model + # self.set("new_model", model) + # set a new model version + + + print(f"the event is {event}") # ANNOTATION CREATED + print(f"kwargs are {kwargs}") + # use cache to retrieve the data from the previous fit() runs old_data = self.get('my_data') old_model_version = self.get('model_version') @@ -60,3 +348,12 @@ def fit(self, event, data, **kwargs): print('fit() completed successfully.') + + # setting the new model -> this is only key settting so that we can retrieve it later + # self.set("new_model", model) + + # save the model to the directory + torch.save(model.state_dict(), 'yolov8n(testing).pt') + + # return {'model_file': 'yolov8n(testing).pt'} + From 65223947a1f2ae210b6b8c8fdd15abfb3b9893ce Mon Sep 17 00:00:00 2001 From: shondle Date: Sun, 5 Nov 2023 10:27:11 -0600 Subject: [PATCH 04/21] Change model config workflow --- label_studio_ml/examples/yolov8/model.py | 118 +++++++++++++++-------- 1 file changed, 80 insertions(+), 38 deletions(-) diff --git a/label_studio_ml/examples/yolov8/model.py b/label_studio_ml/examples/yolov8/model.py index 925af37cc..7059ea3ac 100644 --- a/label_studio_ml/examples/yolov8/model.py +++ b/label_studio_ml/examples/yolov8/model.py @@ -13,21 +13,46 @@ import yaml +# TODO: use the best.pt saved to load nstead +# 
https://github.com/ultralytics/ultralytics/issues/2750#issuecomment-1556847848 + + LABEL_STUDIO_ACCESS_TOKEN = os.environ.get("LABEL_STUDIO_ACCESS_TOKEN") LABEL_STUDIO_HOST = os.environ.get("LABEL_STUDIO_HOST") # change config file depending on how many classes there are in the saved model -NEW_START = False +NEW_START = True + + + +# defining model start +model = YOLO('yolov8n.pt') +# add logic that creates it from regular here if NEW_START: - model = YOLO('yolov8n.pt') + custom_model = YOLO('yolov8n(custom).pt') else: - model = YOLO('./yolov8n.yml') - # model = torch.hub.load('ultralytics/yolov8', 'yolov8n', classes=2) - model.load_state_dict(torch.load('yolov8n(testing).pt')) - # model.eval() + custom_model = YOLO('yolov8n(custom).pt') + +# else: +# dir_path = './runs/detect/' +# folders = os.listdir(dir_path) +# # sorted_folders = sorted(folders, key=lambda x: int(x.split("train")[-1])) +# import re +# # https://stackoverflow.com/questions/4623446/how-do-you-sort-files-numerically +# sorted_folders = folders.sort(key=lambda var:[int(x) if x.isdigit() else x for x in re.findall(r'[^0-9]|[0-9]+', var)]) + +# for folder in folders: +# split = folder.split("train")[-1] + +# if split is not "train": + + +# last_folder = folders[-1] +# print(f"the last folder is {last_folder} and {folders}") +# model = YOLO(f'./runs/detect/{last_folder}/weights/best.pt') # TODO: # figure out how to integrate class names for things not predicted @@ -39,49 +64,51 @@ def __init__(self, project_id, **kwargs): super(YOLO, self).__init__(**kwargs) self.device = "cuda" if torch.cuda.is_available else "cpu" # can to mps - - # read this from the config file - self.class_to_name = { - 0: "cats", - 1: "dog" - } - # print(self.label_config) # print(self.parsed_label_config) - - # TODO: this should all be done before loading the model - - + # this just needs to be done before training, not model loading # create a new YAML file for training parsed = self.parsed_label_config classes = 
parsed['label']['labels'] - self.class_to_name = {i:v for i,v in enumerate(classes)} - self.name_to_class = {v:k for k, v in self.class_to_name.items()} - input_file = "train_config.yml" + # if they change the labelling config, it shouldn't automatically destroy everything + input_file = "custom_config.yml" with open(input_file, "r") as file: data = yaml.safe_load(file) - - data["names"] = self.class_to_name - with open(input_file, "w") as file: - yaml.dump(data, file, default_flow_style=False) + if NEW_START: - # TODO: adjust num_classes in the yolov8 yaml file as well - weights_file = "yolov8.yml" - with open(weights_file, "r") as file: - weights = yaml.safe_load(file) - - weights["nc"] = len(self.class_to_name) + self.class_to_name = {i:v for i,v in enumerate(classes)} + + data["names"] = self.class_to_name - with open(weights_file, "w") as file: - yaml.dump(weights, file, default_flow_style=False) + with open(input_file, "w") as file: + yaml.dump(data, file, default_flow_style=False) + else: + self.class_to_name = data["names"] - # model = YOLO('./yolov8n.yml') - # # model = torch.hub.load('ultralytics/yolov8', 'yolov8n', classes=2) - # model.load_state_dict(torch.load('yolov8n(testing).pt')) + print(f"self class to name is {self.class_to_name}") + self.name_to_class = {v:k for k, v in self.class_to_name.items()} + + + # logic for using predefined YOLO classes + + + + # TODO: get from docker -> user dictionary that maps labelling config to COCO classes + config_to_COCO = { + "cats": "cat", + "lights": "traffic light", + "cars": "car", + } + + # TODO: use google images V7 instead for access to more classes + # calculate box overlap and remove the new model in that case + # otherwise, for now, just wait until predictions overlap then add a flag where you just use the new model now + + @@ -217,6 +244,7 @@ def fit(self, event, data, **kwargs): data = data['task']['data'] image_path = data['image'] image_paths = [image_path] + all_new_paths = [] true_img_paths = [] 
for raw_img_path in image_paths: @@ -249,6 +277,9 @@ def fit(self, event, data, **kwargs): img1 = img.save(f"./datasets/temp/images/{image_name}") img2 = img.save(f"./datasets/temp/images/(2){image_name}") + all_new_paths.append(f"./datasets/temp/images/{image_name}") + all_new_paths.append(f"./datasets/temp/images/(2){image_name}") + # these rename the directories for label studio format # os.rename(project_path.split("/"), project_path.replace((f"/{project_path.split('/')[-2]}/"), "images")) @@ -259,10 +290,13 @@ def fit(self, event, data, **kwargs): txt_name = (image_path.split('/')[-1]).split('.')[0] with open(f'./datasets/temp/labels/{txt_name}.txt', 'w') as f: - f.write("") + f.write("") with open(f'./datasets/temp/labels/(2){txt_name}.txt', 'w') as f: f.write("") + all_new_paths.append(f'./datasets/temp/labels/{txt_name}.txt') + all_new_paths.append(f'./datasets/temp/labels/(2){txt_name}.txt') + for result in results: value = result['value'] @@ -303,6 +337,9 @@ def fit(self, event, data, **kwargs): results = model.train(data='train_config.yml', epochs = 1, imgsz=640) # indexing error if there is only one image # do two images or more images for no error + + # remove all these files so train starts from nothing next time + self.remove_train_files(all_new_paths) @@ -316,6 +353,8 @@ def fit(self, event, data, **kwargs): # TODO: make sure this rewrites whatever images were already there # having so many images rewritten is a time consuming process - think of a way to mitigate this + + """Here is the process - whatever project you are in, rename that project to images @@ -353,7 +392,10 @@ def fit(self, event, data, **kwargs): # self.set("new_model", model) # save the model to the directory - torch.save(model.state_dict(), 'yolov8n(testing).pt') + # torch.save(model.state_dict(), 'yolov8n(testing).pt') + + def remove_train_files(self, file_paths): + for path in file_paths: + os.remove(path) - # return {'model_file': 'yolov8n(testing).pt'} From 
323e7521b2b402d3f6ae43dac2d30d784476399d Mon Sep 17 00:00:00 2001 From: shondle Date: Sun, 5 Nov 2023 14:08:23 -0600 Subject: [PATCH 05/21] Add TODO comments --- label_studio_ml/examples/yolov8/model.py | 27 ++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/label_studio_ml/examples/yolov8/model.py b/label_studio_ml/examples/yolov8/model.py index 7059ea3ac..39d119c1e 100644 --- a/label_studio_ml/examples/yolov8/model.py +++ b/label_studio_ml/examples/yolov8/model.py @@ -21,6 +21,33 @@ LABEL_STUDIO_HOST = os.environ.get("LABEL_STUDIO_HOST") +"""TODO + +CHOOSING THE RIGHT MODEL FOR PREDICTIONS AND PRETRAINING +1. add two models - 1 YOLO that will stay the same, 1 that will be used for fine tune + - dataset guide: https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/open-images-v7.yaml +2. only fine tune the second model + +GETTING THE RIGHT PREDICTIONS FROM BOTH THE MODELS + +One option - have 2 models, use the second model for classes not contained in the first model, and the first model for classes that are already contained +Note: if they want to train the best model, they'll have to train from scratch anyways (not just use the one from Label Studio). + + +CHOOSING CLASSES THAT OVERLAP WITH PRETRAINED YOLO +1. exchange YOLO with google v7 images model with more classes - https://docs.ultralytics.com/models/yolov8/#supported-modes +2. let user give overlaps in the docker file + +## ^ after the above, send a PR + + +FINE TUNING ON MULTIPLE IMAGES AT A TIME +1. add the ability to batch send things to train the second model +2. 
integrate DINO in to train the model on a bunch of images + +""" + + # change config file depending on how many classes there are in the saved model NEW_START = True From de5ed6a8f60650743f457f4deb1398f150d5bf92 Mon Sep 17 00:00:00 2001 From: shondle Date: Mon, 6 Nov 2023 12:36:12 -0600 Subject: [PATCH 06/21] Add two models --- label_studio_ml/examples/yolov8/model.py | 125 ++++++++++++++--------- 1 file changed, 76 insertions(+), 49 deletions(-) diff --git a/label_studio_ml/examples/yolov8/model.py b/label_studio_ml/examples/yolov8/model.py index 39d119c1e..346d7fc75 100644 --- a/label_studio_ml/examples/yolov8/model.py +++ b/label_studio_ml/examples/yolov8/model.py @@ -23,15 +23,20 @@ """TODO +TODO: PROBLEM-------- +make sure the classes are cut before using the custom model, otherwise it will double up on predictions and then provide output labels with the None type + CHOOSING THE RIGHT MODEL FOR PREDICTIONS AND PRETRAINING 1. add two models - 1 YOLO that will stay the same, 1 that will be used for fine tune - dataset guide: https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/open-images-v7.yaml 2. only fine tune the second model +CHECK FINE TUNING COORDINATE LOCATIONT TRANSFORMS GETTING THE RIGHT PREDICTIONS FROM BOTH THE MODELS One option - have 2 models, use the second model for classes not contained in the first model, and the first model for classes that are already contained Note: if they want to train the best model, they'll have to train from scratch anyways (not just use the one from Label Studio). +ALSO REMOVE UNNCECESSARY CLASSES FROM BEING USED FROM THE FIRST MODEL CHOOSING CLASSES THAT OVERLAP WITH PRETRAINED YOLO @@ -42,7 +47,7 @@ FINE TUNING ON MULTIPLE IMAGES AT A TIME -1. add the ability to batch send things to train the second model +1. add the ability to batch send things to train the models 2. 
integrate DINO in to train the model on a bunch of images """ @@ -55,7 +60,7 @@ # defining model start -model = YOLO('yolov8n.pt') +pretrained_model = YOLO('yolov8n.pt') # add logic that creates it from regular here if NEW_START: @@ -99,6 +104,19 @@ def __init__(self, project_id, **kwargs): parsed = self.parsed_label_config classes = parsed['label']['labels'] + # TODO: get from docker -> user dictionary that maps labelling config to COCO classes + label_to_COCO = { + "cats": "cat", + "lights": "traffic light", + "cars": "car", + } + + self.COCO_to_label = {v:k for k, v in label_to_COCO.items()} + + first_label_classes = list(label_to_COCO.keys()) # raw labels from labelling config + second_label_classes = [x for x in classes if x not in set(first_label_classes)] # raw labels from labelling config + + # if they change the labelling config, it shouldn't automatically destroy everything input_file = "custom_config.yml" @@ -107,30 +125,21 @@ def __init__(self, project_id, **kwargs): if NEW_START: - self.class_to_name = {i:v for i,v in enumerate(classes)} + self.custom_num_to_name = {i:v for i,v in enumerate(second_label_classes)} - data["names"] = self.class_to_name + data["names"] = self.custom_num_to_name with open(input_file, "w") as file: yaml.dump(data, file, default_flow_style=False) else: - self.class_to_name = data["names"] + self.custom_num_to_name = data["names"] - print(f"self class to name is {self.class_to_name}") - self.name_to_class = {v:k for k, v in self.class_to_name.items()} + print(f"self class to name is {self.custom_num_to_name}") + self.custom_name_to_num = {v:k for k, v in self.custom_num_to_name.items()} # logic for using predefined YOLO classes - - - # TODO: get from docker -> user dictionary that maps labelling config to COCO classes - config_to_COCO = { - "cats": "cat", - "lights": "traffic light", - "cars": "car", - } - # TODO: use google images V7 instead for access to more classes # calculate box overlap and remove the new model in that 
case # otherwise, for now, just wait until predictions overlap then add a flag where you just use the new model now @@ -183,20 +192,25 @@ def predict(self, tasks: List[Dict], context: Optional[Dict] = None, **kwargs) - lengths.append((H, W)) # predicting from PIL loaded images - results = model.predict(source=imgs) # define model earlier + results_1 = pretrained_model.predict(source=imgs) # define model earlier + results_2 = custom_model.predict(source=imgs) # each item will be the predictions for a task predictions = [] # basically, running this loop for each task - for (result, len) in zip(results, lengths): - boxes = result.boxes.cpu().numpy() + for res_num, results in enumerate([results_1, results_2]): + + for (result, len) in zip(results, lengths): + boxes = result.boxes.cpu().numpy() - print(result.names) + print(result.names) # gives dict matching num to names ex. {0: 'person', 1: 'bicycle', 2: 'car', 3: 'motorcycle', 4 - print(f"the confidences are {boxes.conf}") + print(f"the confidences are {boxes.conf}") - predictions.append(self.get_results(boxes.xywh, boxes.cls, len, boxes.conf, result.names)) + pretrained = True if res_num == 1 else False + # results names + predictions.append(self.get_results(boxes.xywh, boxes.cls, len, boxes.conf, result.names, pretrained=pretrained)) @@ -212,10 +226,12 @@ def predict(self, tasks: List[Dict], context: Optional[Dict] = None, **kwargs) - return predictions - def get_results(self, boxes, classes, length, confidences, names_dict): + def get_results(self, boxes, classes, length, confidences, num_to_names_dict, pretrained=True): results = [] - for box, name, conf in zip(boxes, classes, confidences): + print(f"the to and from names are {self.from_name} and {self.to_name}") + + for box, class_num, conf in zip(boxes, classes, confidences): label_id = str(uuid4())[:9] @@ -223,6 +239,14 @@ def get_results(self, boxes, classes, length, confidences, names_dict): height, width = length + if pretrained: + name = 
num_to_names_dict[int(class_num)] + label = self.COCO_to_label.get(name) + print(f"class num is {class_num} and name is {name}") + else: # then, we are using the custom model + label = num_to_names_dict[int(class_num)] + + print(f"the labellllllll is {label}") results.append({ 'id': label_id, @@ -234,7 +258,7 @@ def get_results(self, boxes, classes, length, confidences, names_dict): 'value': { 'rotation': 0, # 'rectanglelabels': [self.class_to_name[f"{int(name)}"]], - 'rectanglelabels': [names_dict[int(name)]], + 'rectanglelabels': [label], 'width': w / width * 100, # this is correcrt 'height': h / height * 100, # this is also correct 'x': (x - 0.5*w) / width * 100, @@ -326,42 +350,45 @@ def fit(self, event, data, **kwargs): for result in results: + value = result['value'] label = value['rectanglelabels'][0] + + if label in self.custom_name_to_num: - # these are out of 100, so you need to convert them back - x = value['x'] - y = value['y'] - width = value['width'] - height = value['height'] + # these are out of 100, so you need to convert them back + x = value['x'] + y = value['y'] + width = value['width'] + height = value['height'] - orig_width = result['original_width'] - orig_height = result['original_height'] + orig_width = result['original_width'] + orig_height = result['original_height'] - # doing the inverse of these operation, but keeping it normalized - # 'width': w / width * 100, # this is correcrt - # 'height': h / height * 100, # this is also correct - # 'x': (x - 0.5*w) / width * 100, - # 'y': (y-0.5*h) / height * 100 + # doing the inverse of these operation, but keeping it normalized + # 'width': w / width * 100, # this is correcrt + # 'height': h / height * 100, # this is also correct + # 'x': (x - 0.5*w) / width * 100, + # 'y': (y-0.5*h) / height * 100 - # so, in YOLO format, we just need to to be normalize to 1 + # so, in YOLO format, we just need to to be normalize to 1 - w = width / 100 - h = height / 100 - trans_x = (x / 100) + 0.5 * w - trans_y 
= (y / 100) + 0.5 * h + w = width / 100 + h = height / 100 + trans_x = (x / 100) + 0.5 * w + trans_y = (y / 100) + 0.5 * h - # now getting the class label - label = self.name_to_class.get(label) + # now getting the class label + label_num = self.custom_name_to_num.get(label) - with open(f'./datasets/temp/labels/{txt_name}.txt', 'a') as f: - f.write(f"{label} {trans_x} {trans_y} {w} {h}\n") - with open(f'./datasets/temp/labels/(2){txt_name}.txt', 'a') as f: - f.write(f"{label} {trans_x} {trans_y} {w} {h}\n") + with open(f'./datasets/temp/labels/{txt_name}.txt', 'a') as f: + f.write(f"{label_num} {trans_x} {trans_y} {w} {h}\n") + with open(f'./datasets/temp/labels/(2){txt_name}.txt', 'a') as f: + f.write(f"{label_num} {trans_x} {trans_y} {w} {h}\n") - results = model.train(data='train_config.yml', epochs = 1, imgsz=640) + results = custom_model.train(data='custom_config.yml', epochs = 1, imgsz=640) # indexing error if there is only one image # do two images or more images for no error From 8c7d5ede08ea308a0af3b76a15e604c9c40b774d Mon Sep 17 00:00:00 2001 From: shondle Date: Mon, 13 Nov 2023 00:03:28 -0600 Subject: [PATCH 07/21] Add docker new start relation --- label_studio_ml/examples/yolov8/model.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/label_studio_ml/examples/yolov8/model.py b/label_studio_ml/examples/yolov8/model.py index 346d7fc75..efbf28ef2 100644 --- a/label_studio_ml/examples/yolov8/model.py +++ b/label_studio_ml/examples/yolov8/model.py @@ -50,6 +50,8 @@ 1. add the ability to batch send things to train the models 2. 
integrate DINO in to train the model on a bunch of images +ADD FLAG TO + """ @@ -122,7 +124,8 @@ def __init__(self, project_id, **kwargs): input_file = "custom_config.yml" with open(input_file, "r") as file: data = yaml.safe_load(file) - + + # obious way to toggle if NEW_START: self.custom_num_to_name = {i:v for i,v in enumerate(second_label_classes)} @@ -248,6 +251,11 @@ def get_results(self, boxes, classes, length, confidences, num_to_names_dict, pr print(f"the labellllllll is {label}") + if label==None: + print(f"it's none {label}") + continue + + print("but we're still going") results.append({ 'id': label_id, 'from_name': self.from_name, From dd95a349ce980a60e98d56263b11b0a2d458145a Mon Sep 17 00:00:00 2001 From: shondle Date: Mon, 13 Nov 2023 00:05:08 -0600 Subject: [PATCH 08/21] Add models to class object --- label_studio_ml/examples/yolov8/model.py | 172 ++++++++--------------- 1 file changed, 58 insertions(+), 114 deletions(-) diff --git a/label_studio_ml/examples/yolov8/model.py b/label_studio_ml/examples/yolov8/model.py index efbf28ef2..bfaba03a4 100644 --- a/label_studio_ml/examples/yolov8/model.py +++ b/label_studio_ml/examples/yolov8/model.py @@ -12,7 +12,10 @@ import os import yaml +import shutil + +"""USE THIS TO UPDATE WHICH MODEL YOU ARE USING""" # TODO: use the best.pt saved to load nstead # https://github.com/ultralytics/ultralytics/issues/2750#issuecomment-1556847848 @@ -23,95 +26,62 @@ """TODO -TODO: PROBLEM-------- -make sure the classes are cut before using the custom model, otherwise it will double up on predictions and then provide output labels with the None type - -CHOOSING THE RIGHT MODEL FOR PREDICTIONS AND PRETRAINING -1. add two models - 1 YOLO that will stay the same, 1 that will be used for fine tune - - dataset guide: https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/open-images-v7.yaml -2. 
only fine tune the second model -CHECK FINE TUNING COORDINATE LOCATIONT TRANSFORMS - -GETTING THE RIGHT PREDICTIONS FROM BOTH THE MODELS - -One option - have 2 models, use the second model for classes not contained in the first model, and the first model for classes that are already contained -Note: if they want to train the best model, they'll have to train from scratch anyways (not just use the one from Label Studio). -ALSO REMOVE UNNCECESSARY CLASSES FROM BEING USED FROM THE FIRST MODEL - - -CHOOSING CLASSES THAT OVERLAP WITH PRETRAINED YOLO -1. exchange YOLO with google v7 images model with more classes - https://docs.ultralytics.com/models/yolov8/#supported-modes 2. let user give overlaps in the docker file ## ^ after the above, send a PR -FINE TUNING ON MULTIPLE IMAGES AT A TIME -1. add the ability to batch send things to train the models -2. integrate DINO in to train the model on a bunch of images - -ADD FLAG TO - """ -# change config file depending on how many classes there are in the saved model +class YOLO(LabelStudioMLBase): -NEW_START = True + def __init__(self, project_id, **kwargs): + super(YOLO, self).__init__(**kwargs) + self.device = "cuda" if torch.cuda.is_available else "cpu" # can to mps + print(self.label_config) + # print(self.parsed_label_config) -# defining model start -pretrained_model = YOLO('yolov8n.pt') -# add logic that creates it from regular here -if NEW_START: - custom_model = YOLO('yolov8n(custom).pt') -else: - custom_model = YOLO('yolov8n(custom).pt') + # parsed = self.parsed_label_config + classes = parsed['label']['labels'] -# else: -# dir_path = './runs/detect/' -# folders = os.listdir(dir_path) -# # sorted_folders = sorted(folders, key=lambda x: int(x.split("train")[-1])) -# import re -# # https://stackoverflow.com/questions/4623446/how-do-you-sort-files-numerically -# sorted_folders = folders.sort(key=lambda var:[int(x) if x.isdigit() else x for x in re.findall(r'[^0-9]|[0-9]+', var)]) - -# for folder in folders: -# split = 
folder.split("train")[-1] -# if split is not "train": + with open("ls_config.yml", "r") as file: + ls_config = yaml.safe_load(file) -# last_folder = folders[-1] -# print(f"the last folder is {last_folder} and {folders}") -# model = YOLO(f'./runs/detect/{last_folder}/weights/best.pt') + label_to_COCO = ls_config["labels_to_coco"] + self.NEW_START = True if label_to_COCO['NEW_START']=='True' else False + self.JUST_CUSTOM = True if label_to_COCO['JUST_CUSTOM']=='True' else False -# TODO: -# figure out how to integrate class names for things not predicted + print(f"{self.NEW_START} and {self.JUST_CUSTOM}") -class YOLO(LabelStudioMLBase): + # TODO: get from docker + # label_to_COCO = { + # "cats": "Cat", + # "lights": "Traffic light", + # "cars": "Car", + # } + - def __init__(self, project_id, **kwargs): - super(YOLO, self).__init__(**kwargs) - self.device = "cuda" if torch.cuda.is_available else "cpu" # can to mps - # print(self.label_config) - # print(self.parsed_label_config) + # defining model start - # this just needs to be done before training, not model loading - # create a new YAML file for training - parsed = self.parsed_label_config - classes = parsed['label']['labels'] + if not self.JUST_CUSTOM: + self.pretrained_model = YOLO('yolov8n-oiv7.pt') + + # add logic that creates it from regular here + if self.NEW_START: + shutil.copyfile('./yolov8n.pt', 'yolov8n(custom).pt') + self.custom_model = YOLO('yolov8n(custom).pt') + FIRST_USE = True + else: + self.custom_model = YOLO('yolov8n(custom).pt') - # TODO: get from docker -> user dictionary that maps labelling config to COCO classes - label_to_COCO = { - "cats": "cat", - "lights": "traffic light", - "cars": "car", - } self.COCO_to_label = {v:k for k, v in label_to_COCO.items()} @@ -126,7 +96,7 @@ def __init__(self, project_id, **kwargs): data = yaml.safe_load(file) # obious way to toggle - if NEW_START: + if self.NEW_START: self.custom_num_to_name = {i:v for i,v in enumerate(second_label_classes)} @@ -141,15 
+111,6 @@ def __init__(self, project_id, **kwargs): self.custom_name_to_num = {v:k for k, v in self.custom_num_to_name.items()} - # logic for using predefined YOLO classes - - # TODO: use google images V7 instead for access to more classes - # calculate box overlap and remove the new model in that case - # otherwise, for now, just wait until predictions overlap then add a flag where you just use the new model now - - - - print(classes) def predict(self, tasks: List[Dict], context: Optional[Dict] = None, **kwargs) -> List[Dict]: @@ -195,14 +156,23 @@ def predict(self, tasks: List[Dict], context: Optional[Dict] = None, **kwargs) - lengths.append((H, W)) # predicting from PIL loaded images - results_1 = pretrained_model.predict(source=imgs) # define model earlier - results_2 = custom_model.predict(source=imgs) + if not self.JUST_CUSTOM: + results_1 = self.pretrained_model.predict(source=imgs) # define model earlier + else: + results_1 = None + + if not self.FIRST_USE: + results_2 = self.custom_model.predict(source=imgs) + else: + results_2 = None # each item will be the predictions for a task predictions = [] # basically, running this loop for each task for res_num, results in enumerate([results_1, results_2]): + if results == None: + continue for (result, len) in zip(results, lengths): boxes = result.boxes.cpu().numpy() @@ -211,21 +181,9 @@ def predict(self, tasks: List[Dict], context: Optional[Dict] = None, **kwargs) - print(f"the confidences are {boxes.conf}") - pretrained = True if res_num == 1 else False + pretrained = True if res_num == 0 else False # results names predictions.append(self.get_results(boxes.xywh, boxes.cls, len, boxes.conf, result.names, pretrained=pretrained)) - - - - # # TODO: here figure out what type of prediction we are looking for -> classification, segmentation, bounding boxes, etc. 
- # context = "classification" - - # if context=="classification": - # model = YOLO('yolov8n-cls.pt') - # img = 'https://ultralytics.com/images/bus.jpg' - # results = model(img) - - # find how to get images and labels from different places YOLO return predictions @@ -265,10 +223,9 @@ def get_results(self, boxes, classes, length, confidences, num_to_names_dict, pr 'image_rotation': 0, 'value': { 'rotation': 0, - # 'rectanglelabels': [self.class_to_name[f"{int(name)}"]], 'rectanglelabels': [label], - 'width': w / width * 100, # this is correcrt - 'height': h / height * 100, # this is also correct + 'width': w / width * 100, + 'height': h / height * 100, 'x': (x - 0.5*w) / width * 100, 'y': (y-0.5*h) / height * 100 }, @@ -291,14 +248,6 @@ def fit(self, event, data, **kwargs): :param data: the payload received from the event (check [Webhook event reference](https://labelstud.io/guide/webhook_reference.html)) """ - # model.train() - - print(f"the fit is {data}") - # results = data["annotation"]["result"] - # ^ this will be a list of all the rectangles you are fine tuning - - # figure out how to do this with multiple images at once - results = data['annotation']['result'] data = data['task']['data'] image_path = data['image'] @@ -322,8 +271,6 @@ def fit(self, event, data, **kwargs): true_img_paths.append(img_path) - # im_save = img.save(f"dataset/images/{name}") - sample_img_path = true_img_paths[0] img = Image.open(sample_img_path) @@ -339,14 +286,8 @@ def fit(self, event, data, **kwargs): all_new_paths.append(f"./datasets/temp/images/{image_name}") all_new_paths.append(f"./datasets/temp/images/(2){image_name}") - # these rename the directories for label studio format - # os.rename(project_path.split("/"), project_path.replace((f"/{project_path.split('/')[-2]}/"), "images")) - - # # making the labels directory - # os.mkdir(img_path.split("/")[:-1], "labels") - # now saving text file labels - txt_name = (image_path.split('/')[-1]).split('.')[0] + txt_name = 
(image_path.split('/')[-1]).rsplit('.', 1)[0] with open(f'./datasets/temp/labels/{txt_name}.txt', 'w') as f: f.write("") @@ -384,8 +325,8 @@ def fit(self, event, data, **kwargs): w = width / 100 h = height / 100 - trans_x = (x / 100) + 0.5 * w - trans_y = (y / 100) + 0.5 * h + trans_x = (x / 100) + (0.5 * w) + trans_y = (y / 100) + (0.5 * h) # now getting the class label label_num = self.custom_name_to_num.get(label) @@ -396,12 +337,15 @@ def fit(self, event, data, **kwargs): f.write(f"{label_num} {trans_x} {trans_y} {w} {h}\n") - results = custom_model.train(data='custom_config.yml', epochs = 1, imgsz=640) + results = self.custom_model.train(data='custom_config.yml', epochs = 1, imgsz=640) + + FIRST_USE = False + # indexing error if there is only one image # do two images or more images for no error # remove all these files so train starts from nothing next time - self.remove_train_files(all_new_paths) + # self.remove_train_files(all_new_paths) From 61c1e612d5b3362f00044d32da82297ca417a347 Mon Sep 17 00:00:00 2001 From: shondle Date: Mon, 13 Nov 2023 00:32:59 -0600 Subject: [PATCH 09/21] Add FIRST_USE flag --- label_studio_ml/examples/yolov8/model.py | 69 +++++++++++++----------- 1 file changed, 38 insertions(+), 31 deletions(-) diff --git a/label_studio_ml/examples/yolov8/model.py b/label_studio_ml/examples/yolov8/model.py index bfaba03a4..41e46d8d0 100644 --- a/label_studio_ml/examples/yolov8/model.py +++ b/label_studio_ml/examples/yolov8/model.py @@ -15,11 +15,6 @@ import shutil -"""USE THIS TO UPDATE WHICH MODEL YOU ARE USING""" -# TODO: use the best.pt saved to load nstead -# https://github.com/ultralytics/ultralytics/issues/2750#issuecomment-1556847848 - - LABEL_STUDIO_ACCESS_TOKEN = os.environ.get("LABEL_STUDIO_ACCESS_TOKEN") LABEL_STUDIO_HOST = os.environ.get("LABEL_STUDIO_HOST") @@ -30,9 +25,27 @@ ## ^ after the above, send a PR +worry about first use FIRST_USE key +how to switch off new start automatically after first time + """ +JUST_CUSTOM = False 
+NEW_START = False + +if not JUST_CUSTOM: + pretrained_model = YOLO('yolov8n-oiv7.pt') + +# add logic that creates it from regular here +if NEW_START: + shutil.copyfile('./yolov8n.pt', 'yolov8n(custom).pt') + custom_model = YOLO('yolov8n(custom).pt') + FIRST_USE = True +else: + custom_model = YOLO('yolov8n(custom).pt') + FIRST_USE = False + class YOLO(LabelStudioMLBase): @@ -41,11 +54,15 @@ def __init__(self, project_id, **kwargs): self.device = "cuda" if torch.cuda.is_available else "cpu" # can to mps print(self.label_config) - # print(self.parsed_label_config) + print(self.parsed_label_config) + self.first_use = FIRST_USE + + - # parsed = self.parsed_label_config + + parsed = self.parsed_label_config classes = parsed['label']['labels'] @@ -54,8 +71,11 @@ def __init__(self, project_id, **kwargs): label_to_COCO = ls_config["labels_to_coco"] - self.NEW_START = True if label_to_COCO['NEW_START']=='True' else False - self.JUST_CUSTOM = True if label_to_COCO['JUST_CUSTOM']=='True' else False + # self.NEW_START = True if ls_config.get('NEW_START')=='True' else False + # self.JUST_CUSTOM = True if ls_config.get('JUST_CUSTOM')=='True' else False + + self.NEW_START = NEW_START + self.JUST_CUSTOM = JUST_CUSTOM print(f"{self.NEW_START} and {self.JUST_CUSTOM}") @@ -71,17 +91,6 @@ def __init__(self, project_id, **kwargs): # defining model start - if not self.JUST_CUSTOM: - self.pretrained_model = YOLO('yolov8n-oiv7.pt') - - # add logic that creates it from regular here - if self.NEW_START: - shutil.copyfile('./yolov8n.pt', 'yolov8n(custom).pt') - self.custom_model = YOLO('yolov8n(custom).pt') - FIRST_USE = True - else: - self.custom_model = YOLO('yolov8n(custom).pt') - self.COCO_to_label = {v:k for k, v in label_to_COCO.items()} @@ -157,12 +166,15 @@ def predict(self, tasks: List[Dict], context: Optional[Dict] = None, **kwargs) - # predicting from PIL loaded images if not self.JUST_CUSTOM: - results_1 = self.pretrained_model.predict(source=imgs) # define model earlier + 
results_1 = pretrained_model.predict(source=imgs) # define model earlier else: results_1 = None - if not self.FIRST_USE: - results_2 = self.custom_model.predict(source=imgs) + + # we don't want the predictions from the pretrained version of the custom model + # because it hasn't reshaped to the new classes yet + if not self.first_use: + results_2 = custom_model.predict(source=imgs) else: results_2 = None @@ -203,17 +215,12 @@ def get_results(self, boxes, classes, length, confidences, num_to_names_dict, pr if pretrained: name = num_to_names_dict[int(class_num)] label = self.COCO_to_label.get(name) - print(f"class num is {class_num} and name is {name}") else: # then, we are using the custom model label = num_to_names_dict[int(class_num)] - print(f"the labellllllll is {label}") - if label==None: - print(f"it's none {label}") continue - print("but we're still going") results.append({ 'id': label_id, 'from_name': self.from_name, @@ -337,15 +344,15 @@ def fit(self, event, data, **kwargs): f.write(f"{label_num} {trans_x} {trans_y} {w} {h}\n") - results = self.custom_model.train(data='custom_config.yml', epochs = 1, imgsz=640) + results = custom_model.train(data='custom_config.yml', epochs = 1, imgsz=640) - FIRST_USE = False + self.first_use = False # indexing error if there is only one image # do two images or more images for no error # remove all these files so train starts from nothing next time - # self.remove_train_files(all_new_paths) + self.remove_train_files(all_new_paths) From b198b241c49250295c2f80e339371bb92a5782ca Mon Sep 17 00:00:00 2001 From: shondle Date: Mon, 13 Nov 2023 00:54:45 -0600 Subject: [PATCH 10/21] Remove comments --- label_studio_ml/examples/yolov8/model.py | 145 +++-------------------- 1 file changed, 16 insertions(+), 129 deletions(-) diff --git a/label_studio_ml/examples/yolov8/model.py b/label_studio_ml/examples/yolov8/model.py index 41e46d8d0..3989a23e2 100644 --- a/label_studio_ml/examples/yolov8/model.py +++ 
b/label_studio_ml/examples/yolov8/model.py @@ -5,39 +5,28 @@ import os from PIL import Image from uuid import uuid4 - from ultralytics import YOLO import torch - import os import yaml - import shutil LABEL_STUDIO_ACCESS_TOKEN = os.environ.get("LABEL_STUDIO_ACCESS_TOKEN") LABEL_STUDIO_HOST = os.environ.get("LABEL_STUDIO_HOST") +JUST_CUSTOM = os.environ.get("JUST_CUSTOM") -"""TODO - -2. let user give overlaps in the docker file - -## ^ after the above, send a PR - -worry about first use FIRST_USE key -how to switch off new start automatically after first time - - -""" - +# TODO: delete this line JUST_CUSTOM = False -NEW_START = False + +# checks if you have already built a custom model +# if you want to do it for a new task, move this model out of the directory +NEW_START = os.path.isfile('yolov8n(custom).pt') if not JUST_CUSTOM: pretrained_model = YOLO('yolov8n-oiv7.pt') -# add logic that creates it from regular here if NEW_START: shutil.copyfile('./yolov8n.pt', 'yolov8n(custom).pt') custom_model = YOLO('yolov8n(custom).pt') @@ -58,39 +47,17 @@ def __init__(self, project_id, **kwargs): self.first_use = FIRST_USE - - - - parsed = self.parsed_label_config classes = parsed['label']['labels'] - - with open("ls_config.yml", "r") as file: + with open("label_to_coco.yml", "r") as file: ls_config = yaml.safe_load(file) label_to_COCO = ls_config["labels_to_coco"] - # self.NEW_START = True if ls_config.get('NEW_START')=='True' else False - # self.JUST_CUSTOM = True if ls_config.get('JUST_CUSTOM')=='True' else False self.NEW_START = NEW_START - self.JUST_CUSTOM = JUST_CUSTOM - - print(f"{self.NEW_START} and {self.JUST_CUSTOM}") - - - # TODO: get from docker - # label_to_COCO = { - # "cats": "Cat", - # "lights": "Traffic light", - # "cars": "Car", - # } - - - - # defining model start - + self.JUST_CUSTOM = JUST_CUSTOM self.COCO_to_label = {v:k for k, v in label_to_COCO.items()} @@ -98,13 +65,13 @@ def __init__(self, project_id, **kwargs): second_label_classes = [x for x in 
classes if x not in set(first_label_classes)] # raw labels from labelling config + # if the user changes the labelling config, it shouldn't automatically destroy everything + # so only change it if we are starting brand new - # if they change the labelling config, it shouldn't automatically destroy everything input_file = "custom_config.yml" with open(input_file, "r") as file: data = yaml.safe_load(file) - # obious way to toggle if self.NEW_START: self.custom_num_to_name = {i:v for i,v in enumerate(second_label_classes)} @@ -123,19 +90,7 @@ def __init__(self, project_id, **kwargs): print(classes) def predict(self, tasks: List[Dict], context: Optional[Dict] = None, **kwargs) -> List[Dict]: - """ Write your inference logic here - :param tasks: [Label Studio tasks in JSON format](https://labelstud.io/guide/task_format.html) - :param context: [Label Studio context in JSON format](https://labelstud.io/guide/ml.html#Passing-data-to-ML-backend) - :return predictions: [Predictions array in JSON format](https://labelstud.io/guide/export.html#Raw-JSON-format-of-completed-tasks) - """ - # print(f'''\ - # Run prediction on {tasks} - # Received context: {context} - # Label config: {self.label_config} - # Parsed JSON Label config: {self.parsed_label_config}''') - - # Project ID: {self.project_id} - # model.eval() + """ Inference logic for YOLO model """ self.from_name, self.to_name, self.value = self.get_first_tag_occurence('RectangleLabels', 'Image') @@ -200,6 +155,8 @@ def predict(self, tasks: List[Dict], context: Optional[Dict] = None, **kwargs) - return predictions def get_results(self, boxes, classes, length, confidences, num_to_names_dict, pretrained=True): + """This method returns annotation results that will be packaged and sent to Label Studio frontend""" + results = [] print(f"the to and from names are {self.from_name} and {self.to_name}") @@ -248,11 +205,7 @@ def get_results(self, boxes, classes, length, confidences, num_to_names_dict, pr def fit(self, event, data, 
**kwargs): """ This method is called each time an annotation is created or updated - You can run your logic here to update the model and persist it to the cache - It is not recommended to perform long-running operations here, as it will block the main thread - Instead, consider running a separate process or a thread (like RQ worker) to perform the training - :param event: event type can be ('ANNOTATION_CREATED', 'ANNOTATION_UPDATED') - :param data: the payload received from the event (check [Webhook event reference](https://labelstud.io/guide/webhook_reference.html)) + You can run your logic here to update the model """ results = data['annotation']['result'] @@ -321,15 +274,6 @@ def fit(self, event, data, **kwargs): orig_width = result['original_width'] orig_height = result['original_height'] - - # doing the inverse of these operation, but keeping it normalized - # 'width': w / width * 100, # this is correcrt - # 'height': h / height * 100, # this is also correct - # 'x': (x - 0.5*w) / width * 100, - # 'y': (y-0.5*h) / height * 100 - - # so, in YOLO format, we just need to to be normalize to 1 - w = width / 100 h = height / 100 trans_x = (x / 100) + (0.5 * w) @@ -347,68 +291,11 @@ def fit(self, event, data, **kwargs): results = custom_model.train(data='custom_config.yml', epochs = 1, imgsz=640) self.first_use = False - - # indexing error if there is only one image - # do two images or more images for no error # remove all these files so train starts from nothing next time self.remove_train_files(all_new_paths) - - - - # you can send a list of images into the YAML file - # so we can just save thelabels int eh data upload directory? - # for now let's just work on saving all the images in a new directory and then using that? 
- - # this is assuming all images are in a list - - - # TODO: make sure this rewrites whatever images were already there - # having so many images rewritten is a time consuming process - think of a way to mitigate this - - - - """Here is the process - - - whatever project you are in, rename that project to images - - create a labels directory there as well - - make sure the above doesn't break label studio - - put the label text files in there - - create a images.txt file that contains only the paths of the images that have been chosen by the user - - remove the txt files when done and labels and RENAME back the labels directory - """ - - # setting the new model - # self.set("new_model", model) - # set a new model version - - - print(f"the event is {event}") # ANNOTATION CREATED - print(f"kwargs are {kwargs}") - - # use cache to retrieve the data from the previous fit() runs - old_data = self.get('my_data') - old_model_version = self.get('model_version') - print(f'Old data: {old_data}') - print(f'Old model version: {old_model_version}') - - # store new data to the cache - self.set('my_data', 'my_new_data_value') - self.set('model_version', 'my_new_model_version') - print(f'New data: {self.get("my_data")}') - print(f'New model version: {self.get("model_version")}') - - print('fit() completed successfully.') - - - # setting the new model -> this is only key settting so that we can retrieve it later - # self.set("new_model", model) - - # save the model to the directory - # torch.save(model.state_dict(), 'yolov8n(testing).pt') def remove_train_files(self, file_paths): + """This cleans the dataset directory""" for path in file_paths: - os.remove(path) - - + os.remove(path) \ No newline at end of file From 8b18b7a1195344811ba49303d8934d7183b32387 Mon Sep 17 00:00:00 2001 From: shondle Date: Mon, 13 Nov 2023 09:47:04 -0600 Subject: [PATCH 11/21] Add more docker support --- label_studio_ml/examples/yolov8/Dockerfile | 8 ++- label_studio_ml/examples/yolov8/README.md | 57 
++++++++++++++++++- .../examples/yolov8/custom_config.yml | 7 +++ .../examples/yolov8/label_to_coco.yml | 7 +++ label_studio_ml/examples/yolov8/model.py | 30 ++++++---- .../examples/yolov8/requirements.txt | 50 +++++++++++++++- 6 files changed, 144 insertions(+), 15 deletions(-) create mode 100644 label_studio_ml/examples/yolov8/custom_config.yml create mode 100644 label_studio_ml/examples/yolov8/label_to_coco.yml diff --git a/label_studio_ml/examples/yolov8/Dockerfile b/label_studio_ml/examples/yolov8/Dockerfile index d4ca437a2..d86a8ab17 100644 --- a/label_studio_ml/examples/yolov8/Dockerfile +++ b/label_studio_ml/examples/yolov8/Dockerfile @@ -1,8 +1,9 @@ -FROM python:3.8-slim +FROM python:3.11-slim RUN apt-get update && \ apt-get upgrade -y && \ - apt-get install -y git + apt-get install -y git wget && \ + apt-get -y install ffmpeg libsm6 libxext6 libffi-dev python3-dev gcc ENV PYTHONUNBUFFERED=True \ PORT=9090 \ @@ -14,6 +15,9 @@ COPY requirements.txt . RUN pip install --no-cache-dir -r requirements.txt +RUN wget https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-oiv7.pt +RUN wget https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n.pt + COPY . ./ CMD exec gunicorn --preload --bind :$PORT --workers $WORKERS --threads $THREADS --timeout 0 _wsgi:app diff --git a/label_studio_ml/examples/yolov8/README.md b/label_studio_ml/examples/yolov8/README.md index 1aaac3fda..c9611fa60 100644 --- a/label_studio_ml/examples/yolov8/README.md +++ b/label_studio_ml/examples/yolov8/README.md @@ -1 +1,56 @@ -Here is the project that integrates YOLOV8 with Label Studio. \ No newline at end of file +This project integrates the YOLOv8 model with Label Studio. + + +## How The Project Works + +This project helps you detect objects in Label Studio by doing two things. + +1 - Uses a pretrained YOLOv8 model on Google's Open Images V7 (OIV7) to provide a pretrained model on 600 classes! 
+2 - Uses a custom model for classes that don't fit under the 600 classes in the OIV7 dataset + +When annotating in Label Studio, you predefine which of your labels overlap with the classes of the first, pretrained model. Custom labels that don't fit under the 600 classes are automatically assigned to the second, custom model, which is trained as you submit annotations in Label Studio. + +Predictions are then gathered from the OIV7 pretrained model and the custom model and shown in Label Studio in milliseconds, where you can adjust the annotations and fine-tune your custom model for even more precise predictions. + + +## Setup + +1. Defining Classes for Pretrained and Custom Models + +Edit your labeling config to something like the following: + +```xml + + + + + +``` + +In `label_to_coco.yml`, edit the dictionary so that the keys are the exact names of your rectangle labels in Label Studio and the values are the exact names of the matching classes in [open-images-v7.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/open-images-v7.yaml). + +Any classes in your labeling config that you do not add to `label_to_coco.yml` will be trained using the second, custom model. + +Note: if you leave this YAML file empty with no keys and values, only the custom model will be trained and then used for predictions. In such a case, the model trained on 600 classes will not be used at all. + +2. Editing `docker-compose.yml` + +Set `LABEL_STUDIO_HOST` to your private IP address (which typically starts with 192, e.g. 192.168.1.1) together with the port that Label Studio is running on. For example, the entry in your docker compose file may look like `LABEL_STUDIO_HOST=192.168.1.1:8080` + +Set `LABEL_STUDIO_ACCESS_TOKEN` by going to your Label Studio Accounts & Settings and copying the Access Token. Paste it into `docker-compose.yml`, e.g. `LABEL_STUDIO_ACCESS_TOKEN=cjneskn2keoqpejleed8d8frje9992jdjdasvbfnwe2jsx` + +3. Running the backend + +Run `docker compose up` to start the backend.
Under the `Machine Learning` settings in your project in Label Studio enter the following URL while adding the model: `http://{your_private_ip}:9090`. Note: if you changed the port before running the backend, you will have to change it here as well. + + + +## Notes + +If you would like to save your model inside of your docker container or move it into your local machine, you will need to access the terminal of your docker container. See how to do this [here](https://stackoverflow.com/a/30173220). + +If you want to train a new custom model, move the `yolov8n(custom).pt` out of your container's directory. It will automatically realize there is no custom model, and will create a new one from scratch to use when training custom models. \ No newline at end of file diff --git a/label_studio_ml/examples/yolov8/custom_config.yml b/label_studio_ml/examples/yolov8/custom_config.yml new file mode 100644 index 000000000..48fbf9a53 --- /dev/null +++ b/label_studio_ml/examples/yolov8/custom_config.yml @@ -0,0 +1,7 @@ +names: + 0: taxi + 1: other +path: ./temp +test: null +train: images +val: images diff --git a/label_studio_ml/examples/yolov8/label_to_coco.yml b/label_studio_ml/examples/yolov8/label_to_coco.yml new file mode 100644 index 000000000..1889d48c0 --- /dev/null +++ b/label_studio_ml/examples/yolov8/label_to_coco.yml @@ -0,0 +1,7 @@ +# Note: check here to match class names +# https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/open-images-v7.yaml +# the key should be what you've named your label, the value is what class it matches from the link above +labels_to_coco: + cats: Cat + lights: Traffic light + cars: Car \ No newline at end of file diff --git a/label_studio_ml/examples/yolov8/model.py b/label_studio_ml/examples/yolov8/model.py index 3989a23e2..7ded3e4c4 100644 --- a/label_studio_ml/examples/yolov8/model.py +++ b/label_studio_ml/examples/yolov8/model.py @@ -1,6 +1,7 @@ from typing import List, Dict, Optional from label_studio_ml.model 
import LabelStudioMLBase from label_studio_ml.utils import get_image_local_path +from label_studio_tools.core.label_config import parse_config import os from PIL import Image @@ -15,10 +16,13 @@ LABEL_STUDIO_ACCESS_TOKEN = os.environ.get("LABEL_STUDIO_ACCESS_TOKEN") LABEL_STUDIO_HOST = os.environ.get("LABEL_STUDIO_HOST") -JUST_CUSTOM = os.environ.get("JUST_CUSTOM") -# TODO: delete this line -JUST_CUSTOM = False +with open("label_to_coco.yml", "r") as file: + ls_config = yaml.safe_load(file) + +label_to_COCO = ls_config["labels_to_coco"] + +JUST_CUSTOM = True if len(label_to_COCO) == 0 else False # checks if you have already built a custom model # if you want to do it for a new task, move this model out of the directory @@ -40,21 +44,23 @@ class YOLO(LabelStudioMLBase): def __init__(self, project_id, **kwargs): super(YOLO, self).__init__(**kwargs) + + print(f"initializing teh model the kwargs are {kwargs}") self.device = "cuda" if torch.cuda.is_available else "cpu" # can to mps - print(self.label_config) - print(self.parsed_label_config) - self.first_use = FIRST_USE + self.custom_parsed_label_config = parse_config(self.label_config) - parsed = self.parsed_label_config - classes = parsed['label']['labels'] + # print(self.label_config) + # print(self.custom_parsed_label_config) - with open("label_to_coco.yml", "r") as file: - ls_config = yaml.safe_load(file) + self.first_use = FIRST_USE + print(f"can it gather the config? 
{self.custom_parsed_label_config} and {self.label_config}") + parsed = self.custom_parsed_label_config + classes = parsed['label']['labels'] - label_to_COCO = ls_config["labels_to_coco"] + print(f"the classes are {classes}") self.NEW_START = NEW_START self.JUST_CUSTOM = JUST_CUSTOM @@ -92,6 +98,8 @@ def __init__(self, project_id, **kwargs): def predict(self, tasks: List[Dict], context: Optional[Dict] = None, **kwargs) -> List[Dict]: """ Inference logic for YOLO model """ + print("getting predictions") + self.from_name, self.to_name, self.value = self.get_first_tag_occurence('RectangleLabels', 'Image') imgs = [] diff --git a/label_studio_ml/examples/yolov8/requirements.txt b/label_studio_ml/examples/yolov8/requirements.txt index 2bf45e086..e7d23c550 100644 --- a/label_studio_ml/examples/yolov8/requirements.txt +++ b/label_studio_ml/examples/yolov8/requirements.txt @@ -1,2 +1,50 @@ gunicorn==20.1.0 -label-studio-ml @ git+https://github.com/HumanSignal/label-studio-ml-backend.git \ No newline at end of file +label-studio-ml @ git+https://github.com/HumanSignal/label-studio-ml-backend.git + +appdirs==1.4.4 +blinker==1.7.0 +certifi==2023.7.22 +charset-normalizer==3.3.1 +click==8.1.7 +colorama==0.4.6 +contourpy==1.1.1 +cycler==0.12.1 +filelock==3.12.4 +Flask==2.3.3 +fonttools==4.43.1 +fsspec==2023.10.0 +idna==3.4 +itsdangerous==2.1.2 +Jinja2==3.1.2 +kiwisolver==1.4.5 +label-studio-tools==0.0.3 +lxml==4.9.3 +MarkupSafe==2.1.3 +matplotlib==3.8.0 +mpmath==1.3.0 +networkx==3.2 +numpy==1.26.1 +opencv-python==4.8.1.78 +packaging==23.2 +pandas==2.1.1 +Pillow==10.1.0 +psutil==5.9.6 +py-cpuinfo==9.0.0 +pyparsing==3.1.1 +python-dateutil==2.8.2 +pytz==2023.3.post1 +PyYAML==6.0.1 +requests==2.31.0 +scipy==1.11.3 +seaborn==0.13.0 +six==1.16.0 +sympy==1.12 +thop==0.1.1.post2209072238 +torch==2.1.0 +torchvision==0.16.0 +tqdm==4.66.1 +typing_extensions==4.8.0 +tzdata==2023.3 +ultralytics==8.0.200 +urllib3==2.0.7 +Werkzeug==3.0.1 From cebe8eead2a3bacc9f71e33df91f3f0294f0dae8 Mon Sep 17 
00:00:00 2001 From: shondle Date: Mon, 13 Nov 2023 10:59:37 -0600 Subject: [PATCH 12/21] Update wsgi --- label_studio_ml/examples/yolov8/_wsgi.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/label_studio_ml/examples/yolov8/_wsgi.py b/label_studio_ml/examples/yolov8/_wsgi.py index 9ab9ef564..1d6bcd6a1 100644 --- a/label_studio_ml/examples/yolov8/_wsgi.py +++ b/label_studio_ml/examples/yolov8/_wsgi.py @@ -29,7 +29,7 @@ }) from label_studio_ml.api import init_app -from model import NewModel +from model import YOLO _DEFAULT_CONFIG_PATH = os.path.join(os.path.dirname(__file__), 'config.json') @@ -102,13 +102,13 @@ def parse_kwargs(): kwargs.update(parse_kwargs()) if args.check: - print('Check "' + NewModel.__name__ + '" instance creation..') - model = NewModel(**kwargs) + print('Check "' + YOLO.__name__ + '" instance creation..') + model = YOLO(**kwargs) - app = init_app(model_class=NewModel) + app = init_app(model_class=YOLO) app.run(host=args.host, port=args.port, debug=args.debug) else: # for uWSGI use - app = init_app(model_class=NewModel) + app = init_app(model_class=YOLO) From 59ba36cfcc53773fa2105d3bbd50b921a90b4265 Mon Sep 17 00:00:00 2001 From: shondle Date: Wed, 15 Nov 2023 10:55:21 -0600 Subject: [PATCH 13/21] Change ml backend to pip install --- label_studio_ml/examples/yolov8/model.py | 40 +++++++++++++------ .../examples/yolov8/requirements.txt | 9 +++-- 2 files changed, 33 insertions(+), 16 deletions(-) diff --git a/label_studio_ml/examples/yolov8/model.py b/label_studio_ml/examples/yolov8/model.py index 7ded3e4c4..cc2fc56da 100644 --- a/label_studio_ml/examples/yolov8/model.py +++ b/label_studio_ml/examples/yolov8/model.py @@ -2,6 +2,7 @@ from label_studio_ml.model import LabelStudioMLBase from label_studio_ml.utils import get_image_local_path from label_studio_tools.core.label_config import parse_config +from label_studio_tools.core.utils.io import get_local_path import os from PIL import Image @@ -42,22 +43,22 @@ class 
YOLO(LabelStudioMLBase): - def __init__(self, project_id, **kwargs): + def __init__(self, **kwargs): super(YOLO, self).__init__(**kwargs) print(f"initializing teh model the kwargs are {kwargs}") self.device = "cuda" if torch.cuda.is_available else "cpu" # can to mps - self.custom_parsed_label_config = parse_config(self.label_config) + # self.custom_parsed_label_config = parse_config(self.label_config) - # print(self.label_config) + print(self.parsed_label_config) # print(self.custom_parsed_label_config) self.first_use = FIRST_USE - print(f"can it gather the config? {self.custom_parsed_label_config} and {self.label_config}") - parsed = self.custom_parsed_label_config + # print(f"can it gather the config? {self.custom_parsed_label_config} and {self.label_config}") + parsed = self.parsed_label_config classes = parsed['label']['labels'] print(f"the classes are {classes}") @@ -100,7 +101,10 @@ def predict(self, tasks: List[Dict], context: Optional[Dict] = None, **kwargs) - print("getting predictions") - self.from_name, self.to_name, self.value = self.get_first_tag_occurence('RectangleLabels', 'Image') + # self.from_name, self.to_name, self.value = self.get_first_tag_occurence('RectangleLabels', 'Image') + + self.from_name = "label" + self.to_name = "image" imgs = [] lengths = [] @@ -111,28 +115,36 @@ def predict(self, tasks: List[Dict], context: Optional[Dict] = None, **kwargs) - raw_img_path = task['data']['image'] try: - img_path = get_image_local_path( - raw_img_path, - label_studio_access_token=LABEL_STUDIO_ACCESS_TOKEN, - label_studio_host=LABEL_STUDIO_HOST + print(f"......the local image path is {raw_img_path}") + + img_path = get_local_path( + url=raw_img_path, + hostname=LABEL_STUDIO_HOST, + access_token=LABEL_STUDIO_ACCESS_TOKEN ) - print(f"the real image path is {img_path}") + + print(f"........the real image path is {img_path}") except: + print("..... 
umm we shouldn't be here") img_path = raw_img_path img = Image.open(img_path) + imgs.append(img) W, H = img.size lengths.append((H, W)) + # predicting from PIL loaded images if not self.JUST_CUSTOM: results_1 = pretrained_model.predict(source=imgs) # define model earlier else: results_1 = None + print(f"....did we make it here?") + # we don't want the predictions from the pretrained version of the custom model # because it hasn't reshaped to the new classes yet @@ -141,6 +153,9 @@ def predict(self, tasks: List[Dict], context: Optional[Dict] = None, **kwargs) - else: results_2 = None + print(f"....did we make it here2222?") + + # each item will be the predictions for a task predictions = [] @@ -159,7 +174,8 @@ def predict(self, tasks: List[Dict], context: Optional[Dict] = None, **kwargs) - pretrained = True if res_num == 0 else False # results names predictions.append(self.get_results(boxes.xywh, boxes.cls, len, boxes.conf, result.names, pretrained=pretrained)) - + print(f"the predictions are {predictions}") + return predictions def get_results(self, boxes, classes, length, confidences, num_to_names_dict, pretrained=True): diff --git a/label_studio_ml/examples/yolov8/requirements.txt b/label_studio_ml/examples/yolov8/requirements.txt index e7d23c550..b7fe6e45f 100644 --- a/label_studio_ml/examples/yolov8/requirements.txt +++ b/label_studio_ml/examples/yolov8/requirements.txt @@ -10,12 +10,12 @@ colorama==0.4.6 contourpy==1.1.1 cycler==0.12.1 filelock==3.12.4 -Flask==2.3.3 +# Flask==1.1.2 fonttools==4.43.1 fsspec==2023.10.0 idna==3.4 -itsdangerous==2.1.2 -Jinja2==3.1.2 +# itsdangerous==2.0.1 +# Jinja2==3.0.3 kiwisolver==1.4.5 label-studio-tools==0.0.3 lxml==4.9.3 @@ -47,4 +47,5 @@ typing_extensions==4.8.0 tzdata==2023.3 ultralytics==8.0.200 urllib3==2.0.7 -Werkzeug==3.0.1 +# Werkzeug==2.0.2 +rq From c97be0b7483e75550beef7dddc5b76d21e1f0706 Mon Sep 17 00:00:00 2001 From: shondle Date: Sat, 18 Nov 2023 00:38:50 -0600 Subject: [PATCH 14/21] Add supervisord --- 
label_studio_ml/examples/yolov8/_wsgi.py | 10 ++-- .../examples/yolov8/docker-compose.yml | 27 ++++++++- label_studio_ml/examples/yolov8/model.py | 55 ++++++++++++------- .../examples/yolov8/supervisord.conf | 40 ++++++++++++++ label_studio_ml/examples/yolov8/uwsgi.ini | 10 ++++ 5 files changed, 113 insertions(+), 29 deletions(-) create mode 100644 label_studio_ml/examples/yolov8/supervisord.conf create mode 100644 label_studio_ml/examples/yolov8/uwsgi.ini diff --git a/label_studio_ml/examples/yolov8/_wsgi.py b/label_studio_ml/examples/yolov8/_wsgi.py index 1d6bcd6a1..f7cbc094d 100644 --- a/label_studio_ml/examples/yolov8/_wsgi.py +++ b/label_studio_ml/examples/yolov8/_wsgi.py @@ -29,7 +29,7 @@ }) from label_studio_ml.api import init_app -from model import YOLO +from model import YOLO_LS _DEFAULT_CONFIG_PATH = os.path.join(os.path.dirname(__file__), 'config.json') @@ -102,13 +102,13 @@ def parse_kwargs(): kwargs.update(parse_kwargs()) if args.check: - print('Check "' + YOLO.__name__ + '" instance creation..') - model = YOLO(**kwargs) + print('Check "' + YOLO_LS.__name__ + '" instance creation..') + model = YOLO_LS(**kwargs) - app = init_app(model_class=YOLO) + app = init_app(model_class=YOLO_LS) app.run(host=args.host, port=args.port, debug=args.debug) else: # for uWSGI use - app = init_app(model_class=YOLO) + app = init_app(model_class=YOLO_LS) diff --git a/label_studio_ml/examples/yolov8/docker-compose.yml b/label_studio_ml/examples/yolov8/docker-compose.yml index 050e7f189..d9c28692d 100644 --- a/label_studio_ml/examples/yolov8/docker-compose.yml +++ b/label_studio_ml/examples/yolov8/docker-compose.yml @@ -1,13 +1,34 @@ version: "3.8" services: - ml-backend: + redis: + image: redis:alpine + container_name: redis + hostname: redis + volumes: + - "./data/redis:/data" + expose: + - 6379 + server: container_name: ml-backend build: . 
environment: - MODEL_DIR=/data/models - - WORKERS=2 - - THREADS=4 + # - WORKERS=2 + # - THREADS=4 - LOG_LEVEL=DEBUG + - LABEL_STUDIO_HOST= + - LABEL_STUDIO_ACCESS_TOKEN= + - RQ_QUEUE_NAME=default + - REDIS_HOST=redis + - REDIS_PORT=6379 + - LABEL_STUDIO_USE_REDIS=true ports: - "9090:9090" + depends_on: + - redis + links: + - redis + volumes: + - "./data/server:/data" + - "./logs:/tmp" diff --git a/label_studio_ml/examples/yolov8/model.py b/label_studio_ml/examples/yolov8/model.py index cc2fc56da..02cc56a1b 100644 --- a/label_studio_ml/examples/yolov8/model.py +++ b/label_studio_ml/examples/yolov8/model.py @@ -29,22 +29,11 @@ # if you want to do it for a new task, move this model out of the directory NEW_START = os.path.isfile('yolov8n(custom).pt') -if not JUST_CUSTOM: - pretrained_model = YOLO('yolov8n-oiv7.pt') -if NEW_START: - shutil.copyfile('./yolov8n.pt', 'yolov8n(custom).pt') - custom_model = YOLO('yolov8n(custom).pt') - FIRST_USE = True -else: - custom_model = YOLO('yolov8n(custom).pt') - FIRST_USE = False +class YOLO_LS(LabelStudioMLBase): - -class YOLO(LabelStudioMLBase): - - def __init__(self, **kwargs): - super(YOLO, self).__init__(**kwargs) + def __init__(self, project_id, **kwargs): + super(YOLO_LS, self).__init__(**kwargs) print(f"initializing teh model the kwargs are {kwargs}") self.device = "cuda" if torch.cuda.is_available else "cpu" # can to mps @@ -52,14 +41,28 @@ def __init__(self, **kwargs): # self.custom_parsed_label_config = parse_config(self.label_config) - print(self.parsed_label_config) + # print(self.parsed_label_config) # print(self.custom_parsed_label_config) + if not JUST_CUSTOM: + self.pretrained_model = YOLO('yolov8n-oiv7.pt') + # test = self.pretrained_model('./feral-cat-Kevin-Patrick.jpg') + + if not NEW_START: + shutil.copyfile('./yolov8n.pt', 'yolov8n(custom).pt') + self.custom_model = YOLO('yolov8n(custom).pt') + FIRST_USE = True + else: + self.custom_model = YOLO('yolov8n(custom).pt') + FIRST_USE = False + self.first_use = 
FIRST_USE # print(f"can it gather the config? {self.custom_parsed_label_config} and {self.label_config}") - parsed = self.parsed_label_config - classes = parsed['label']['labels'] + # parsed = self.parsed_label_config + # classes = parsed['label']['labels'] + + classes = ["cats", "cars", "taxi", "lights", "others"] print(f"the classes are {classes}") @@ -128,6 +131,9 @@ def predict(self, tasks: List[Dict], context: Optional[Dict] = None, **kwargs) - print("..... umm we shouldn't be here") img_path = raw_img_path + # print(f"....did we make it here0.1?") + + print(f"the image path is {img_path}") img = Image.open(img_path) @@ -136,10 +142,16 @@ def predict(self, tasks: List[Dict], context: Optional[Dict] = None, **kwargs) - W, H = img.size lengths.append((H, W)) + print(f"....did we make it here0.0? {W} {H}") # predicting from PIL loaded images if not self.JUST_CUSTOM: - results_1 = pretrained_model.predict(source=imgs) # define model earlier + print(f"at least we made it") + try: + results_1 = self.pretrained_model.predict(imgs[0]) # define model earlier + except Exception as e: + print(f"the error was {e}") + # results_1 = self.pretrained_model(imgs) else: results_1 = None @@ -149,7 +161,7 @@ def predict(self, tasks: List[Dict], context: Optional[Dict] = None, **kwargs) - # we don't want the predictions from the pretrained version of the custom model # because it hasn't reshaped to the new classes yet if not self.first_use: - results_2 = custom_model.predict(source=imgs) + results_2 = self.custom_model.predict(source=imgs, sync=False) else: results_2 = None @@ -311,8 +323,9 @@ def fit(self, event, data, **kwargs): with open(f'./datasets/temp/labels/(2){txt_name}.txt', 'a') as f: f.write(f"{label_num} {trans_x} {trans_y} {w} {h}\n") - - results = custom_model.train(data='custom_config.yml', epochs = 1, imgsz=640) + print(f"........at least we started") + results = self.custom_model.train(data='custom_config.yml', epochs = 1, imgsz=640) + print(f"........maybe we 
can end") self.first_use = False diff --git a/label_studio_ml/examples/yolov8/supervisord.conf b/label_studio_ml/examples/yolov8/supervisord.conf new file mode 100644 index 000000000..4079c2132 --- /dev/null +++ b/label_studio_ml/examples/yolov8/supervisord.conf @@ -0,0 +1,40 @@ +[supervisord] +nodaemon = true +loglevel = info +logfile = supervisord.log + +[inet_http_server] +port=127.0.0.1:9001 + +[supervisorctl] +serverurl=http://127.0.0.1:9001 + +[rpcinterface:supervisor] +supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface + +[program:rq] +process_name=%(program_name)s_%(process_num)02d +command = rq worker --url redis://%(ENV_REDIS_HOST)s:6379/0 %(ENV_RQ_QUEUE_NAME)s +stopsignal = TERM +autostart = true +autorestart = true +killasgroup = true +stopasgroup = true +numprocs = 1 +stderr_logfile = /dev/stderr +stderr_logfile_maxbytes = 0 +stdout_logfile = /dev/stdout +stdout_logfile_maxbytes = 0 + +[program:wsgi] +environment = + RQ_QUEUE_NAME="%(ENV_RQ_QUEUE_NAME)s", + REDIS_HOST="%(ENV_REDIS_HOST)s" +command = uwsgi --ini /etc/uwsgi/uwsgi.ini +autostart = true +autorestart = true +stopsignal = QUIT +stderr_logfile = /dev/stderr +stderr_logfile_maxbytes = 0 +stdout_logfile = /dev/stdout +stdout_logfile_maxbytes = 0 \ No newline at end of file diff --git a/label_studio_ml/examples/yolov8/uwsgi.ini b/label_studio_ml/examples/yolov8/uwsgi.ini new file mode 100644 index 000000000..9f4667019 --- /dev/null +++ b/label_studio_ml/examples/yolov8/uwsgi.ini @@ -0,0 +1,10 @@ +[uwsgi] +protocol = http +socket = 0.0.0.0:9090 +module = _wsgi:app +master = true +processes = 1 +vacuum = true +die-on-term = true +plugins = python37 +pidfile = /tmp/%n.pid \ No newline at end of file From 12fac61c4c939767ecefe791f183cb0260146df5 Mon Sep 17 00:00:00 2001 From: shondle Date: Sat, 18 Nov 2023 02:55:19 -0600 Subject: [PATCH 15/21] Fix Docker compatability issues --- label_studio_ml/examples/yolov8/README.md | 9 +-- .../{label_to_coco.yml => 
class_matching.yml} | 8 ++- label_studio_ml/examples/yolov8/model.py | 68 ++----------------- .../examples/yolov8/requirements.txt | 4 +- 4 files changed, 21 insertions(+), 68 deletions(-) rename label_studio_ml/examples/yolov8/{label_to_coco.yml => class_matching.yml} (78%) diff --git a/label_studio_ml/examples/yolov8/README.md b/label_studio_ml/examples/yolov8/README.md index c9611fa60..8977b4be4 100644 --- a/label_studio_ml/examples/yolov8/README.md +++ b/label_studio_ml/examples/yolov8/README.md @@ -31,11 +31,13 @@ Edit your labeling config to something like the following ``` -In the `label_to_coco.yml` edit the dictionary to where the keys are the exact names of your rectangular labels in label studio and the values are the exact names of the same classes in [open-images-v7.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/open-images-v7.yaml). +In the `class_matching.yml` edit the `labels_to_coco` dictionary to where the keys are the exact names of your rectangular labels in label studio and the values are the exact names of the same classes in [open-images-v7.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/open-images-v7.yaml). -Any classes in your labeling config that you do not add to `label_to_coco.yml` will be trained using the second, custom model. +Any classes in your labeling config that you do not add to the `labels_to_coco` dictionary in `class_matching.yml` will be trained using the second, custom model. -Note: if you leave this YAML file empty with no keys and values, only the custom model will be trained and then used for predictions. In such a case, the model trained on 600 classes will not be used at all. +In the `all_classes` dictionary add all of the classes in your Label Studio labeling config that are under the rectangular labels. 
+ +Note: if you leave the `labels_to_coco` dictionary empty with no keys and values, only the custom model will be trained and then used for predictions. In such a case, the model trained on 600 classes will not be used at all. 2. Editing `docker-compose.yml` @@ -48,7 +50,6 @@ Set `LABEL_STUDIO_ACCESS_TOKEN` by going to your Label Studio Accounts & Setting Run `docker compose up` to start the backend. Under the `Machine Learning` settings in your project in Label Studio enter the following URL while adding the model: `http://{your_private_ip}:9090`. Note: if you changed the port before running the backend, you will have to change it here as well. - ## Notes If you would like to save your model inside of your docker container or move it into your local machine, you will need to access the terminal of your docker container. See how to do this [here](https://stackoverflow.com/a/30173220). diff --git a/label_studio_ml/examples/yolov8/label_to_coco.yml b/label_studio_ml/examples/yolov8/class_matching.yml similarity index 78% rename from label_studio_ml/examples/yolov8/label_to_coco.yml rename to label_studio_ml/examples/yolov8/class_matching.yml index 1889d48c0..1ffb4d67d 100644 --- a/label_studio_ml/examples/yolov8/label_to_coco.yml +++ b/label_studio_ml/examples/yolov8/class_matching.yml @@ -4,4 +4,10 @@ labels_to_coco: cats: Cat lights: Traffic light - cars: Car \ No newline at end of file + cars: Car +all_classes: + - "cats" + - "cars" + - "taxi" + - "lights" + - "others" \ No newline at end of file diff --git a/label_studio_ml/examples/yolov8/model.py b/label_studio_ml/examples/yolov8/model.py index 02cc56a1b..50b24f1af 100644 --- a/label_studio_ml/examples/yolov8/model.py +++ b/label_studio_ml/examples/yolov8/model.py @@ -1,7 +1,6 @@ from typing import List, Dict, Optional from label_studio_ml.model import LabelStudioMLBase from label_studio_ml.utils import get_image_local_path -from label_studio_tools.core.label_config import parse_config from 
label_studio_tools.core.utils.io import get_local_path import os @@ -18,10 +17,11 @@ LABEL_STUDIO_HOST = os.environ.get("LABEL_STUDIO_HOST") -with open("label_to_coco.yml", "r") as file: +with open("class_matching.yml", "r") as file: ls_config = yaml.safe_load(file) label_to_COCO = ls_config["labels_to_coco"] +all_classes = ls_config["all_classes"] JUST_CUSTOM = True if len(label_to_COCO) == 0 else False @@ -35,18 +35,10 @@ class YOLO_LS(LabelStudioMLBase): def __init__(self, project_id, **kwargs): super(YOLO_LS, self).__init__(**kwargs) - print(f"initializing teh model the kwargs are {kwargs}") self.device = "cuda" if torch.cuda.is_available else "cpu" # can to mps - - # self.custom_parsed_label_config = parse_config(self.label_config) - - # print(self.parsed_label_config) - # print(self.custom_parsed_label_config) - if not JUST_CUSTOM: self.pretrained_model = YOLO('yolov8n-oiv7.pt') - # test = self.pretrained_model('./feral-cat-Kevin-Patrick.jpg') if not NEW_START: shutil.copyfile('./yolov8n.pt', 'yolov8n(custom).pt') @@ -58,13 +50,10 @@ def __init__(self, project_id, **kwargs): self.first_use = FIRST_USE - # print(f"can it gather the config? 
{self.custom_parsed_label_config} and {self.label_config}") - # parsed = self.parsed_label_config - # classes = parsed['label']['labels'] - - classes = ["cats", "cars", "taxi", "lights", "others"] + self.from_name = "label" + self.to_name = "image" - print(f"the classes are {classes}") + classes = all_classes self.NEW_START = NEW_START self.JUST_CUSTOM = JUST_CUSTOM @@ -74,10 +63,6 @@ def __init__(self, project_id, **kwargs): first_label_classes = list(label_to_COCO.keys()) # raw labels from labelling config second_label_classes = [x for x in classes if x not in set(first_label_classes)] # raw labels from labelling config - - # if the user changes the labelling config, it shouldn't automatically destroy everything - # so only change it if we are starting brand new - input_file = "custom_config.yml" with open(input_file, "r") as file: data = yaml.safe_load(file) @@ -93,22 +78,12 @@ def __init__(self, project_id, **kwargs): else: self.custom_num_to_name = data["names"] - print(f"self class to name is {self.custom_num_to_name}") self.custom_name_to_num = {v:k for k, v in self.custom_num_to_name.items()} - - print(classes) def predict(self, tasks: List[Dict], context: Optional[Dict] = None, **kwargs) -> List[Dict]: """ Inference logic for YOLO model """ - print("getting predictions") - - # self.from_name, self.to_name, self.value = self.get_first_tag_occurence('RectangleLabels', 'Image') - - self.from_name = "label" - self.to_name = "image" - imgs = [] lengths = [] @@ -118,22 +93,15 @@ def predict(self, tasks: List[Dict], context: Optional[Dict] = None, **kwargs) - raw_img_path = task['data']['image'] try: - print(f"......the local image path is {raw_img_path}") - img_path = get_local_path( url=raw_img_path, hostname=LABEL_STUDIO_HOST, access_token=LABEL_STUDIO_ACCESS_TOKEN ) - print(f"........the real image path is {img_path}") except: - print("..... 
umm we shouldn't be here") img_path = raw_img_path - # print(f"....did we make it here0.1?") - - print(f"the image path is {img_path}") img = Image.open(img_path) @@ -142,32 +110,22 @@ def predict(self, tasks: List[Dict], context: Optional[Dict] = None, **kwargs) - W, H = img.size lengths.append((H, W)) - print(f"....did we make it here0.0? {W} {H}") - # predicting from PIL loaded images if not self.JUST_CUSTOM: - print(f"at least we made it") try: - results_1 = self.pretrained_model.predict(imgs[0]) # define model earlier + results_1 = self.pretrained_model.predict(imgs) # define model earlier except Exception as e: print(f"the error was {e}") - # results_1 = self.pretrained_model(imgs) else: results_1 = None - print(f"....did we make it here?") - - # we don't want the predictions from the pretrained version of the custom model # because it hasn't reshaped to the new classes yet if not self.first_use: - results_2 = self.custom_model.predict(source=imgs, sync=False) + results_2 = self.custom_model.predict(source=imgs) else: results_2 = None - print(f"....did we make it here2222?") - - # each item will be the predictions for a task predictions = [] @@ -178,15 +136,9 @@ def predict(self, tasks: List[Dict], context: Optional[Dict] = None, **kwargs) - for (result, len) in zip(results, lengths): boxes = result.boxes.cpu().numpy() - - print(result.names) # gives dict matching num to names ex. 
{0: 'person', 1: 'bicycle', 2: 'car', 3: 'motorcycle', 4 - - print(f"the confidences are {boxes.conf}") - pretrained = True if res_num == 0 else False # results names predictions.append(self.get_results(boxes.xywh, boxes.cls, len, boxes.conf, result.names, pretrained=pretrained)) - print(f"the predictions are {predictions}") return predictions @@ -195,8 +147,6 @@ def get_results(self, boxes, classes, length, confidences, num_to_names_dict, pr results = [] - print(f"the to and from names are {self.from_name} and {self.to_name}") - for box, class_num, conf in zip(boxes, classes, confidences): label_id = str(uuid4())[:9] @@ -274,8 +224,6 @@ def fit(self, event, data, **kwargs): project_path = sample_img_path.split("/")[:-1] image_name = sample_img_path.split("/")[-1] - print(f"image name is {image_name}") - img1 = img.save(f"./datasets/temp/images/{image_name}") img2 = img.save(f"./datasets/temp/images/(2){image_name}") @@ -323,9 +271,7 @@ def fit(self, event, data, **kwargs): with open(f'./datasets/temp/labels/(2){txt_name}.txt', 'a') as f: f.write(f"{label_num} {trans_x} {trans_y} {w} {h}\n") - print(f"........at least we started") results = self.custom_model.train(data='custom_config.yml', epochs = 1, imgsz=640) - print(f"........maybe we can end") self.first_use = False diff --git a/label_studio_ml/examples/yolov8/requirements.txt b/label_studio_ml/examples/yolov8/requirements.txt index b7fe6e45f..23a7f7a09 100644 --- a/label_studio_ml/examples/yolov8/requirements.txt +++ b/label_studio_ml/examples/yolov8/requirements.txt @@ -40,8 +40,8 @@ seaborn==0.13.0 six==1.16.0 sympy==1.12 thop==0.1.1.post2209072238 -torch==2.1.0 -torchvision==0.16.0 +torch==2.0.1 +torchvision==0.15.2 tqdm==4.66.1 typing_extensions==4.8.0 tzdata==2023.3 From fb6d628768c0d140b00949eda10a996c9329a835 Mon Sep 17 00:00:00 2001 From: shondle Date: Sat, 18 Nov 2023 02:57:06 -0600 Subject: [PATCH 16/21] Remove predefined workers --- label_studio_ml/examples/yolov8/docker-compose.yml | 2 -- 1 file 
changed, 2 deletions(-) diff --git a/label_studio_ml/examples/yolov8/docker-compose.yml b/label_studio_ml/examples/yolov8/docker-compose.yml index d9c28692d..a55cf4a55 100644 --- a/label_studio_ml/examples/yolov8/docker-compose.yml +++ b/label_studio_ml/examples/yolov8/docker-compose.yml @@ -14,8 +14,6 @@ services: build: . environment: - MODEL_DIR=/data/models - # - WORKERS=2 - # - THREADS=4 - LOG_LEVEL=DEBUG - LABEL_STUDIO_HOST= - LABEL_STUDIO_ACCESS_TOKEN= From ce99c2e23a802a9ecb90b60964c9fd2b8ece20d6 Mon Sep 17 00:00:00 2001 From: shondle Date: Mon, 20 Nov 2023 09:57:31 -0600 Subject: [PATCH 17/21] Fix label misnaming --- label_studio_ml/examples/yolov8/model.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/label_studio_ml/examples/yolov8/model.py b/label_studio_ml/examples/yolov8/model.py index 50b24f1af..f9c02de62 100644 --- a/label_studio_ml/examples/yolov8/model.py +++ b/label_studio_ml/examples/yolov8/model.py @@ -81,6 +81,7 @@ def __init__(self, project_id, **kwargs): self.custom_name_to_num = {v:k for k, v in self.custom_num_to_name.items()} + def predict(self, tasks: List[Dict], context: Optional[Dict] = None, **kwargs) -> List[Dict]: """ Inference logic for YOLO model """ @@ -224,6 +225,8 @@ def fit(self, event, data, **kwargs): project_path = sample_img_path.split("/")[:-1] image_name = sample_img_path.split("/")[-1] + + img1 = img.save(f"./datasets/temp/images/{image_name}") img2 = img.save(f"./datasets/temp/images/(2){image_name}") @@ -231,7 +234,7 @@ def fit(self, event, data, **kwargs): all_new_paths.append(f"./datasets/temp/images/(2){image_name}") # now saving text file labels - txt_name = (image_path.split('/')[-1]).rsplit('.', 1)[0] + txt_name = image_name.rsplit('.', 1)[0] with open(f'./datasets/temp/labels/{txt_name}.txt', 'w') as f: f.write("") @@ -246,7 +249,7 @@ def fit(self, event, data, **kwargs): value = result['value'] label = value['rectanglelabels'][0] - + if label in self.custom_name_to_num: # these are 
out of 100, so you need to convert them back @@ -263,7 +266,7 @@ def fit(self, event, data, **kwargs): trans_x = (x / 100) + (0.5 * w) trans_y = (y / 100) + (0.5 * h) - # now getting the class label + # now getting the class label label_num = self.custom_name_to_num.get(label) with open(f'./datasets/temp/labels/{txt_name}.txt', 'a') as f: From 4dcf5bea005f90bbfd4b75782809457753ab03d0 Mon Sep 17 00:00:00 2001 From: shondle Date: Mon, 20 Nov 2023 10:02:09 -0600 Subject: [PATCH 18/21] Update dependencies --- label_studio_ml/examples/yolov8/Dockerfile | 20 +++++++++++++------ .../examples/yolov8/requirements.txt | 3 ++- 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/label_studio_ml/examples/yolov8/Dockerfile b/label_studio_ml/examples/yolov8/Dockerfile index d86a8ab17..1e52aed48 100644 --- a/label_studio_ml/examples/yolov8/Dockerfile +++ b/label_studio_ml/examples/yolov8/Dockerfile @@ -6,11 +6,9 @@ RUN apt-get update && \ apt-get -y install ffmpeg libsm6 libxext6 libffi-dev python3-dev gcc ENV PYTHONUNBUFFERED=True \ - PORT=9090 \ - WORKERS=2 \ - THREADS=4 + PORT=9090 -WORKDIR /app +WORKDIR /tmp COPY requirements.txt . RUN pip install --no-cache-dir -r requirements.txt @@ -18,6 +16,16 @@ RUN pip install --no-cache-dir -r requirements.txt RUN wget https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-oiv7.pt RUN wget https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n.pt -COPY . 
./ +COPY uwsgi.ini /etc/uwsgi/ +COPY supervisord.conf /etc/supervisor/conf.d/ + +WORKDIR /app + +RUN mkdir -p datasets/temp/images +RUN mkdir -p datasets/temp/labels + +COPY * /app/ + +EXPOSE 9090 -CMD exec gunicorn --preload --bind :$PORT --workers $WORKERS --threads $THREADS --timeout 0 _wsgi:app +CMD ["/usr/local/bin/supervisord", "-c", "/etc/supervisor/conf.d/supervisord.conf"] \ No newline at end of file diff --git a/label_studio_ml/examples/yolov8/requirements.txt b/label_studio_ml/examples/yolov8/requirements.txt index 23a7f7a09..63edee8a9 100644 --- a/label_studio_ml/examples/yolov8/requirements.txt +++ b/label_studio_ml/examples/yolov8/requirements.txt @@ -47,5 +47,6 @@ typing_extensions==4.8.0 tzdata==2023.3 ultralytics==8.0.200 urllib3==2.0.7 -# Werkzeug==2.0.2 +supervisor==4.2.2 +uwsgi==2.0.21 rq From ed89b40fe6afdb59c3b15b62bb90528a7e0933bd Mon Sep 17 00:00:00 2001 From: shondle Date: Mon, 20 Nov 2023 10:43:34 -0600 Subject: [PATCH 19/21] Explain torch pin --- label_studio_ml/examples/yolov8/requirements.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/label_studio_ml/examples/yolov8/requirements.txt b/label_studio_ml/examples/yolov8/requirements.txt index 63edee8a9..c751da30d 100644 --- a/label_studio_ml/examples/yolov8/requirements.txt +++ b/label_studio_ml/examples/yolov8/requirements.txt @@ -40,8 +40,11 @@ seaborn==0.13.0 six==1.16.0 sympy==1.12 thop==0.1.1.post2209072238 + +# pinned torch and torchvision to previous versions due to signal 11 killing worker errors torch==2.0.1 torchvision==0.15.2 + tqdm==4.66.1 typing_extensions==4.8.0 tzdata==2023.3 From 7d80978078146fb9cb906dd9cab7e342a4ae7c91 Mon Sep 17 00:00:00 2001 From: Shivansh Date: Wed, 29 Nov 2023 08:17:07 -0600 Subject: [PATCH 20/21] Add batching support in fit method (#387) --- label_studio_ml/examples/yolov8/model.py | 126 +++++++++++++---------- 1 file changed, 73 insertions(+), 53 deletions(-) diff --git a/label_studio_ml/examples/yolov8/model.py 
b/label_studio_ml/examples/yolov8/model.py index f9c02de62..37cce93eb 100644 --- a/label_studio_ml/examples/yolov8/model.py +++ b/label_studio_ml/examples/yolov8/model.py @@ -85,6 +85,8 @@ def __init__(self, project_id, **kwargs): def predict(self, tasks: List[Dict], context: Optional[Dict] = None, **kwargs) -> List[Dict]: """ Inference logic for YOLO model """ + print("..... we here 3.9") + imgs = [] lengths = [] @@ -195,14 +197,23 @@ def fit(self, event, data, **kwargs): You can run your logic here to update the model """ - results = data['annotation']['result'] - data = data['task']['data'] - image_path = data['image'] - image_paths = [image_path] all_new_paths = [] - true_img_paths = [] - for raw_img_path in image_paths: + try: + total_results = data['annotations'] + except: # then this is a submission of just one image from the annotation image page + total_results = [data] + + multiple_results = True if len(total_results) > 1 else False + + for task in total_results: + + if not multiple_results: + raw_img_path = task["task"]["data"]["image"] + else: + raw_img_path = task["data"]["image"] + + try: img_path = get_image_local_path( raw_img_path, @@ -211,68 +222,77 @@ def fit(self, event, data, **kwargs): ) except: img_path = raw_img_path - + img = Image.open(img_path) - name = raw_img_path.split("/")[-1] + sample_img_path = img_path - true_img_paths.append(img_path) + img = Image.open(sample_img_path) + + image_name = sample_img_path.split("/")[-1] - sample_img_path = true_img_paths[0] + img1 = img.save(f"./datasets/temp/images/{image_name}") - img = Image.open(sample_img_path) + if not multiple_results: + img2 = img.save(f"./datasets/temp/images/(2){image_name}") - project_path = sample_img_path.split("/")[:-1] - image_name = sample_img_path.split("/")[-1] + all_new_paths.append(f"./datasets/temp/images/{image_name}") - + if not multiple_results: + all_new_paths.append(f"./datasets/temp/images/(2){image_name}") - img1 = 
img.save(f"./datasets/temp/images/{image_name}") - img2 = img.save(f"./datasets/temp/images/(2){image_name}") + # now saving text file labels + txt_name = image_name.rsplit('.', 1)[0] - all_new_paths.append(f"./datasets/temp/images/{image_name}") - all_new_paths.append(f"./datasets/temp/images/(2){image_name}") + with open(f'./datasets/temp/labels/{txt_name}.txt', 'w') as f: + f.write("") - # now saving text file labels - txt_name = image_name.rsplit('.', 1)[0] + + if not multiple_results: + with open(f'./datasets/temp/labels/(2){txt_name}.txt', 'w') as f: + f.write("") - with open(f'./datasets/temp/labels/{txt_name}.txt', 'w') as f: - f.write("") - with open(f'./datasets/temp/labels/(2){txt_name}.txt', 'w') as f: - f.write("") + all_new_paths.append(f'./datasets/temp/labels/{txt_name}.txt') - all_new_paths.append(f'./datasets/temp/labels/{txt_name}.txt') - all_new_paths.append(f'./datasets/temp/labels/(2){txt_name}.txt') + if not multiple_results: + all_new_paths.append(f'./datasets/temp/labels/(2){txt_name}.txt') + if not multiple_results: + results = task["annotation"]["result"] + else: + results = task["annotations"][0]["result"] - for result in results: - value = result['value'] - label = value['rectanglelabels'][0] - - if label in self.custom_name_to_num: - - # these are out of 100, so you need to convert them back - x = value['x'] - y = value['y'] - width = value['width'] - height = value['height'] - - orig_width = result['original_width'] - orig_height = result['original_height'] - - w = width / 100 - h = height / 100 - trans_x = (x / 100) + (0.5 * w) - trans_y = (y / 100) + (0.5 * h) - - # now getting the class label - label_num = self.custom_name_to_num.get(label) - - with open(f'./datasets/temp/labels/{txt_name}.txt', 'a') as f: - f.write(f"{label_num} {trans_x} {trans_y} {w} {h}\n") - with open(f'./datasets/temp/labels/(2){txt_name}.txt', 'a') as f: - f.write(f"{label_num} {trans_x} {trans_y} {w} {h}\n") + for result in results: + + value = 
result['value'] + label = value['rectanglelabels'][0] + + if label in self.custom_name_to_num: + + # these are out of 100, so you need to convert them back + x = value['x'] + y = value['y'] + width = value['width'] + height = value['height'] + + orig_width = result['original_width'] + orig_height = result['original_height'] + + w = width / 100 + h = height / 100 + trans_x = (x / 100) + (0.5 * w) + trans_y = (y / 100) + (0.5 * h) + + # now getting the class label + label_num = self.custom_name_to_num.get(label) + + with open(f'./datasets/temp/labels/{txt_name}.txt', 'a') as f: + f.write(f"{label_num} {trans_x} {trans_y} {w} {h}\n") + + if not multiple_results: + with open(f'./datasets/temp/labels/(2){txt_name}.txt', 'a') as f: + f.write(f"{label_num} {trans_x} {trans_y} {w} {h}\n") results = self.custom_model.train(data='custom_config.yml', epochs = 1, imgsz=640) From f5d2464ab3a7a6f8a57c4dcbfaa58e12c0a77ba6 Mon Sep 17 00:00:00 2001 From: Shivansh Date: Wed, 29 Nov 2023 19:02:24 -0600 Subject: [PATCH 21/21] Add training instructions and video sample --- label_studio_ml/examples/yolov8/README.md | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/label_studio_ml/examples/yolov8/README.md b/label_studio_ml/examples/yolov8/README.md index 8977b4be4..67d3fefce 100644 --- a/label_studio_ml/examples/yolov8/README.md +++ b/label_studio_ml/examples/yolov8/README.md @@ -1,11 +1,17 @@ This project integrates the YOLOv8 model with Label Studio. + +https://github.com/HumanSignal/label-studio-ml-backend/assets/106922533/82f539f1-dbee-47bf-b129-f7b5df83af43 + + + ## How The Project Works This project helps you detect objects in Label Studio by doing two things. 1 - Uses a pretrained YOLOv8 model on Google's Open Images V7 (OIV7) to provide a pretrained model on 600 classes! 
+ 2 - Use a custom model for classes in cases that don't fit under the 600 classes in the OIV7 dataset While annotating in label studio, you predefine which one of your labels overlap with the first pretrained model and custom labels that don't fit under the 600 classes are automatically used in the second custom model for predictions that is trained as you submit annotations in Label Studio. @@ -49,9 +55,17 @@ Set `LABEL_STUDIO_ACCESS_TOKEN` by going to your Label Studio Accounts & Setting Run `docker compose up` to start the backend. Under the `Machine Learning` settings in your project in Label Studio enter the following URL while adding the model: `http://{your_private_ip}:9090`. Note: if you changed the port before running the backend, you will have to change it here as well. +## Training With ML Backend + +In the `Machine Learning` settings for your project in Label Studio, make sure the first toggle, which starts model training when annotations are submitted, is turned on. With it enabled, each submitted annotation trains the custom model on the custom classes you defined in the previous steps. + +If you would like to train on multiple images at once, which is preferred, run Label Studio from Docker using the [`feature/batch-train`](https://github.com/HumanSignal/label-studio/tree/feature/batch-train) branch. In that branch's `docker-compose.yml`, add `EXPERIMENTAL_FEATURES=True` to the environment variables under the app. Then, run the instance. + +In the task menu, select all the tasks you would like to train your ML backend custom model on. Then, under the toggle menu in the top left-hand corner, select `Batch Train` and select `Ok` in the next popup menu. + ## Notes If you would like to save your model inside of your docker container or move it into your local machine, you will need to access the terminal of your docker container. See how to do this [here](https://stackoverflow.com/a/30173220).
-If you want to train a new custom model, move the `yolov8n(custom).pt` out of your container's directory. It will automatically realize there is no custom model, and will create a new one from scratch to use when training custom models. \ No newline at end of file +If you want to train a new custom model, move the `yolov8n(custom).pt` file out of your container's directory. The backend will automatically detect that there is no custom model and will create a new one from scratch to use when training custom models.