diff --git a/.gitignore b/.gitignore index 2644b8e..834ace0 100644 --- a/.gitignore +++ b/.gitignore @@ -12,6 +12,7 @@ models/yolov4.weights models/yolov4_new.weights models/yolov4x-mish.weights models/yolov4-csp.weights +models/yolov4-colors.weights diff --git a/README.md b/README.md index 7ef992c..255417b 100644 --- a/README.md +++ b/README.md @@ -1,14 +1,33 @@ # Baxter-Robot-ObjDet Repository for some experiments with Baxter robot and Object Detection. -### Jupyter Notebook -The notebook contains some examples of image processing with OpenCV and Pyhton. -If it's not in the right path, open the notebook with Anaconda Propt passing the correct disk. -Example: +The code allows the robot: +- to detect different objects, +- approach them with his arm, indipendently from their position on the table in front of it, +- pick them up safely and move them. + +> The models of large size in the folder models can be downloaded from the [releases](https://github.com/igor-lirussi/Baxter-Robot-ObjDet/releases) + +### Jupyter Notebooks +The notebooks contain some examples of image processing with OpenCV and Pyhton. + +The notebooks study both face detection and object-detection. + +If it's not in the right path, open the notebook with Anaconda Propt passing the correct disk. Example: ''' jupyter notebook --notebook-dir=D: ''' +### Colors Toys Object Detection Model +A Yolo v4 model has been trained to detect 9 different classes of food toys present in CoLoRs Lab. + +In the [releases](https://github.com/igor-lirussi/Baxter-Robot-ObjDet/releases) you can download: +- colors.names +- yolov4-colors.cfg +- yolov4-colors.weights + +!(dataset)[img/dataset.jpg] + ## Built With * Python 3.7.18 @@ -16,6 +35,7 @@ jupyter notebook --notebook-dir=D: ## Changelog **Version 1.0** - Initial release, 31-01-2022 +**Version 1.1** - Added Colors yolov4 trained net, 28-09-2023 ## Authors diff --git a/baxter-python3 b/baxter-python3 index fade81a..77bacc9 160000 --- a/baxter-python3 +++ b/baxter-python3 @@ -1 +1 @@ -Subproject commit fade81aaebb9a8634229700a1178c87a62baab2c +Subproject commit 77bacc99c96052e2d7112386aa5cde5743d53eb2 diff --git a/img/dataset.jpg b/img/dataset.jpg new file mode 100644 index 0000000..0774b9b Binary files /dev/null and b/img/dataset.jpg differ diff --git a/models/-README.md b/models/-README.md new file mode 100644 index 0000000..48456e7 --- /dev/null +++ b/models/-README.md @@ -0,0 +1,2 @@ +## Model folder +Large files of this folder (es weights of the neural networks) can be downloaded manually from the [releases](https://github.com/igor-lirussi/Baxter-Robot-ObjDet/releases) \ No newline at end of file diff --git a/models/colors.names b/models/colors.names new file mode 100644 index 0000000..0735171 --- /dev/null +++ b/models/colors.names @@ -0,0 +1,9 @@ +apple +cabbage +carrot +cheese +grapes +lemon +mushroom +strawberry +tomato \ No newline at end of file diff --git a/models/yolov4-colors.cfg b/models/yolov4-colors.cfg new file mode 100644 index 0000000..e073968 --- /dev/null +++ b/models/yolov4-colors.cfg @@ -0,0 +1,1160 @@ +[net] +# Testing +#batch=1 +#subdivisions=1 +# Training +batch=64 +subdivisions=32 +width=640 +height=640 +channels=3 +momentum=0.949 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 18000 +policy=steps +steps=14400,16200 +scales=.1,.1 + +#cutmix=1 +mosaic=1 + +#:104x104 54:52x52 85:26x26 104:13x13 for 416 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=mish + +# 
Downsample + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1,-7 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1,-10 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 
+filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1,-28 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1,-28 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + 
+[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1,-16 + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=mish +stopbackward=800 + +########################## + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +### SPP ### +[maxpool] +stride=1 +size=5 + +[route] +layers=-2 + +[maxpool] +stride=1 +size=9 + +[route] +layers=-4 + +[maxpool] +stride=1 +size=13 + +[route] +layers=-1,-3,-5,-6 +### End SPP ### + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = 85 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = 54 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +########################## + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=42 +activation=linear + + +[yolo] +mask = 0,1,2 +anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 +classes=9 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +scale_x_y = 1.2 +iou_thresh=0.213 +cls_normalizer=1.0 +iou_normalizer=0.07 
+iou_loss=ciou +nms_kind=greedynms +beta_nms=0.6 +max_delta=5 + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=256 +activation=leaky + +[route] +layers = -1, -16 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=42 +activation=linear + + +[yolo] +mask = 3,4,5 +anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 +classes=9 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +scale_x_y = 1.1 +iou_thresh=0.213 +cls_normalizer=1.0 +iou_normalizer=0.07 +iou_loss=ciou +nms_kind=greedynms +beta_nms=0.6 +max_delta=5 + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=512 +activation=leaky + +[route] +layers = -1, -37 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=42 +activation=linear + + +[yolo] +mask = 6,7,8 +anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 +classes=9 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 +scale_x_y = 1.05 +iou_thresh=0.213 +cls_normalizer=1.0 +iou_normalizer=0.07 +iou_loss=ciou +nms_kind=greedynms +beta_nms=0.6 +max_delta=5 + diff --git a/reach_obj_det_YoloV4.py b/reach_obj_det_YoloV4.py index 0a7882c..97902d8 100644 --- a/reach_obj_det_YoloV4.py +++ b/reach_obj_det_YoloV4.py @@ -5,6 +5,7 @@ face=importlib.import_module("baxter-python3.faces") import cv2 import numpy as np +import math import argparse from baxter_core_msgs.msg import EndpointState @@ -15,16 +16,19 @@ unreachable_count=0 garabbed=False +model_list = ["yolov4","yolov4-new","yolov4x-mish","yolov4-p6","yolov4-colors"] + parser = argparse.ArgumentParser() -parser.add_argument('-m', '--model', type=str, default='yolov4-new', help='Model desired') +parser.add_argument('-m', '--model', type=str, default='yolov4-colors', help='Model desired among '+str(model_list)) parser.add_argument('-o', '--object', type=str, default='apple', help='Object to reach and pick') +parser.add_argument('-a', '--arm', type=str, default='left', help='Arm, left or right') args = parser.parse_args() -model_list = ["yolov4","yolov4-new", "yolov4x-mish", "yolov4-p6"] OBJECT_DESIRED = args.object +side = args.arm print("[INFO] loading model...") -if args.model == model_list[0]: +if args.model == "yolov4": #Load net modelConfig = "./models/yolov4.cfg" modelWeigths = "./models/yolov4.weights" @@ -38,7 +42,7 @@ conf_threshold = 0.1 
nms_threshold = 0.6 #lower=stronger -elif args.model == model_list[1]: +elif args.model == "yolov4-new": #Load net modelConfig = "./models/yolov4_new.cfg" modelWeigths = "./models/yolov4_new.weights" @@ -53,7 +57,7 @@ conf_threshold = 0.35 nms_threshold = 0.03 #lower=stronger -elif args.model == model_list[2]: +elif args.model == "yolov4x-mish": #Load net modelConfig = "./models/yolov4x-mish.cfg" modelWeigths = "./models/yolov4x-mish.weights" @@ -68,7 +72,7 @@ conf_threshold = 0.35 nms_threshold = 0.01 #lower=stronger -elif args.model == model_list[3]: +elif args.model == "yolov4-p6": #Load net modelConfig = "./models/yolov4-p6-1280x1280.cfg" modelWeigths = "./models/yolov4-p6-1280x1280.weights" @@ -83,6 +87,21 @@ conf_threshold = 0.35 nms_threshold = 0.01 #lower=stronger +elif args.model == "yolov4-colors": + #Load net + modelConfig = "./models/yolov4-colors.cfg" + modelWeigths = "./models/yolov4-colors.weights" + net = cv2.dnn.readNetFromDarknet(modelConfig, modelWeigths) + print("Net Loaded: {}".format(args.model)) + + with open('./models/colors.names', 'r') as f: + classes = f.read().splitlines() + print("Classes: {}".format(len(classes))) + + #suggested + conf_threshold = 0.35 + nms_threshold = 0.6 #lower=stronger + else: print("[Error] Model passed not present, choose between: {}".format(model_list)) exit() @@ -114,9 +133,9 @@ def draw_bounding_box(img_yolo, class_id, confidence, x, y, x_plus_w, y_plus_h): print("[INFO] starting robot...") rospy.init_node("testing") rospy.sleep(2.0) -robot = baxter.BaxterRobot(rate=100, arm="left") +robot = baxter.BaxterRobot(rate=100, arm=side) rospy.sleep(2.0) -robot._set_camera(camera_name="left_hand_camera", state=True, width=WIDTH, height=HEIGHT, fps=30) +robot._set_camera(camera_name=side+"_hand_camera", state=True, width=WIDTH, height=HEIGHT, fps=30) robot.set_robot_state(True) @@ -127,21 +146,24 @@ def draw_bounding_box(img_yolo, class_id, confidence, x, y, x_plus_w, y_plus_h): robot.gripper_release() #display face -face._set_look(robot, "left_down", DISPLAY_FACE) +face._set_look(robot, side+"_down", DISPLAY_FACE) print("[INFO] moving in position...") print(robot.move_to_neutral()) -face._set_look(robot, "left", DISPLAY_FACE) +face._set_look(robot, side, DISPLAY_FACE) print(robot.move_to_zero()) face._set_look(robot, "frontal", DISPLAY_FACE) -print(robot.move_to_joint_position({"left_s0": -PI/4}, timeout=10)) data = np.array(list(robot._cam_image.data), dtype=np.uint8) middle_point = np.array([WIDTH/2, HEIGHT/2]) #move over the table -pos_x = 0.8203694373186249 -pos_y = 0.08642622598662506 +if side=="left": + pos_x = 0.8203694373186249 + pos_y = 0.08642622598662506 +else: + pos_x = 0.7456267492841516 + pos_y = -0.18863639477015234 pos_z = 0.28462916699929078 ori_x = 0.011154239796145276 ori_y = 0.9989687054009745 @@ -196,10 +218,24 @@ def draw_bounding_box(img_yolo, class_id, confidence, x, y, x_plus_w, y_plus_h): indices = cv2.dnn.NMSBoxes(boxes, confidences, conf_threshold, nms_threshold) print("Detections: "+str(indices.shape[0])) if len(indices)!=0 else print("No Detections") - #object reset not detected and in the center + #set gripper center, different from image center, should be in between the tips of the gripper, a little down. 
+ gripper_delta_x=0 + gripper_delta_y=0 + if side=="left": #cameras in hands can be slightly diffenent angles, so center of gripper in the images is different + gripper_delta_x=20 + gripper_delta_y=-30 + elif side=="right": + gripper_delta_x=40 + gripper_delta_y=-100 + #center for the calculations + center_object_x = round(WIDTH/2)+gripper_delta_x # usually the gripper center is a little on the right of the image of camera + center_object_y = round(HEIGHT/2)+gripper_delta_y # usually the gripper is a litte up compared to the camera + + #object reset: not detected and in the center object_present=False - object_x=WIDTH/2 - object_y=HEIGHT/2 + objects_list=[] + object_x=0 + object_y=0 # go through the detections remaining # after nms and draw bounding box for i in indices: @@ -210,38 +246,36 @@ def draw_bounding_box(img_yolo, class_id, confidence, x, y, x_plus_w, y_plus_h): w = box[2] h = box[3] draw_bounding_box(img, class_ids[i], confidences[i], round(x), round(y), round(x+w), round(y+h)) - #save X Y object if present + #save (X Y) object if present if classes[class_ids[i]] == OBJECT_DESIRED: object_present=True object_x = round(x+(w/2)) object_y = round(y+(h/2)) - center_object_x = round(WIDTH/2)+20 #the gripper center is a little on the right of the image - center_object_y = round(HEIGHT/2)-20 #the gripper is a litte up compared to the camera + objects_list.append((object_x,object_y)) print("{} found at: {} {}, size: {} {}".format(classes[class_ids[i]],object_x,object_y, w,h)) cv2.putText(img, "X", (object_x,object_y), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0,0,255), 2) cv2.putText(img, "O", (center_object_x,center_object_y), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0,0,255), 3) + #object is selected as the closest one if there are many + if len(objects_list) > 1: + print("Many found, finding the closest") + closest_distance = float('inf') + for x, y in objects_list: + # Calculate Euclidean distance between the current point and the target point + distance = math.sqrt((x - center_object_x) ** 2 + (y - center_object_y) ** 2) + if distance < closest_distance: + closest_distance = distance + object_x, object_y = (x, y) + + #put different color mark on object selected + cv2.putText(img, "X", (object_x,object_y), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0,255,0), 2) - #if near: grab + #check infrared distance if robot._ir_range.range > robot._ir_range.min_range and robot._ir_range.range < robot._ir_range.max_range: current_range = robot._ir_range.range distance_str= "Dist: {:0.2f}".format(robot._ir_range.range) print(distance_str) cv2.putText(img, distance_str, (50,50), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0,0,255), 2) - if current_range < 0.15 and not garabbed: - print("[info] Gripper close enough, GRASPING") - garabbed = True - #grab - robot.gripper_grip() - rospy.sleep(2.0) - #move - robot.set_cartesian_position([pos_x, pos_y, pos_z], [ori_x, ori_y, ori_z, ori_w]) - robot.move_to_zero() - rospy.sleep(2.0) - robot.gripper_release() - garabbed=False - rospy.sleep(2.0) - robot.set_cartesian_position([pos_x, pos_y, pos_z], [ori_x, ori_y, ori_z, ori_w]) else: current_range = 9999 print("Range sensor out of limits") @@ -250,10 +284,27 @@ def draw_bounding_box(img_yolo, class_id, confidence, x, y, x_plus_w, y_plus_h): #display image robot._set_display_data(cv2.resize(img, (1024,600))) + #if too close, grab + if object_present and current_range < 0.16 and not garabbed: + print("[info] Gripper CLOSE enough and object present, GRABBING without more movements") + #face._set_face(robot, "determined", DISPLAY_FACE) + #grab + 
garabbed = True + robot.gripper_grip() + rospy.sleep(2.0) + #move + robot.set_cartesian_position([pos_x, pos_y, pos_z], [ori_x, ori_y, ori_z, ori_w]) + robot.move_to_zero() + rospy.sleep(2.0) + robot.gripper_release() + garabbed=False + rospy.sleep(2.0) + robot.set_cartesian_position([pos_x, pos_y, pos_z], [ori_x, ori_y, ori_z, ori_w]) + #if present and not close enough: move towards it if object_present and not garabbed: #get current arm position - msg = rospy.wait_for_message("/robot/limb/left/endpoint_state", EndpointState) + msg = rospy.wait_for_message("/robot/limb/"+side+"/endpoint_state", EndpointState) p = msg.pose.position q = msg.pose.orientation #compute deviation in image
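
For readers of this diff, a minimal standalone sketch (not the repository's code) of the flow it adds to reach_obj_det_YoloV4.py: loading the yolov4-colors network and class names the same way as the new `elif args.model == "yolov4-colors"` branch, then, when several instances of the desired class are detected, keeping the one whose box centre is closest to the gripper-centre pixel, as the new `objects_list` loop does. The 640x640 input size (taken from the cfg width/height), the 1/255 scaling, and the helper names `detect` / `closest_to_gripper` are assumptions for illustration; the script's actual preprocessing and main loop are not part of this diff.

```python
# Sketch only: mirrors the yolov4-colors loading and the closest-to-gripper
# selection introduced in this diff. Input size 640x640 (from the cfg) and
# 1/255 scaling are assumed; helper names are illustrative, not from the repo.
import math
import cv2
import numpy as np

conf_threshold = 0.35   # suggested values from the yolov4-colors branch
nms_threshold = 0.6     # lower = stronger suppression

net = cv2.dnn.readNetFromDarknet("./models/yolov4-colors.cfg",
                                 "./models/yolov4-colors.weights")
with open("./models/colors.names") as f:
    classes = f.read().splitlines()

def detect(img):
    """Run the net on a BGR image and return (class_id, confidence, box) after NMS."""
    h, w = img.shape[:2]
    blob = cv2.dnn.blobFromImage(img, 1 / 255.0, (640, 640), swapRB=True, crop=False)
    net.setInput(blob)
    outs = net.forward(net.getUnconnectedOutLayersNames())
    boxes, confidences, class_ids = [], [], []
    for out in outs:
        for det in out:                      # det = [cx, cy, w, h, obj, class scores...]
            scores = det[5:]
            class_id = int(np.argmax(scores))
            conf = float(scores[class_id])
            if conf > conf_threshold:
                cx, cy, bw, bh = det[0] * w, det[1] * h, det[2] * w, det[3] * h
                boxes.append([cx - bw / 2, cy - bh / 2, bw, bh])
                confidences.append(conf)
                class_ids.append(class_id)
    keep = cv2.dnn.NMSBoxes(boxes, confidences, conf_threshold, nms_threshold)
    return [(class_ids[i], confidences[i], boxes[i]) for i in np.array(keep).flatten()]

def closest_to_gripper(detections, wanted, center_x, center_y):
    """Among detections of the wanted class, return the box centre nearest to the
    gripper-centre pixel (the same Euclidean criterion used in the diff)."""
    best, best_dist = None, float("inf")
    for class_id, conf, (x, y, bw, bh) in detections:
        if classes[class_id] != wanted:
            continue
        ox, oy = x + bw / 2, y + bh / 2
        dist = math.sqrt((ox - center_x) ** 2 + (oy - center_y) ** 2)
        if dist < best_dist:
            best, best_dist = (round(ox), round(oy)), dist
    return best
```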
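
Usage note (hedged, since running the script also requires the ROS/Baxter setup from baxter-python3 and the weights downloaded from the releases page into models/): with the argparse changes in this diff, yolov4-colors becomes the default model and the arm is selectable, so an invocation such as `python reach_obj_det_YoloV4.py -m yolov4-colors -o tomato -a right` would have the robot reach for a tomato with its right arm; the value passed to -o/--object should be one of the nine class names listed in models/colors.names.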