diff --git a/modules/detection/include/visp3/detection/vpDetectorDNNOpenCV.h b/modules/detection/include/visp3/detection/vpDetectorDNNOpenCV.h index 2da8aafe2d..23e2e7e3af 100644 --- a/modules/detection/include/visp3/detection/vpDetectorDNNOpenCV.h +++ b/modules/detection/include/visp3/detection/vpDetectorDNNOpenCV.h @@ -74,6 +74,7 @@ BEGIN_VISP_NAMESPACE * - Yolo v5, see usage to detect objects belonging to the COCO dataset using \ref dnn_supported_yolov5 network * - Yolo v7, see usage to detect objects belonging to the COCO dataset using \ref dnn_supported_yolov7 network * - Yolo v8, see usage to detect objects belonging to the COCO dataset using \ref dnn_supported_yolov8 network + * - Yolo v11, see usage to detect objects belonging to the COCO dataset using \ref dnn_supported_yolov11 network * * This class can be initialized from a JSON file if ViSP has been compiled with NLOHMANN JSON (see \ref soft_tool_json to see how to do it). * Examples of such JSON files can be found in the tutorial folder. @@ -98,8 +99,9 @@ class VISP_EXPORT vpDetectorDNNOpenCV YOLO_V4 = 5, /*!< The \b vpDetectorDNNOpenCV object will use the parsing method corresponding to a YoloV4 DNN. See \b vpDetectorDNNOpenCV::postProcess_YoloV3_V4 for more information.*/ YOLO_V5 = 6, /*!< The \b vpDetectorDNNOpenCV object will use the parsing method corresponding to a YoloV5 DNN. See \b vpDetectorDNNOpenCV::postProcess_YoloV5_V7 for more information.*/ YOLO_V7 = 7, /*!< The \b vpDetectorDNNOpenCV object will use the parsing method corresponding to a YoloV7 DNN. See \b vpDetectorDNNOpenCV::postProcess_YoloV5_V7 for more information.*/ - YOLO_V8 = 8, /*!< The \b vpDetectorDNNOpenCV object will use the parsing method corresponding to a YoloV8 DNN. See \b vpDetectorDNNOpenCV::postProcess_YoloV8 for more information.*/ - COUNT = 9 /*!< The number of parsing method that come along with the \b vpDetectorDNNOpenCV class.*/ + YOLO_V8 = 8, /*!< The \b vpDetectorDNNOpenCV object will use the parsing method corresponding to a YoloV8 DNN. See \b vpDetectorDNNOpenCV::postProcess_YoloV8_V11 for more information.*/ + YOLO_V11 = 9, /*!< The \b vpDetectorDNNOpenCV object will use the parsing method corresponding to a YoloV11 DNN. See \b vpDetectorDNNOpenCV::postProcess_YoloV8_V11 for more information.*/ + COUNT = 10 /*!< The number of parsing method that come along with the \b vpDetectorDNNOpenCV class.*/ } DNNResultsParsingType; typedef struct DetectionCandidates @@ -560,7 +562,7 @@ class VISP_EXPORT vpDetectorDNNOpenCV void postProcess_YoloV5_V7(DetectionCandidates &proposals, std::vector &dnnRes, const NetConfig &netConfig); - void postProcess_YoloV8(DetectionCandidates &proposals, std::vector &dnnRes, const NetConfig &netConfig); + void postProcess_YoloV8_V11(DetectionCandidates &proposals, std::vector &dnnRes, const NetConfig &netConfig); void postProcess_FasterRCNN(DetectionCandidates &proposals, std::vector &dnnRes, const NetConfig &netConfig); diff --git a/modules/detection/src/dnn/vpDetectorDNNOpenCV.cpp b/modules/detection/src/dnn/vpDetectorDNNOpenCV.cpp index d3846019e1..6696e8c83b 100644 --- a/modules/detection/src/dnn/vpDetectorDNNOpenCV.cpp +++ b/modules/detection/src/dnn/vpDetectorDNNOpenCV.cpp @@ -50,7 +50,7 @@ BEGIN_VISP_NAMESPACE * * \return std::string The list of the supported parsing methods / types of DNNs. */ -std::string vpDetectorDNNOpenCV::getAvailableDnnResultsParsingTypes() + std::string vpDetectorDNNOpenCV::getAvailableDnnResultsParsingTypes() { std::string list = "["; for (unsigned int i = 0; i < vpDetectorDNNOpenCV::COUNT - 1; i++) { @@ -88,6 +88,9 @@ std::string vpDetectorDNNOpenCV::dnnResultsParsingTypeToString(const DNNResultsP case YOLO_V8: name = "yolov8"; break; + case YOLO_V11: + name = "yolov11"; + break; case FASTER_RCNN: name = "faster-rcnn"; break; @@ -474,7 +477,7 @@ std::vector vpDetectorDNNOpenCV::getOutputsNames() names.resize(outLayers.size()); for (size_t i = 0; i < outLayers.size(); ++i) names[i] = layersNames[outLayers[i] - 1]; - } +} return names; } #endif @@ -499,7 +502,8 @@ void vpDetectorDNNOpenCV::postProcess(DetectionCandidates &proposals) postProcess_YoloV5_V7(proposals, m_dnnRes, m_netConfig); break; case YOLO_V8: - postProcess_YoloV8(proposals, m_dnnRes, m_netConfig); + case YOLO_V11: + postProcess_YoloV8_V11(proposals, m_dnnRes, m_netConfig); break; case FASTER_RCNN: postProcess_FasterRCNN(proposals, m_dnnRes, m_netConfig); @@ -815,7 +819,7 @@ void vpDetectorDNNOpenCV::postProcess_YoloV5_V7(DetectionCandidates &proposals, \param dnnRes: raw results of the \b vpDetectorDNNOpenCV::detect step. \param netConfig: the configuration of the network, to know for instance the DNN input size. */ -void vpDetectorDNNOpenCV::postProcess_YoloV8(DetectionCandidates &proposals, std::vector &dnnRes, const NetConfig &netConfig) +void vpDetectorDNNOpenCV::postProcess_YoloV8_V11(DetectionCandidates &proposals, std::vector &dnnRes, const NetConfig &netConfig) { // Code adapted from here: https://github.com/JustasBart/yolov8_CPP_Inference_OpenCV_ONNX/blob/minimalistic/inference.cpp // Compute the ratio between the original size of the image and the network size to translate network coordinates into @@ -965,7 +969,7 @@ void vpDetectorDNNOpenCV::postProcess_SSD_MobileNet(DetectionCandidates &proposa proposals.m_confidences.push_back(maxScore); proposals.m_boxes.push_back(cv::Rect(left, top, width, height)); proposals.m_classIds.push_back(classId); - } +} } } #endif @@ -1146,7 +1150,7 @@ void vpDetectorDNNOpenCV::setPreferableTarget(const int &targetId) { m_net.setPr void vpDetectorDNNOpenCV::setScaleFactor(const double &scaleFactor) { m_netConfig.m_scaleFactor = scaleFactor; - if ((m_netConfig.m_parsingMethodType == YOLO_V7 || m_netConfig.m_parsingMethodType == YOLO_V8) && m_netConfig.m_scaleFactor != 1 / 255.) { + if ((m_netConfig.m_parsingMethodType == YOLO_V7 || m_netConfig.m_parsingMethodType == YOLO_V8 || m_netConfig.m_parsingMethodType == YOLO_V11) && m_netConfig.m_scaleFactor != 1 / 255.) { std::cout << "[vpDetectorDNNOpenCV::setParsingMethod] WARNING: scale factor should be 1/255. to normalize pixels value." << std::endl; } } @@ -1169,7 +1173,7 @@ void vpDetectorDNNOpenCV::setParsingMethod(const DNNResultsParsingType &typePars { m_netConfig.m_parsingMethodType = typeParsingMethod; m_parsingMethod = parsingMethod; - if ((m_netConfig.m_parsingMethodType == YOLO_V7 || m_netConfig.m_parsingMethodType == YOLO_V8) && m_netConfig.m_scaleFactor != 1 / 255.) { + if ((m_netConfig.m_parsingMethodType == YOLO_V7 || m_netConfig.m_parsingMethodType == YOLO_V8 || m_netConfig.m_parsingMethodType == YOLO_V11) && m_netConfig.m_scaleFactor != 1 / 255.) { m_netConfig.m_scaleFactor = 1 / 255.; std::cout << "[vpDetectorDNNOpenCV::setParsingMethod] NB: scale factor changed to 1/255. to normalize pixels value." << std::endl; }