From 1272b2287763c2eef1adce07f3bba72fc1a80cd5 Mon Sep 17 00:00:00 2001 From: LAGNEAU Romain Date: Wed, 16 Oct 2024 15:21:14 +0200 Subject: [PATCH 1/4] [CORE] Minor changes to handle YoloV11 --- .../visp3/detection/vpDetectorDNNOpenCV.h | 8 +++++--- .../detection/src/dnn/vpDetectorDNNOpenCV.cpp | 18 +++++++++++------- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/modules/detection/include/visp3/detection/vpDetectorDNNOpenCV.h b/modules/detection/include/visp3/detection/vpDetectorDNNOpenCV.h index 2da8aafe2d..23e2e7e3af 100644 --- a/modules/detection/include/visp3/detection/vpDetectorDNNOpenCV.h +++ b/modules/detection/include/visp3/detection/vpDetectorDNNOpenCV.h @@ -74,6 +74,7 @@ BEGIN_VISP_NAMESPACE * - Yolo v5, see usage to detect objects belonging to the COCO dataset using \ref dnn_supported_yolov5 network * - Yolo v7, see usage to detect objects belonging to the COCO dataset using \ref dnn_supported_yolov7 network * - Yolo v8, see usage to detect objects belonging to the COCO dataset using \ref dnn_supported_yolov8 network + * - Yolo v11, see usage to detect objects belonging to the COCO dataset using \ref dnn_supported_yolov11 network * * This class can be initialized from a JSON file if ViSP has been compiled with NLOHMANN JSON (see \ref soft_tool_json to see how to do it). * Examples of such JSON files can be found in the tutorial folder. @@ -98,8 +99,9 @@ class VISP_EXPORT vpDetectorDNNOpenCV YOLO_V4 = 5, /*!< The \b vpDetectorDNNOpenCV object will use the parsing method corresponding to a YoloV4 DNN. See \b vpDetectorDNNOpenCV::postProcess_YoloV3_V4 for more information.*/ YOLO_V5 = 6, /*!< The \b vpDetectorDNNOpenCV object will use the parsing method corresponding to a YoloV5 DNN. See \b vpDetectorDNNOpenCV::postProcess_YoloV5_V7 for more information.*/ YOLO_V7 = 7, /*!< The \b vpDetectorDNNOpenCV object will use the parsing method corresponding to a YoloV7 DNN. See \b vpDetectorDNNOpenCV::postProcess_YoloV5_V7 for more information.*/ - YOLO_V8 = 8, /*!< The \b vpDetectorDNNOpenCV object will use the parsing method corresponding to a YoloV8 DNN. See \b vpDetectorDNNOpenCV::postProcess_YoloV8 for more information.*/ - COUNT = 9 /*!< The number of parsing method that come along with the \b vpDetectorDNNOpenCV class.*/ + YOLO_V8 = 8, /*!< The \b vpDetectorDNNOpenCV object will use the parsing method corresponding to a YoloV8 DNN. See \b vpDetectorDNNOpenCV::postProcess_YoloV8_V11 for more information.*/ + YOLO_V11 = 9, /*!< The \b vpDetectorDNNOpenCV object will use the parsing method corresponding to a YoloV11 DNN. 
See \b vpDetectorDNNOpenCV::postProcess_YoloV8_V11 for more information.*/ + COUNT = 10 /*!< The number of parsing method that come along with the \b vpDetectorDNNOpenCV class.*/ } DNNResultsParsingType; typedef struct DetectionCandidates @@ -560,7 +562,7 @@ class VISP_EXPORT vpDetectorDNNOpenCV void postProcess_YoloV5_V7(DetectionCandidates &proposals, std::vector &dnnRes, const NetConfig &netConfig); - void postProcess_YoloV8(DetectionCandidates &proposals, std::vector &dnnRes, const NetConfig &netConfig); + void postProcess_YoloV8_V11(DetectionCandidates &proposals, std::vector &dnnRes, const NetConfig &netConfig); void postProcess_FasterRCNN(DetectionCandidates &proposals, std::vector &dnnRes, const NetConfig &netConfig); diff --git a/modules/detection/src/dnn/vpDetectorDNNOpenCV.cpp b/modules/detection/src/dnn/vpDetectorDNNOpenCV.cpp index d3846019e1..6696e8c83b 100644 --- a/modules/detection/src/dnn/vpDetectorDNNOpenCV.cpp +++ b/modules/detection/src/dnn/vpDetectorDNNOpenCV.cpp @@ -50,7 +50,7 @@ BEGIN_VISP_NAMESPACE * * \return std::string The list of the supported parsing methods / types of DNNs. */ -std::string vpDetectorDNNOpenCV::getAvailableDnnResultsParsingTypes() + std::string vpDetectorDNNOpenCV::getAvailableDnnResultsParsingTypes() { std::string list = "["; for (unsigned int i = 0; i < vpDetectorDNNOpenCV::COUNT - 1; i++) { @@ -88,6 +88,9 @@ std::string vpDetectorDNNOpenCV::dnnResultsParsingTypeToString(const DNNResultsP case YOLO_V8: name = "yolov8"; break; + case YOLO_V11: + name = "yolov11"; + break; case FASTER_RCNN: name = "faster-rcnn"; break; @@ -474,7 +477,7 @@ std::vector vpDetectorDNNOpenCV::getOutputsNames() names.resize(outLayers.size()); for (size_t i = 0; i < outLayers.size(); ++i) names[i] = layersNames[outLayers[i] - 1]; - } +} return names; } #endif @@ -499,7 +502,8 @@ void vpDetectorDNNOpenCV::postProcess(DetectionCandidates &proposals) postProcess_YoloV5_V7(proposals, m_dnnRes, m_netConfig); break; case YOLO_V8: - postProcess_YoloV8(proposals, m_dnnRes, m_netConfig); + case YOLO_V11: + postProcess_YoloV8_V11(proposals, m_dnnRes, m_netConfig); break; case FASTER_RCNN: postProcess_FasterRCNN(proposals, m_dnnRes, m_netConfig); @@ -815,7 +819,7 @@ void vpDetectorDNNOpenCV::postProcess_YoloV5_V7(DetectionCandidates &proposals, \param dnnRes: raw results of the \b vpDetectorDNNOpenCV::detect step. \param netConfig: the configuration of the network, to know for instance the DNN input size. 
*/ -void vpDetectorDNNOpenCV::postProcess_YoloV8(DetectionCandidates &proposals, std::vector &dnnRes, const NetConfig &netConfig) +void vpDetectorDNNOpenCV::postProcess_YoloV8_V11(DetectionCandidates &proposals, std::vector &dnnRes, const NetConfig &netConfig) { // Code adapted from here: https://github.com/JustasBart/yolov8_CPP_Inference_OpenCV_ONNX/blob/minimalistic/inference.cpp // Compute the ratio between the original size of the image and the network size to translate network coordinates into @@ -965,7 +969,7 @@ void vpDetectorDNNOpenCV::postProcess_SSD_MobileNet(DetectionCandidates &proposa proposals.m_confidences.push_back(maxScore); proposals.m_boxes.push_back(cv::Rect(left, top, width, height)); proposals.m_classIds.push_back(classId); - } +} } } #endif @@ -1146,7 +1150,7 @@ void vpDetectorDNNOpenCV::setPreferableTarget(const int &targetId) { m_net.setPr void vpDetectorDNNOpenCV::setScaleFactor(const double &scaleFactor) { m_netConfig.m_scaleFactor = scaleFactor; - if ((m_netConfig.m_parsingMethodType == YOLO_V7 || m_netConfig.m_parsingMethodType == YOLO_V8) && m_netConfig.m_scaleFactor != 1 / 255.) { + if ((m_netConfig.m_parsingMethodType == YOLO_V7 || m_netConfig.m_parsingMethodType == YOLO_V8 || m_netConfig.m_parsingMethodType == YOLO_V11) && m_netConfig.m_scaleFactor != 1 / 255.) { std::cout << "[vpDetectorDNNOpenCV::setParsingMethod] WARNING: scale factor should be 1/255. to normalize pixels value." << std::endl; } } @@ -1169,7 +1173,7 @@ void vpDetectorDNNOpenCV::setParsingMethod(const DNNResultsParsingType &typePars { m_netConfig.m_parsingMethodType = typeParsingMethod; m_parsingMethod = parsingMethod; - if ((m_netConfig.m_parsingMethodType == YOLO_V7 || m_netConfig.m_parsingMethodType == YOLO_V8) && m_netConfig.m_scaleFactor != 1 / 255.) { + if ((m_netConfig.m_parsingMethodType == YOLO_V7 || m_netConfig.m_parsingMethodType == YOLO_V8 || m_netConfig.m_parsingMethodType == YOLO_V11) && m_netConfig.m_scaleFactor != 1 / 255.) { m_netConfig.m_scaleFactor = 1 / 255.; std::cout << "[vpDetectorDNNOpenCV::setParsingMethod] NB: scale factor changed to 1/255. to normalize pixels value." << std::endl; } From f8eb2a1a43f75f21f794b7c9e1890402cb3e8b70 Mon Sep 17 00:00:00 2001 From: LAGNEAU Romain Date: Wed, 16 Oct 2024 15:45:14 +0200 Subject: [PATCH 2/4] [DOC] Updated doc to explain how to use a Yolo v11 with ViSP --- .../detection_dnn/tutorial-detection-dnn.dox | 93 +++++++++++++++++++ 1 file changed, 93 insertions(+) diff --git a/doc/tutorial/detection_dnn/tutorial-detection-dnn.dox b/doc/tutorial/detection_dnn/tutorial-detection-dnn.dox index c5897fb046..2608ad5b08 100644 --- a/doc/tutorial/detection_dnn/tutorial-detection-dnn.dox +++ b/doc/tutorial/detection_dnn/tutorial-detection-dnn.dox @@ -508,6 +508,42 @@ $ ./tutorial-dnn-object-detection-live --input-json ./default_yolov8.json If you want to train your own YoloV8 model, please refer to the [official documentation](https://docs.ultralytics.com/modes/train/). +\subsubsection dnn_supported_yolov11 Yolo v11 + +Please follow the [official documentation](https://docs.ultralytics.com/quickstart/#install-ultralytics) +to install Ultralytics' tools in order to be able to train or export a model. The installation using Docker has been tested for +the sake of this tutorial. + +You can get the pre-trained YoloV11 models [here](https://docs.ultralytics.com/models/yolo11/#performance-metrics) . For +this tutorial, we tested the [YOLO11s](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11s.pt) +pre-trained model. 
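+
+For instance, assuming `wget` is available on your system, these weights can be downloaded directly
+from the command line:
+```
+# Download the YOLO11s pre-trained weights used in this tutorial
+$ wget https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11s.pt
+```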
+ +To export a model stored in Pytorch format into an ONNX format, you can use the Ultralytics' tool: +``` +$ sudo docker run -it --ipc=host --gpus all ultralytics/ultralytics:latest +root@8efe0fdbe196:/ultralytics#yolo export model=/path/to/yolo11s.pt format=onnx imgsz=640 opset=12 +``` + +\note The `opset` option permits to set the version of ONNX to use to export the model. If you use OpenCV 4.10.0 this +option does not seem to be required. + +\note It seems that OpenCV 4.7.0 is not compatible with Yolo v11. To upgrade OpenCV please follow the instructions in +the section \ref dnn_model_upgrade_opencv below. + +Please use the following commands to run the tutorial program: +``` +$ DNN_PATH=/path/to/my/dnn/folder \ + CONFIG=none \ + MODEL=${DNN_PATH}/yolov11/weights/yolov11s.onnx \ + LABELS=${DNN_PATH}/yolov11/cfg/coco_classes.txt \ + TYPE=yolov11 \ + FRAMEWORK=onnx \ + WIDTH=640 HEIGHT=640 +$ ./tutorial-dnn-object-detection-live --model $MODEL --labels $LABELS --config $CONFIG --type $TYPE \ + --framework $FRAMEWORK --width $WIDTH --height $HEIGHT --nmsThresh 0.5 --mean 0 0 0 \ + --filterThresh -0.25 --scale 0.0039 +``` + \section dnn_model_other Other dnn models \subsection dnn_model_other_zoo OpenCV model zoo @@ -556,6 +592,63 @@ Aborted (core dumped) You may have been missing the onnxsim library or forgotten to remove the `--end2end` option during the export of the network. +\subsection dnn_error_yolov11 Yolo v11: several issues possible + +You may face the following error: +``` +what(): OpenCV(4.7.0) /root/3rdparty/opencv/modules/dnn/src/onnx/onnx_importer.cpp:1073: error: (-2:Unspecified error) in function 'handleNode' +> Node [Split@ai.onnx]:(onnx_node!/model.10/m/m.0/attn/Split) parse error: OpenCV(4.7.0) /root/3rdparty/opencv/modules/dnn/src/layers/slice_layer.cpp:274: error: (-215:Assertion failed) splits > 0 && inpShape[axis_rw] % splits == 0 in function 'getMemoryShapes' +``` +It is because the version of ONNX used to export the model does not match the one that OpenCV uses. Please be sure that you used the `opset` option in the export command, such as follow: +``` +yolo export model=/path/to/yolo11s.pt format=onnx imgsz=640 opset=12 +``` +\note The `opset` option does not seem to be needed with OpenCV 4.10.0 . + +You may face the following error when trying to run the tutorial with a Yolo v11 model: +``` +terminate called after throwing an instance of 'cv::Exception' + what(): OpenCV(4.7.0) /root/3rdparty/opencv/modules/dnn/src/net_impl_fuse.cpp:252: error: (-215:Assertion failed) biasLayerData->outputBlobs.size() == 1 in function 'fuseLayers' +``` +It is because the OpenCV version that you use is too old. Please update OpenCV following the instructions presented in +the \ref dnn_model_upgrade_opencv below. + +\subsubsection dnn_model_upgrade_opencv Upgrading OpenCV from source + +We suppose that OpenCV has been installed from source as described in the section \ref build_opencv_with_cuda +above. + +To upgrade OpenCV, please follow the steps below: + +``` +$ cd $VISP_WS/3rdparty/opencv +$ git fecth +$ git checkout 4.10.0 +$ cd build +$ cmake .. 
\ +-DCMAKE_BUILD_TYPE=RELEASE \ +-DCMAKE_INSTALL_PREFIX=/usr \ +-DCMAKE_INSTALL_LIBDIR=lib \ +-DWITH_CUDA=ON \ +-DWITH_CUDNN=ON \ +-DOPENCV_DNN_CUDA=ON \ +-DENABLE_FAST_MATH=1 \ +-DCUDA_FAST_MATH=1 \ +-DCUDA_ARCH_BIN=${GPU_CAPABILITIES} \ +-DWITH_CUBLAS=1 \ +-DOPENCV_EXTRA_MODULES_PATH=${HOME}/visp_ws/3rdparty/opencv_contrib/modules \ +-DBUILD_PERF_TESTS=Off \ +-DBUILD_TESTS=Off \ +-DBUILD_EXAMPLES=Off \ +-DBUILD_opencv_apps=Off \ +-DBUILD_opencv_java_bindings_generator=Off \ +-DBUILD_opencv_js=Off +$ make -j$(nproc) install +$ cd $VISP_WS/visp-build +$ cmake ../visp +$ make -j$(nproc) +``` + \section dnn_next Next tutorial You may continue following \ref tutorial-detection-tensorrt. From 044644153175e04ce5d4dd13ebceb3a04e0f4fde Mon Sep 17 00:00:00 2001 From: Fabien Spindler Date: Fri, 1 Nov 2024 11:26:32 +0100 Subject: [PATCH 3/4] Update copyright header and fix code indentation --- .../include/visp3/detection/vpDetectorDNNOpenCV.h | 7 ++++--- modules/detection/src/dnn/vpDetectorDNNOpenCV.cpp | 13 ++++++------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/modules/detection/include/visp3/detection/vpDetectorDNNOpenCV.h b/modules/detection/include/visp3/detection/vpDetectorDNNOpenCV.h index 23e2e7e3af..08f33f4dbd 100644 --- a/modules/detection/include/visp3/detection/vpDetectorDNNOpenCV.h +++ b/modules/detection/include/visp3/detection/vpDetectorDNNOpenCV.h @@ -1,6 +1,6 @@ /* * ViSP, open source Visual Servoing Platform software. - * Copyright (C) 2005 - 2023 by Inria. All rights reserved. + * Copyright (C) 2005 - 2024 by Inria. All rights reserved. * * This software is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -30,8 +30,9 @@ * Description: * DNN object detection using OpenCV DNN module. */ -#ifndef _vpDetectorDNN_h_ -#define _vpDetectorDNN_h_ + +#ifndef VP_DETECTOR_DNN_OPENCV_H +#define VP_DETECTOR_DNN_OPENCV_H #include diff --git a/modules/detection/src/dnn/vpDetectorDNNOpenCV.cpp b/modules/detection/src/dnn/vpDetectorDNNOpenCV.cpp index 6696e8c83b..39b6bdbb35 100644 --- a/modules/detection/src/dnn/vpDetectorDNNOpenCV.cpp +++ b/modules/detection/src/dnn/vpDetectorDNNOpenCV.cpp @@ -1,7 +1,6 @@ -/**************************************************************************** - * +/* * ViSP, open source Visual Servoing Platform software. - * Copyright (C) 2005 - 2023 by Inria. All rights reserved. + * Copyright (C) 2005 - 2024 by Inria. All rights reserved. * * This software is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -30,8 +29,8 @@ * * Description: * DNN object detection using OpenCV DNN module. 
- * -*****************************************************************************/ + */ + #include // Check if std:c++17 or higher @@ -477,7 +476,7 @@ std::vector vpDetectorDNNOpenCV::getOutputsNames() names.resize(outLayers.size()); for (size_t i = 0; i < outLayers.size(); ++i) names[i] = layersNames[outLayers[i] - 1]; -} + } return names; } #endif @@ -969,7 +968,7 @@ void vpDetectorDNNOpenCV::postProcess_SSD_MobileNet(DetectionCandidates &proposa proposals.m_confidences.push_back(maxScore); proposals.m_boxes.push_back(cv::Rect(left, top, width, height)); proposals.m_classIds.push_back(classId); -} + } } } #endif From 066bed427b5a946bc0a112d30c0e37ab1b02620d Mon Sep 17 00:00:00 2001 From: Fabien Spindler Date: Fri, 1 Nov 2024 12:01:26 +0100 Subject: [PATCH 4/4] Update dnn tutorial doc --- .../detection_dnn/tutorial-detection-dnn.dox | 262 +++++++++--------- 1 file changed, 135 insertions(+), 127 deletions(-) diff --git a/doc/tutorial/detection_dnn/tutorial-detection-dnn.dox b/doc/tutorial/detection_dnn/tutorial-detection-dnn.dox index 2608ad5b08..d97c73a9ae 100644 --- a/doc/tutorial/detection_dnn/tutorial-detection-dnn.dox +++ b/doc/tutorial/detection_dnn/tutorial-detection-dnn.dox @@ -46,9 +46,9 @@ Please ensure to install a CuDNN version that is compatible with your version of 3. Then, you need to determine the Compute capability of your GPU either from the [NVidia website](https://developer.nvidia.com/cuda-gpus) or using the [nvidia-smi tool](https://developer.nvidia.com/nvidia-system-management-interface). On a Debian distribution, you would run: - ``` - $ export GPU_CAPABILITIES=$(nvidia-smi --query-gpu=compute_cap --format=csv,noheader) - ``` +\code{.sh} +$ export GPU_CAPABILITIES=$(nvidia-smi --query-gpu=compute_cap --format=csv,noheader) +\endcode 4. Check if the package already installed on your computer. On a Debian distribution, you would run: ``` $ apt list --installed | grep -i opencv @@ -61,51 +61,50 @@ or using the [nvidia-smi tool](https://developer.nvidia.com/nvidia-system-manage ``` 5. Install OpenCV dependencies. On a Debian distribution, you would run: - ``` - ## libx11-dev is a recommended ViSP 3rd parties - # If you installed another version of CUDA, please install the version of CuDNN which is compatible with your version - $ sudo apt update - $ sudo apt install libgtk-3-dev \ - cmake \ - git \ - pip \ - cmake-curses-gui \ - locate \ - libx11-dev - ``` +\code{.sh} +# libx11-dev is a recommended ViSP 3rd parties +# If you installed another version of CUDA, please install the version of CuDNN which is compatible with your version +$ sudo apt update +$ sudo apt install libgtk-3-dev \ + cmake \ + git \ + pip \ + cmake-curses-gui \ + locate \ + libx11-dev +\endcode 6. Get the sources. The \b vpDetectorDNNOpenCV has been tested with **OpenCV 4.7**. First, get the OpenCV_contrib sources, that contain the Cuda DNN module. On a Debian distribution, you would run: - ``` - $ cd ${HOME}/visp_ws/3rdparty/ - $ git clone --branch 4.7.0 https://github.com/opencv/opencv_contrib - $ git clone --branch 4.7.0 https://github.com/opencv/opencv - ``` +\code{.sh} +$ cd ${HOME}/visp_ws/3rdparty/ +$ git clone --branch 4.7.0 https://github.com/opencv/opencv_contrib +$ git clone --branch 4.7.0 https://github.com/opencv/opencv +\endcode 7. Compile OpenCV and install it from source. On a Debian distribution, you would run: - ``` - $ mkdir -p ${HOME}/visp_ws/3rdparty/opencv/build &&\ - cd ${HOME}/visp_ws/3rdparty/opencv/build - $ cmake .. 
\ - -DCMAKE_BUILD_TYPE=RELEASE \ - -DCMAKE_INSTALL_PREFIX=/usr \ - -DCMAKE_INSTALL_LIBDIR=lib \ - -DWITH_CUDA=ON \ - -DWITH_CUDNN=ON \ - -DOPENCV_DNN_CUDA=ON \ - -DENABLE_FAST_MATH=1 \ - -DCUDA_FAST_MATH=1 \ - -DCUDA_ARCH_BIN=${GPU_CAPABILITIES} \ - -DWITH_CUBLAS=1 \ - -DOPENCV_EXTRA_MODULES_PATH=${HOME}/visp_ws/3rdparty/opencv_contrib/modules \ - -DBUILD_PERF_TESTS=Off \ - -DBUILD_TESTS=Off \ - -DBUILD_EXAMPLES=Off \ - -DBUILD_opencv_apps=Off \ - -DBUILD_opencv_java_bindings_generator=Off \ - -DBUILD_opencv_js=Off - ``` +\code{.sh} +$ mkdir -p ${HOME}/visp_ws/3rdparty/opencv/build && cd ${HOME}/visp_ws/3rdparty/opencv/build +$ cmake .. \ + -DCMAKE_BUILD_TYPE=RELEASE \ + -DCMAKE_INSTALL_PREFIX=/usr \ + -DCMAKE_INSTALL_LIBDIR=lib \ + -DWITH_CUDA=ON \ + -DWITH_CUDNN=ON \ + -DOPENCV_DNN_CUDA=ON \ + -DENABLE_FAST_MATH=1 \ + -DCUDA_FAST_MATH=1 \ + -DCUDA_ARCH_BIN=${GPU_CAPABILITIES} \ + -DWITH_CUBLAS=1 \ + -DOPENCV_EXTRA_MODULES_PATH=${HOME}/visp_ws/3rdparty/opencv_contrib/modules \ + -DBUILD_PERF_TESTS=Off \ + -DBUILD_TESTS=Off \ + -DBUILD_EXAMPLES=Off \ + -DBUILD_opencv_apps=Off \ + -DBUILD_opencv_java_bindings_generator=Off \ + -DBUILD_opencv_js=Off +\endcode 8. Compile and install OpenCV. On a Debian distribution, you would run: ``` @@ -230,22 +229,22 @@ or for a non-sorted vector with: The default behavior is to detect human faces, but you can input another model to detect the objects you want. To see which are the options, run: -``` +\code{.sh} $ cd $VISP_WS/visp-build/tutorial/detection/dnn $ ./tutorial-dnn-object-detection-live --help -``` +\endcode \subsection dnn_usecase_face_detection Face detection The default behavior is to detect human faces using a model provided by OpenCV and learned over a ResNet 10 network. If you have a laptop, simply run: -``` +\code{.sh} $ cd $VISP_WS/visp-build/tutorial/detection/dnn $ ./tutorial-dnn-object-detection-live -``` +\endcode The previous command is similar to the next one: -``` +\code{.sh} $ CONFIG=opencv_face_detector.pbtxt \ MODEL=opencv_face_detector_uint8.pb \ LABELS=class.txt \ @@ -255,7 +254,7 @@ $ CONFIG=opencv_face_detector.pbtxt \ $ ./tutorial-dnn-object-detection-live --model $MODEL --labels $LABELS --config $CONFIG --type $TYPE \ --framework $FRAMEWORK --width $WIDTH --height $HEIGHT --nmsThresh 0.5 --mean 0 0 0 \ --confThresh 0.35 --filterThresh -0.25 --scale 1 -``` +\endcode \subsection dnn_models_coco COCO dataset objects detection @@ -271,7 +270,7 @@ the weights (`frozen_inference_graph.pb`) [there](http://download.tensorflow.org and the labels (`coco_classes.txt`) [here](https://github.com/lagadic/visp/blob/master/tutorial/detection/dnn/coco_classes.txt). 
To run the tutorial with the Faster-RCNN network, please run the following commands: -``` +\code{.sh} $ cd $VISP_WS/visp-build/tutorial/detection/dnn $ DNN_PATH=/path/to/my/dnn/folder \ CONFIG=${DNN_PATH}/Faster-RCNN/cfg/config.pbtxt \ @@ -283,13 +282,13 @@ $ DNN_PATH=/path/to/my/dnn/folder \ $ ./tutorial-dnn-object-detection-live --model $MODEL --labels $LABELS --config $CONFIG --type $TYPE \ --framework $FRAMEWORK --width $WIDTH --height $HEIGHT --nmsThresh 0.5 --mean 0 0 0 \ --confThresh 0.35 --filterThresh -0.25 --scale 1 -``` +\endcode Alternatively, if you have installed the NLOHMANN JSON library and you are using the weights quoted above, you can use the following command line: -``` +\code{.sh} $ ./tutorial-dnn-object-detection-live --input-json ./default_faster-rcnn.json -``` +\endcode If you want to train your own Faster-RCNN model, please refer to this [tutorial](https://debuggercafe.com/how-to-train-faster-rcnn-resnet50-fpn-v2-on-custom-dataset/). @@ -303,7 +302,7 @@ and the labels (`coco_classes.txt`) [here](https://github.com/lagadic/visp/blob/ The parameters to use with this network were found [there](https://github.com/opencv/opencv/blob/0052d46b8e33c7bfe0e1450e4bff28b88f455570/samples/dnn/models.yml#L68). To run the tutorial with the `Mobilenet V1` network, please run the following commands: -``` +\code{.sh} $ DNN_PATH=/path/to/my/dnn/folder \ CONFIG=${DNN_PATH}/MobileNet-SSD/cfg/ssd_mobilenet_v1_coco_2017_11_17.pbtxt \ MODEL=${DNN_PATH}/MobileNet-SSD/weights/frozen_inference_graph.pb \ @@ -314,13 +313,13 @@ $ DNN_PATH=/path/to/my/dnn/folder \ $ ./tutorial-dnn-object-detection-live --model $MODEL --labels $LABELS --config $CONFIG --type $TYPE \ --framework $FRAMEWORK --width $WIDTH --height $HEIGHT --nmsThresh 0.5 --mean 0 0 0 \ --filterThresh -0.25 --scale 1 -``` +\endcode Alternatively, if you have installed the NLOHMANN JSON library and you are using the weights quoted above, you can use the following command line: -``` +\code{.sh} $ ./tutorial-dnn-object-detection-live --input-json ./default_ssd-mobilenet_v1.json -``` +\endcode If you would rather use the v3 of Mobilenet-SSD, please download the config file (`ssd_mobilenet_v3_large_coco_2020_01_14.pbtxt`) [here](https://gist.github.com/dkurt/54a8e8b51beb3bd3f770b79e56927bd7), @@ -328,7 +327,7 @@ the weights (`frozen_inference_graph.pb`) [there](http://download.tensorflow.org and the labels (`coco_classes.txt`) [here](https://github.com/lagadic/visp/blob/master/tutorial/detection/dnn/coco_classes.txt). 
Then, to run the tutorial with the `Mobilenet V3` network, please run the following commands: -``` +\code{.sh} $ DNN_PATH=/path/to/my/dnn/folder \ CONFIG=${DNN_PATH}/MobileNet-SSD/cfg/ssd_mobilenet_v3_large_coco_2020_01_14.pbtxt \ MODEL=${DNN_PATH}/MobileNet-SSD/weights/frozen_inference_graph.pb \ @@ -339,13 +338,13 @@ $ DNN_PATH=/path/to/my/dnn/folder \ $ ./tutorial-dnn-object-detection-live --model $MODEL --labels $LABELS --config $CONFIG --type $TYPE \ --framework $FRAMEWORK --width $WIDTH --height $HEIGHT --nmsThresh 0.5 --mean 0.0019 0.0019 0.0019 \ --filterThresh -0.25 --scale 0.00389 -``` +\endcode Alternatively, if you have installed the NLOHMANN JSON library and you are using the weights quoted above, you can use the following command line: -``` +\code{.sh} $ ./tutorial-dnn-object-detection-live --input-json ./default_ssd-mobilenet_v3.json -``` +\endcode If you want to train your own MobileNet SSD model, please refer to this [tutorial](https://www.forecr.io/blogs/ai-algorithms/how-to-train-ssd-mobilenet-model-for-object-detection-using-pytorch) @@ -358,7 +357,7 @@ the weights (`yolov3.weights`) [there](https://pjreddie.com/media/files/yolov3.w and the labels (`coco_classes.txt`) [here](https://github.com/lagadic/visp/blob/master/tutorial/detection/dnn/coco_classes.txt). To run the tutorial program `tutorial-dnn-object-detection-live.cpp`, use the following commands: -``` +\code{.sh} $ DNN_PATH=/path/to/my/dnn/folder \ CONFIG=${DNN_PATH}/yolov3/cfg/yolov3.cfg \ MODEL=${DNN_PATH}/yolov3/weights/yolov3.weights \ @@ -369,13 +368,13 @@ $ DNN_PATH=/path/to/my/dnn/folder \ $ ./tutorial-dnn-object-detection-live --model $MODEL --labels $LABELS --config $CONFIG --type $TYPE \ --framework $FRAMEWORK --width $WIDTH --height $HEIGHT --nmsThresh 0.5 --mean 0 0 0 \ --filterThresh -0.25 --scale 0.0039 -``` +\endcode Alternatively, if you have installed the NLOHMANN JSON library and you are using the weights quoted above, you can use the following command line: -``` +\code{.sh} $ ./tutorial-dnn-object-detection-live --input-json ./default_yolov3.json -``` +\endcode If you want to train your own YoloV3 model, please refer to the [official documentation](https://github.com/ultralytics/yolov3). @@ -386,7 +385,7 @@ the weights (`yolov4-tiny.weights`) [there](https://github.com/AlexeyAB/darknet/ and the labels (`coco_classes.txt`) [here](https://github.com/lagadic/visp/blob/master/tutorial/detection/dnn/coco_classes.txt). To run the tutorial program `tutorial-dnn-object-detection-live.cpp`, use the following commands: -``` +\code{.sh} $ DNN_PATH=/path/to/my/dnn/folder \ CONFIG=${DNN_PATH}/yolov4/cfg/yolov4-tiny.cfg \ MODEL=${DNN_PATH}/yolov4/weights/yolov4-tiny.weights \ @@ -397,13 +396,13 @@ $ DNN_PATH=/path/to/my/dnn/folder \ $ ./tutorial-dnn-object-detection-live --model $MODEL --labels $LABELS --config $CONFIG --type $TYPE \ --framework $FRAMEWORK --width $WIDTH --height $HEIGHT --nmsThresh 0.5 --mean 0 0 0 \ --filterThresh -0.25 --scale 0.0039 -``` +\endcode Alternatively, if you have installed the NLOHMANN JSON library and you are using the weights quoted above, you can use the following command line: -``` +\code{.sh} $ ./tutorial-dnn-object-detection-live --input-json ./default_yolov4.json -``` +\endcode If you want to train your own YoloV4 model, please refer to the [official documentation](https://github.com/AlexeyAB/darknet#how-to-train-to-detect-your-custom-objects). 
@@ -415,7 +414,7 @@ and the labels (`coco_classes.txt`) [here](https://github.com/lagadic/visp/blob/ \note You do not need a config file when using a network saved in ONNX format. To run the tutorial program `tutorial-dnn-object-detection-live.cpp`, use the following commands: -``` +\code{.sh} $ DNN_PATH=/path/to/my/dnn/folder \ CONFIG=none \ MODEL=${DNN_PATH}/yolov5/weights/yolov5n.onnx \ @@ -426,13 +425,13 @@ $ DNN_PATH=/path/to/my/dnn/folder \ $ ./tutorial-dnn-object-detection-live --model $MODEL --labels $LABELS --config $CONFIG --type $TYPE \ --framework $FRAMEWORK --width $WIDTH --height $HEIGHT --nmsThresh 0.5 --mean 0 0 0 \ --filterThresh -0.25 --scale 0.0039 -``` +\endcode Alternatively, if you have installed the NLOHMANN JSON library and you are using the weights quoted above, you can use the following command line: -``` +\code{.sh} $ ./tutorial-dnn-object-detection-live --input-json ./default_yolov5.json -``` +\endcode If you want to train your own YoloV5 model, please refer to the [official documentation](https://docs.ultralytics.com/yolov5/tutorials/train_custom_data/#13-prepare-dataset-for-yolov5). @@ -444,12 +443,12 @@ in the Pytorch format from [here](https://github.com/WongKinYiu/yolov7/releases/ Then, convert it in ONNX format using the `export.py` script that you can find on the [YoloV7 repo](https://github.com/WongKinYiu/yolov7) with the following arguments: -``` +\code{.sh} $ python3 export.py --weights ../weights/yolov7-tiny.pt --grid --simplify --topk-all 100 --iou-thres 0.65 --conf-thres 0.35 --img-size 640 640 --max-wh 640 -``` +\endcode Finally, use the following commands to run the tutorial program: -``` +\code{.sh} $ DNN_PATH=/path/to/my/dnn/folder \ CONFIG=none \ MODEL=${DNN_PATH}/yolov7/weights/yolov7-tiny.onnx \ @@ -460,15 +459,15 @@ $ DNN_PATH=/path/to/my/dnn/folder \ $ ./tutorial-dnn-object-detection-live --model $MODEL --labels $LABELS --config $CONFIG --type $TYPE \ --framework $FRAMEWORK --width $WIDTH --height $HEIGHT --nmsThresh 0.5 --mean 0 0 0 \ --filterThresh -0.25 --scale 0.0039 -``` +\endcode \note You do not need a config file when using a network saved in ONNX format. Alternatively, if you have installed the NLOHMANN JSON library and you are using the weights quoted above, you can use the following command line: -``` +\code{.sh} $ ./tutorial-dnn-object-detection-live --input-json ./default_yolov7.json -``` +\endcode If you want to train your own YoloV7 model, please refer to the [official documentation](https://github.com/WongKinYiu/yolov7#transfer-learning). If your dataset is rather small (only hundreds of pictures), you may want to consider to base your training on @@ -485,7 +484,7 @@ You can find the weights (`yolov8s.onnx`) in ONNX format and the labels (`coco_classes.txt`) [here](https://github.com/lagadic/visp/blob/master/tutorial/detection/dnn/coco_classes.txt). 
Please use the following commands to run the tutorial program: -``` +\code{.sh} $ DNN_PATH=/path/to/my/dnn/folder \ CONFIG=none \ MODEL=${DNN_PATH}/yolov8/weights/yolov8s.onnx \ @@ -496,13 +495,13 @@ $ DNN_PATH=/path/to/my/dnn/folder \ $ ./tutorial-dnn-object-detection-live --model $MODEL --labels $LABELS --config $CONFIG --type $TYPE \ --framework $FRAMEWORK --width $WIDTH --height $HEIGHT --nmsThresh 0.5 --mean 0 0 0 \ --filterThresh -0.25 --scale 0.0039 -``` +\endcode Alternatively, if you have installed the NLOHMANN JSON library and you are using the weights quoted above, you can use the following command line: -``` +\code{.sh} $ ./tutorial-dnn-object-detection-live --input-json ./default_yolov8.json -``` +\endcode \note You do not need a config file when using a network saved in ONNX format. @@ -511,18 +510,18 @@ If you want to train your own YoloV8 model, please refer to the [official docume \subsubsection dnn_supported_yolov11 Yolo v11 Please follow the [official documentation](https://docs.ultralytics.com/quickstart/#install-ultralytics) -to install Ultralytics' tools in order to be able to train or export a model. The installation using Docker has been tested for +to install Ultralytics tools in order to be able to train or export a model. The installation using Docker has been tested for the sake of this tutorial. -You can get the pre-trained YoloV11 models [here](https://docs.ultralytics.com/models/yolo11/#performance-metrics) . For +You can get the pre-trained YoloV11 models [here](https://docs.ultralytics.com/models/yolo11/#performance-metrics). For this tutorial, we tested the [YOLO11s](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11s.pt) pre-trained model. -To export a model stored in Pytorch format into an ONNX format, you can use the Ultralytics' tool: -``` +To export a model stored in Pytorch format into an ONNX format, you can use the Ultralytics tool: +\code{.sh} $ sudo docker run -it --ipc=host --gpus all ultralytics/ultralytics:latest -root@8efe0fdbe196:/ultralytics#yolo export model=/path/to/yolo11s.pt format=onnx imgsz=640 opset=12 -``` +root@8efe0fdbe196:/ultralytics# yolo export model=/path/to/yolo11s.pt format=onnx imgsz=640 opset=12 +\endcode \note The `opset` option permits to set the version of ONNX to use to export the model. If you use OpenCV 4.10.0 this option does not seem to be required. @@ -531,7 +530,7 @@ option does not seem to be required. the section \ref dnn_model_upgrade_opencv below. Please use the following commands to run the tutorial program: -``` +\code{.sh} $ DNN_PATH=/path/to/my/dnn/folder \ CONFIG=none \ MODEL=${DNN_PATH}/yolov11/weights/yolov11s.onnx \ @@ -542,7 +541,7 @@ $ DNN_PATH=/path/to/my/dnn/folder \ $ ./tutorial-dnn-object-detection-live --model $MODEL --labels $LABELS --config $CONFIG --type $TYPE \ --framework $FRAMEWORK --width $WIDTH --height $HEIGHT --nmsThresh 0.5 --mean 0 0 0 \ --filterThresh -0.25 --scale 0.0039 -``` +\endcode \section dnn_model_other Other dnn models \subsection dnn_model_other_zoo OpenCV model zoo @@ -570,11 +569,11 @@ that does not match the one expected by the DNN). 
\subsection dnn_error_unimplemented YoloV3: transpose weights is not functionNotImplementedError -``` +\code{.sh} terminate called after throwing an instance of 'cv::Exception' what(): OpenCV(4.7.0) error: (-213:The function/feature is not implemented) Transpose the weights (except for convolutional) is not implemented in function 'ReadDarknetFromWeightsStream' -``` +\endcode Following the proposition found [here](https://github.com/opencv/opencv/issues/15502#issuecomment-531755462) to download once again the weights from [here](https://pjreddie.com/media/files/yolov3.weights) permitted to solve this error. @@ -582,72 +581,81 @@ again the weights from [here](https://pjreddie.com/media/files/yolov3.weights) p \subsection dnn_error_nonmaxsuppr YoloV7: can't create NonMaxSuppression layer When using a YoloV7 model exported in `onnx` format, one can face the following error: -``` +\code{.sh} [ERROR:0@0.335] global onnx_importer.cpp:1054 handleNode DNN/ONNX: ERROR during processing node with 5 inputs and 1 outputs: [NonMaxSuppression]onnx_node!/end2end/NonMaxSuppression) from domain='ai.onnx' terminate called after throwing an instance of 'cv::Exception' what(): OpenCV(4.7.0) opencv/modules/dnn/src/onnx/onnx_importer.cpp:1073: error: (-2:Unspecified error) in function 'handleNode' Node [NonMaxSuppression@ai.onnx]onnx_node!/end2end/NonMaxSuppression) parse error: OpenCV(4.7.0) opencv/modules/dnn/src/net_impl.hpp:108: error: (-2:Unspecified error) Can't create layer "onnx_node!/end2end/NonMaxSuppression" of type "NonMaxSuppression" in function 'getLayerInstance' Aborted (core dumped) -``` +\endcode You may have been missing the onnxsim library or forgotten to remove the `--end2end` option during the export of the network. -\subsection dnn_error_yolov11 Yolo v11: several issues possible +\subsection dnn_error_yolov11 Yolo v11: Known issues + +\subsubsection dnn_model_onnx_mismatch ONNX version mismatch You may face the following error: -``` +\code{.sh} what(): OpenCV(4.7.0) /root/3rdparty/opencv/modules/dnn/src/onnx/onnx_importer.cpp:1073: error: (-2:Unspecified error) in function 'handleNode' > Node [Split@ai.onnx]:(onnx_node!/model.10/m/m.0/attn/Split) parse error: OpenCV(4.7.0) /root/3rdparty/opencv/modules/dnn/src/layers/slice_layer.cpp:274: error: (-215:Assertion failed) splits > 0 && inpShape[axis_rw] % splits == 0 in function 'getMemoryShapes' -``` -It is because the version of ONNX used to export the model does not match the one that OpenCV uses. Please be sure that you used the `opset` option in the export command, such as follow: -``` -yolo export model=/path/to/yolo11s.pt format=onnx imgsz=640 opset=12 -``` +\endcode +It is because the version of ONNX used to export the model does not match the one that OpenCV uses. +Please be sure that you used the `opset` option in the export command as stated in \ref dnn_supported_yolov11 section, such as follow: +\code{.sh} +$ yolo export model=/path/to/yolo11s.pt format=onnx imgsz=640 opset=12 +\endcode \note The `opset` option does not seem to be needed with OpenCV 4.10.0 . 
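
If you are unsure which opset version your exported model actually uses, you can inspect the ONNX file,
for instance with the `onnx` Python package (assumed to be installed, e.g. with `pip install onnx`):
\code{.sh}
# Print the opset(s) declared by the exported model (requires the onnx Python package)
$ python3 -c "import onnx; m = onnx.load('/path/to/yolo11s.onnx'); print([(o.domain, o.version) for o in m.opset_import])"
\endcode
If the reported version is not the one you expected, re-export the model with the `opset` option as shown above.
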
-You may face the following error when trying to run the tutorial with a Yolo v11 model: -``` +\subsubsection dnn_model_opencv_deprecated OpenCV version too old + +You may also face the following error when trying to run the tutorial with a Yolo v11 model: +\code{.sh} terminate called after throwing an instance of 'cv::Exception' what(): OpenCV(4.7.0) /root/3rdparty/opencv/modules/dnn/src/net_impl_fuse.cpp:252: error: (-215:Assertion failed) biasLayerData->outputBlobs.size() == 1 in function 'fuseLayers' -``` +\endcode It is because the OpenCV version that you use is too old. Please update OpenCV following the instructions presented in the \ref dnn_model_upgrade_opencv below. -\subsubsection dnn_model_upgrade_opencv Upgrading OpenCV from source +\subsubsection dnn_model_upgrade_opencv Fix by upgrading OpenCV from source We suppose that OpenCV has been installed from source as described in the section \ref build_opencv_with_cuda above. -To upgrade OpenCV, please follow the steps below: +To upgrade OpenCV to version 4.10.0, please follow the steps below: -``` +\code{.sh} $ cd $VISP_WS/3rdparty/opencv -$ git fecth +$ git fetch $ git checkout 4.10.0 $ cd build $ cmake .. \ --DCMAKE_BUILD_TYPE=RELEASE \ --DCMAKE_INSTALL_PREFIX=/usr \ --DCMAKE_INSTALL_LIBDIR=lib \ --DWITH_CUDA=ON \ --DWITH_CUDNN=ON \ --DOPENCV_DNN_CUDA=ON \ --DENABLE_FAST_MATH=1 \ --DCUDA_FAST_MATH=1 \ --DCUDA_ARCH_BIN=${GPU_CAPABILITIES} \ --DWITH_CUBLAS=1 \ --DOPENCV_EXTRA_MODULES_PATH=${HOME}/visp_ws/3rdparty/opencv_contrib/modules \ --DBUILD_PERF_TESTS=Off \ --DBUILD_TESTS=Off \ --DBUILD_EXAMPLES=Off \ --DBUILD_opencv_apps=Off \ --DBUILD_opencv_java_bindings_generator=Off \ --DBUILD_opencv_js=Off -$ make -j$(nproc) install + -DCMAKE_BUILD_TYPE=RELEASE \ + -DCMAKE_INSTALL_PREFIX=/usr \ + -DCMAKE_INSTALL_LIBDIR=lib \ + -DWITH_CUDA=ON \ + -DWITH_CUDNN=ON \ + -DOPENCV_DNN_CUDA=ON \ + -DENABLE_FAST_MATH=1 \ + -DCUDA_FAST_MATH=1 \ + -DCUDA_ARCH_BIN=${GPU_CAPABILITIES} \ + -DWITH_CUBLAS=1 \ + -DOPENCV_EXTRA_MODULES_PATH=${HOME}/visp_ws/3rdparty/opencv_contrib/modules \ + -DBUILD_PERF_TESTS=Off \ + -DBUILD_TESTS=Off \ + -DBUILD_EXAMPLES=Off \ + -DBUILD_opencv_apps=Off \ + -DBUILD_opencv_java_bindings_generator=Off \ + -DBUILD_opencv_js=Off +$ make -j$(nproc) +$ sudo make install +\endcode +Once OpenCV is build and installed, you need to rebuild ViSP: +\code{.sh} $ cd $VISP_WS/visp-build $ cmake ../visp $ make -j$(nproc) -``` +\endcode \section dnn_next Next tutorial