From 8ef53beac64b1978172f0e39bd74bc335913ee2c Mon Sep 17 00:00:00 2001 From: wa22 Date: Thu, 23 May 2024 05:57:37 +0000 Subject: [PATCH] add yolov10 --- README.md | 341 +--- README.zh-CN.md | 297 --- figures/latency.svg | 2140 ++++++++++++++++++++++ figures/params.svg | 2108 +++++++++++++++++++++ requirements.txt | 9 + ultralytics/__init__.py | 3 +- ultralytics/cfg/__init__.py | 4 + ultralytics/cfg/models/v10/yolov10b.yaml | 40 + ultralytics/cfg/models/v10/yolov10l.yaml | 40 + ultralytics/cfg/models/v10/yolov10m.yaml | 43 + ultralytics/cfg/models/v10/yolov10n.yaml | 40 + ultralytics/cfg/models/v10/yolov10s.yaml | 39 + ultralytics/cfg/models/v10/yolov10x.yaml | 40 + ultralytics/engine/exporter.py | 5 +- ultralytics/engine/trainer.py | 3 +- ultralytics/engine/validator.py | 8 +- ultralytics/models/__init__.py | 3 +- ultralytics/models/yolo/detect/val.py | 2 +- ultralytics/models/yolov10/__init__.py | 5 + ultralytics/models/yolov10/model.py | 18 + ultralytics/models/yolov10/predict.py | 38 + ultralytics/models/yolov10/train.py | 20 + ultralytics/models/yolov10/val.py | 20 + ultralytics/nn/modules/__init__.py | 11 +- ultralytics/nn/modules/block.py | 129 ++ ultralytics/nn/modules/head.py | 67 +- ultralytics/nn/tasks.py | 33 +- ultralytics/utils/loss.py | 16 +- ultralytics/utils/ops.py | 15 + ultralytics/utils/tal.py | 3 +- ultralytics/utils/torch_utils.py | 9 +- 31 files changed, 4948 insertions(+), 601 deletions(-) delete mode 100644 README.zh-CN.md create mode 100644 figures/latency.svg create mode 100644 figures/params.svg create mode 100644 requirements.txt create mode 100644 ultralytics/cfg/models/v10/yolov10b.yaml create mode 100644 ultralytics/cfg/models/v10/yolov10l.yaml create mode 100644 ultralytics/cfg/models/v10/yolov10m.yaml create mode 100644 ultralytics/cfg/models/v10/yolov10n.yaml create mode 100644 ultralytics/cfg/models/v10/yolov10s.yaml create mode 100644 ultralytics/cfg/models/v10/yolov10x.yaml create mode 100644 
ultralytics/models/yolov10/__init__.py create mode 100644 ultralytics/models/yolov10/model.py create mode 100644 ultralytics/models/yolov10/predict.py create mode 100644 ultralytics/models/yolov10/train.py create mode 100644 ultralytics/models/yolov10/val.py diff --git a/README.md b/README.md index bb3b596da..82c4f1485 100644 --- a/README.md +++ b/README.md @@ -1,295 +1,84 @@ -
-

- - YOLO Vision banner -

+# [YOLOv10: Real-Time End-to-End Object Detection]() -[中文](https://docs.ultralytics.com/zh/) | [한국어](https://docs.ultralytics.com/ko/) | [日本語](https://docs.ultralytics.com/ja/) | [Русский](https://docs.ultralytics.com/ru/) | [Deutsch](https://docs.ultralytics.com/de/) | [Français](https://docs.ultralytics.com/fr/) | [Español](https://docs.ultralytics.com/es/) | [Português](https://docs.ultralytics.com/pt/) | [हिन्दी](https://docs.ultralytics.com/hi/) | [العربية](https://docs.ultralytics.com/ar/)
-
- Ultralytics CI - Ultralytics Code Coverage - YOLOv8 Citation - Docker Pulls - Discord -
- Run on Gradient - Open In Colab - Open In Kaggle -
-
+Official PyTorch implementation of **YOLOv10**. -[Ultralytics](https://ultralytics.com) [YOLOv8](https://github.com/ultralytics/ultralytics) is a cutting-edge, state-of-the-art (SOTA) model that builds upon the success of previous YOLO versions and introduces new features and improvements to further boost performance and flexibility. YOLOv8 is designed to be fast, accurate, and easy to use, making it an excellent choice for a wide range of object detection and tracking, instance segmentation, image classification and pose estimation tasks. +

+ +
+ Comparisons with others in terms of latency-accuracy (left) and size-accuracy (right) trade-offs. +

-We hope that the resources here will help you get the most out of YOLOv8. Please browse the YOLOv8 Docs for details, raise an issue on GitHub for support, and join our Discord community for questions and discussions! - -To request an Enterprise License please complete the form at [Ultralytics Licensing](https://ultralytics.com/license). - -YOLOv8 performance plots - -
- Ultralytics GitHub - space - Ultralytics LinkedIn - space - Ultralytics Twitter - space - Ultralytics YouTube - space - Ultralytics TikTok - space - Ultralytics Instagram - space - Ultralytics Discord -
-
- -##
Documentation
- -See below for a quickstart installation and usage example, and see the [YOLOv8 Docs](https://docs.ultralytics.com) for full documentation on training, validation, prediction and deployment. - -
-Install - -Pip install the ultralytics package including all [requirements](https://github.com/ultralytics/ultralytics/blob/main/pyproject.toml) in a [**Python>=3.8**](https://www.python.org/) environment with [**PyTorch>=1.8**](https://pytorch.org/get-started/locally/). - -[![PyPI version](https://badge.fury.io/py/ultralytics.svg)](https://badge.fury.io/py/ultralytics) [![Downloads](https://static.pepy.tech/badge/ultralytics)](https://pepy.tech/project/ultralytics) - -```bash -pip install ultralytics -``` - -For alternative installation methods including [Conda](https://anaconda.org/conda-forge/ultralytics), [Docker](https://hub.docker.com/r/ultralytics/ultralytics), and Git, please refer to the [Quickstart Guide](https://docs.ultralytics.com/quickstart). +[YOLOv10: Real-Time End-to-End Object Detection]().\ +Ao Wang, Hui Chen, Lihao Liu, Kai Chen, Zijia Lin, Jungong Han, and Guiguang Ding\ +[[`arXiv`]()] +
+ + Abstract + +Over the past years, YOLOs have emerged as the predominant paradigm in the field of real-time object detection owing to their effective balance between computational cost and detection performance. Researchers have explored the architectural designs, optimization objectives, data augmentation strategies, and others for YOLOs, achieving notable progress. However, the reliance on the non-maximum suppression (NMS) for post-processing hampers the end-to-end deployment of YOLOs and adversely impacts the inference latency. Besides, the design of various components in YOLOs lacks the comprehensive and thorough inspection, resulting in noticeable computational redundancy and limiting the model's capability. It renders the suboptimal efficiency, along with considerable potential for performance improvements. In this work, we aim to further advance the performance-efficiency boundary of YOLOs from both the post-processing and the model architecture. To this end, we first present the consistent dual assignments for NMS-free training of YOLOs, which brings the competitive performance and low inference latency simultaneously. Moreover, we introduce the holistic efficiency-accuracy driven model design strategy for YOLOs. We comprehensively optimize various components of YOLOs from both the efficiency and accuracy perspectives, which greatly reduces the computational overhead and enhances the capability. The outcome of our effort is a new generation of YOLO series for real-time end-to-end object detection, dubbed YOLOv10. Extensive experiments show that YOLOv10 achieves the state-of-the-art performance and efficiency across various model scales. For example, our YOLOv10-S is 1.8$\times$ faster than RT-DETR-R18 under the similar AP on COCO, meanwhile enjoying 2.8$\times$ smaller number of parameters and FLOPs. Compared with YOLOv9-C, YOLOv10-B has 46\% less latency and 25\% fewer parameters for the same performance.
-
-Usage - -### CLI - -YOLOv8 may be used directly in the Command Line Interface (CLI) with a `yolo` command: - -```bash -yolo predict model=yolov8n.pt source='https://ultralytics.com/images/bus.jpg' +## Performance +COCO +| Model | Test Size | #Params | FLOPs | APval | Latency | +|:---------------|:----:|:---:|:--:|:--:|:--:| +| [YOLOv10-N](https://github.com/jameslahm/yolov10/releases/download/v1.0/yolov10n.pt) | 640 | 2.3M | 6.7G | 38.5% | 1.84ms | +| [YOLOv10-S](https://github.com/jameslahm/yolov10/releases/download/v1.0/yolov10s.pt) | 640 | 7.2M | 21.6G | 46.3% | 2.49ms | +| [YOLOv10-M](https://github.com/jameslahm/yolov10/releases/download/v1.0/yolov10m.pt) | 640 | 15.4M | 59.1G | 51.1% | 4.74ms | +| [YOLOv10-B](https://github.com/jameslahm/yolov10/releases/download/v1.0/yolov10b.pt) | 640 | 19.1M | 92.0G | 52.5% | 5.74ms | +| [YOLOv10-L](https://github.com/jameslahm/yolov10/releases/download/v1.0/yolov10l.pt) | 640 | 24.4M | 120.3G | 53.2% | 7.28ms | +| [YOLOv10-X](https://github.com/jameslahm/yolov10/releases/download/v1.0/yolov10x.pt) | 640 | 29.5M | 160.4G | 54.4% | 10.70ms | + +## Installation +`conda` virtual environment is recommended. ``` - -`yolo` can be used for a variety of tasks and modes and accepts additional arguments, i.e. `imgsz=640`. See the YOLOv8 [CLI Docs](https://docs.ultralytics.com/usage/cli) for examples. 
- -### Python - -YOLOv8 may also be used directly in a Python environment, and accepts the same [arguments](https://docs.ultralytics.com/usage/cfg/) as in the CLI example above: - -```python -from ultralytics import YOLO - -# Load a model -model = YOLO("yolov8n.yaml") # build a new model from scratch -model = YOLO("yolov8n.pt") # load a pretrained model (recommended for training) - -# Use the model -model.train(data="coco128.yaml", epochs=3) # train the model -metrics = model.val() # evaluate model performance on the validation set -results = model("https://ultralytics.com/images/bus.jpg") # predict on an image -path = model.export(format="onnx") # export the model to ONNX format +conda create -n yolov10 python=3.9 +pip install -r requirements.txt +pip install -e . ``` -See YOLOv8 [Python Docs](https://docs.ultralytics.com/usage/python) for more examples. - -
- -### Notebooks - -Ultralytics provides interactive notebooks for YOLOv8, covering training, validation, tracking, and more. Each notebook is paired with a [YouTube](https://youtube.com/ultralytics) tutorial, making it easy to learn and implement advanced YOLOv8 features. - -| Docs | Notebook | YouTube | -| --------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | -| YOLOv8 Train, Val, Predict and Export Modes | Open In Colab |
Ultralytics Youtube Video
| -| Ultralytics HUB QuickStart | Open In Colab |
Ultralytics Youtube Video
| -| YOLOv8 Multi-Object Tracking in Videos | Open In Colab |
Ultralytics Youtube Video
| -| YOLOv8 Object Counting in Videos | Open In Colab |
Ultralytics Youtube Video
| -| YOLOv8 Heatmaps in Videos | Open In Colab |
Ultralytics Youtube Video
| -| Ultralytics Datasets Explorer with SQL and OpenAI Integration 🚀 New | Open In Colab |
Ultralytics Youtube Video
| - -##
Models
- -YOLOv8 [Detect](https://docs.ultralytics.com/tasks/detect), [Segment](https://docs.ultralytics.com/tasks/segment) and [Pose](https://docs.ultralytics.com/tasks/pose) models pretrained on the [COCO](https://docs.ultralytics.com/datasets/detect/coco) dataset are available here, as well as YOLOv8 [Classify](https://docs.ultralytics.com/tasks/classify) models pretrained on the [ImageNet](https://docs.ultralytics.com/datasets/classify/imagenet) dataset. [Track](https://docs.ultralytics.com/modes/track) mode is available for all Detect, Segment and Pose models. - -Ultralytics YOLO supported tasks - -All [Models](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models) download automatically from the latest Ultralytics [release](https://github.com/ultralytics/assets/releases) on first use. - -
Detection (COCO) - -See [Detection Docs](https://docs.ultralytics.com/tasks/detect/) for usage examples with these models trained on [COCO](https://docs.ultralytics.com/datasets/detect/coco/), which include 80 pre-trained classes. - -| Model | size
(pixels) | mAPval
50-95 | Speed
CPU ONNX
(ms) | Speed
A100 TensorRT
(ms) | params
(M) | FLOPs
(B) | -| ------------------------------------------------------------------------------------ | --------------------- | -------------------- | ------------------------------ | ----------------------------------- | ------------------ | ----------------- | -| [YOLOv8n](https://github.com/ultralytics/assets/releases/download/v8.1.0/yolov8n.pt) | 640 | 37.3 | 80.4 | 0.99 | 3.2 | 8.7 | -| [YOLOv8s](https://github.com/ultralytics/assets/releases/download/v8.1.0/yolov8s.pt) | 640 | 44.9 | 128.4 | 1.20 | 11.2 | 28.6 | -| [YOLOv8m](https://github.com/ultralytics/assets/releases/download/v8.1.0/yolov8m.pt) | 640 | 50.2 | 234.7 | 1.83 | 25.9 | 78.9 | -| [YOLOv8l](https://github.com/ultralytics/assets/releases/download/v8.1.0/yolov8l.pt) | 640 | 52.9 | 375.2 | 2.39 | 43.7 | 165.2 | -| [YOLOv8x](https://github.com/ultralytics/assets/releases/download/v8.1.0/yolov8x.pt) | 640 | 53.9 | 479.1 | 3.53 | 68.2 | 257.8 | - -- **mAPval** values are for single-model single-scale on [COCO val2017](https://cocodataset.org) dataset.
Reproduce by `yolo val detect data=coco.yaml device=0` -- **Speed** averaged over COCO val images using an [Amazon EC2 P4d](https://aws.amazon.com/ec2/instance-types/p4/) instance.
Reproduce by `yolo val detect data=coco.yaml batch=1 device=0|cpu` - -
- -
Detection (Open Image V7) - -See [Detection Docs](https://docs.ultralytics.com/tasks/detect/) for usage examples with these models trained on [Open Image V7](https://docs.ultralytics.com/datasets/detect/open-images-v7/), which include 600 pre-trained classes. - -| Model | size
(pixels) | mAPval
50-95 | Speed
CPU ONNX
(ms) | Speed
A100 TensorRT
(ms) | params
(M) | FLOPs
(B) | -| ----------------------------------------------------------------------------------------- | --------------------- | -------------------- | ------------------------------ | ----------------------------------- | ------------------ | ----------------- | -| [YOLOv8n](https://github.com/ultralytics/assets/releases/download/v8.1.0/yolov8n-oiv7.pt) | 640 | 18.4 | 142.4 | 1.21 | 3.5 | 10.5 | -| [YOLOv8s](https://github.com/ultralytics/assets/releases/download/v8.1.0/yolov8s-oiv7.pt) | 640 | 27.7 | 183.1 | 1.40 | 11.4 | 29.7 | -| [YOLOv8m](https://github.com/ultralytics/assets/releases/download/v8.1.0/yolov8m-oiv7.pt) | 640 | 33.6 | 408.5 | 2.26 | 26.2 | 80.6 | -| [YOLOv8l](https://github.com/ultralytics/assets/releases/download/v8.1.0/yolov8l-oiv7.pt) | 640 | 34.9 | 596.9 | 2.43 | 44.1 | 167.4 | -| [YOLOv8x](https://github.com/ultralytics/assets/releases/download/v8.1.0/yolov8x-oiv7.pt) | 640 | 36.3 | 860.6 | 3.56 | 68.7 | 260.6 | - -- **mAPval** values are for single-model single-scale on [Open Image V7](https://docs.ultralytics.com/datasets/detect/open-images-v7/) dataset.
Reproduce by `yolo val detect data=open-images-v7.yaml device=0` -- **Speed** averaged over Open Image V7 val images using an [Amazon EC2 P4d](https://aws.amazon.com/ec2/instance-types/p4/) instance.
Reproduce by `yolo val detect data=open-images-v7.yaml batch=1 device=0|cpu` - -
- -
Segmentation (COCO) - -See [Segmentation Docs](https://docs.ultralytics.com/tasks/segment/) for usage examples with these models trained on [COCO-Seg](https://docs.ultralytics.com/datasets/segment/coco/), which include 80 pre-trained classes. - -| Model | size
(pixels) | mAPbox
50-95 | mAPmask
50-95 | Speed
CPU ONNX
(ms) | Speed
A100 TensorRT
(ms) | params
(M) | FLOPs
(B) | -| -------------------------------------------------------------------------------------------- | --------------------- | -------------------- | --------------------- | ------------------------------ | ----------------------------------- | ------------------ | ----------------- | -| [YOLOv8n-seg](https://github.com/ultralytics/assets/releases/download/v8.1.0/yolov8n-seg.pt) | 640 | 36.7 | 30.5 | 96.1 | 1.21 | 3.4 | 12.6 | -| [YOLOv8s-seg](https://github.com/ultralytics/assets/releases/download/v8.1.0/yolov8s-seg.pt) | 640 | 44.6 | 36.8 | 155.7 | 1.47 | 11.8 | 42.6 | -| [YOLOv8m-seg](https://github.com/ultralytics/assets/releases/download/v8.1.0/yolov8m-seg.pt) | 640 | 49.9 | 40.8 | 317.0 | 2.18 | 27.3 | 110.2 | -| [YOLOv8l-seg](https://github.com/ultralytics/assets/releases/download/v8.1.0/yolov8l-seg.pt) | 640 | 52.3 | 42.6 | 572.4 | 2.79 | 46.0 | 220.5 | -| [YOLOv8x-seg](https://github.com/ultralytics/assets/releases/download/v8.1.0/yolov8x-seg.pt) | 640 | 53.4 | 43.4 | 712.1 | 4.02 | 71.8 | 344.1 | - -- **mAPval** values are for single-model single-scale on [COCO val2017](https://cocodataset.org) dataset.
Reproduce by `yolo val segment data=coco-seg.yaml device=0` -- **Speed** averaged over COCO val images using an [Amazon EC2 P4d](https://aws.amazon.com/ec2/instance-types/p4/) instance.
Reproduce by `yolo val segment data=coco-seg.yaml batch=1 device=0|cpu` - -
- -
Pose (COCO) - -See [Pose Docs](https://docs.ultralytics.com/tasks/pose/) for usage examples with these models trained on [COCO-Pose](https://docs.ultralytics.com/datasets/pose/coco/), which include 1 pre-trained class, person. - -| Model | size
(pixels) | mAPpose
50-95 | mAPpose
50 | Speed
CPU ONNX
(ms) | Speed
A100 TensorRT
(ms) | params
(M) | FLOPs
(B) | -| ---------------------------------------------------------------------------------------------------- | --------------------- | --------------------- | ------------------ | ------------------------------ | ----------------------------------- | ------------------ | ----------------- | -| [YOLOv8n-pose](https://github.com/ultralytics/assets/releases/download/v8.1.0/yolov8n-pose.pt) | 640 | 50.4 | 80.1 | 131.8 | 1.18 | 3.3 | 9.2 | -| [YOLOv8s-pose](https://github.com/ultralytics/assets/releases/download/v8.1.0/yolov8s-pose.pt) | 640 | 60.0 | 86.2 | 233.2 | 1.42 | 11.6 | 30.2 | -| [YOLOv8m-pose](https://github.com/ultralytics/assets/releases/download/v8.1.0/yolov8m-pose.pt) | 640 | 65.0 | 88.8 | 456.3 | 2.00 | 26.4 | 81.0 | -| [YOLOv8l-pose](https://github.com/ultralytics/assets/releases/download/v8.1.0/yolov8l-pose.pt) | 640 | 67.6 | 90.0 | 784.5 | 2.59 | 44.4 | 168.6 | -| [YOLOv8x-pose](https://github.com/ultralytics/assets/releases/download/v8.1.0/yolov8x-pose.pt) | 640 | 69.2 | 90.2 | 1607.1 | 3.73 | 69.4 | 263.2 | -| [YOLOv8x-pose-p6](https://github.com/ultralytics/assets/releases/download/v8.1.0/yolov8x-pose-p6.pt) | 1280 | 71.6 | 91.2 | 4088.7 | 10.04 | 99.1 | 1066.4 | - -- **mAPval** values are for single-model single-scale on [COCO Keypoints val2017](https://cocodataset.org) dataset.
Reproduce by `yolo val pose data=coco-pose.yaml device=0` -- **Speed** averaged over COCO val images using an [Amazon EC2 P4d](https://aws.amazon.com/ec2/instance-types/p4/) instance.
Reproduce by `yolo val pose data=coco-pose.yaml batch=1 device=0|cpu` - -
- -
OBB (DOTAv1) - -See [OBB Docs](https://docs.ultralytics.com/tasks/obb/) for usage examples with these models trained on [DOTAv1](https://docs.ultralytics.com/datasets/obb/dota-v2/#dota-v10/), which include 15 pre-trained classes. - -| Model | size
(pixels) | mAPtest
50 | Speed
CPU ONNX
(ms) | Speed
A100 TensorRT
(ms) | params
(M) | FLOPs
(B) | -| -------------------------------------------------------------------------------------------- | --------------------- | ------------------ | ------------------------------ | ----------------------------------- | ------------------ | ----------------- | -| [YOLOv8n-obb](https://github.com/ultralytics/assets/releases/download/v8.1.0/yolov8n-obb.pt) | 1024 | 78.0 | 204.77 | 3.57 | 3.1 | 23.3 | -| [YOLOv8s-obb](https://github.com/ultralytics/assets/releases/download/v8.1.0/yolov8s-obb.pt) | 1024 | 79.5 | 424.88 | 4.07 | 11.4 | 76.3 | -| [YOLOv8m-obb](https://github.com/ultralytics/assets/releases/download/v8.1.0/yolov8m-obb.pt) | 1024 | 80.5 | 763.48 | 7.61 | 26.4 | 208.6 | -| [YOLOv8l-obb](https://github.com/ultralytics/assets/releases/download/v8.1.0/yolov8l-obb.pt) | 1024 | 80.7 | 1278.42 | 11.83 | 44.5 | 433.8 | -| [YOLOv8x-obb](https://github.com/ultralytics/assets/releases/download/v8.1.0/yolov8x-obb.pt) | 1024 | 81.36 | 1759.10 | 13.23 | 69.5 | 676.7 | - -- **mAPtest** values are for single-model multiscale on [DOTAv1](https://captain-whu.github.io/DOTA/index.html) dataset.
Reproduce by `yolo val obb data=DOTAv1.yaml device=0 split=test` and submit merged results to [DOTA evaluation](https://captain-whu.github.io/DOTA/evaluation.html). -- **Speed** averaged over DOTAv1 val images using an [Amazon EC2 P4d](https://aws.amazon.com/ec2/instance-types/p4/) instance.
Reproduce by `yolo val obb data=DOTAv1.yaml batch=1 device=0|cpu` - -
- -
Classification (ImageNet) - -See [Classification Docs](https://docs.ultralytics.com/tasks/classify/) for usage examples with these models trained on [ImageNet](https://docs.ultralytics.com/datasets/classify/imagenet/), which include 1000 pretrained classes. - -| Model | size
(pixels) | acc
top1 | acc
top5 | Speed
CPU ONNX
(ms) | Speed
A100 TensorRT
(ms) | params
(M) | FLOPs
(B) at 640 | -| -------------------------------------------------------------------------------------------- | --------------------- | ---------------- | ---------------- | ------------------------------ | ----------------------------------- | ------------------ | ------------------------ | -| [YOLOv8n-cls](https://github.com/ultralytics/assets/releases/download/v8.1.0/yolov8n-cls.pt) | 224 | 69.0 | 88.3 | 12.9 | 0.31 | 2.7 | 4.3 | -| [YOLOv8s-cls](https://github.com/ultralytics/assets/releases/download/v8.1.0/yolov8s-cls.pt) | 224 | 73.8 | 91.7 | 23.4 | 0.35 | 6.4 | 13.5 | -| [YOLOv8m-cls](https://github.com/ultralytics/assets/releases/download/v8.1.0/yolov8m-cls.pt) | 224 | 76.8 | 93.5 | 85.4 | 0.62 | 17.0 | 42.7 | -| [YOLOv8l-cls](https://github.com/ultralytics/assets/releases/download/v8.1.0/yolov8l-cls.pt) | 224 | 76.8 | 93.5 | 163.0 | 0.87 | 37.5 | 99.7 | -| [YOLOv8x-cls](https://github.com/ultralytics/assets/releases/download/v8.1.0/yolov8x-cls.pt) | 224 | 79.0 | 94.6 | 232.0 | 1.01 | 57.4 | 154.8 | - -- **acc** values are model accuracies on the [ImageNet](https://www.image-net.org/) dataset validation set.
Reproduce by `yolo val classify data=path/to/ImageNet device=0` -- **Speed** averaged over ImageNet val images using an [Amazon EC2 P4d](https://aws.amazon.com/ec2/instance-types/p4/) instance.
Reproduce by `yolo val classify data=path/to/ImageNet batch=1 device=0|cpu` - -
- -##
Integrations
- -Our key integrations with leading AI platforms extend the functionality of Ultralytics' offerings, enhancing tasks like dataset labeling, training, visualization, and model management. Discover how Ultralytics, in collaboration with [Roboflow](https://roboflow.com/?ref=ultralytics), ClearML, [Comet](https://bit.ly/yolov8-readme-comet), Neural Magic and [OpenVINO](https://docs.ultralytics.com/integrations/openvino), can optimize your AI workflow. - -
- -Ultralytics active learning integrations -
-
- -
- - Roboflow logo - space - - ClearML logo - space - - Comet ML logo - space - - NeuralMagic logo -
- -| Roboflow | ClearML ⭐ NEW | Comet ⭐ NEW | Neural Magic ⭐ NEW | -| :--------------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------------------------------------------------: | :----------------------------------------------------------------------------------------------------: | -| Label and export your custom datasets directly to YOLOv8 for training with [Roboflow](https://roboflow.com/?ref=ultralytics) | Automatically track, visualize and even remotely train YOLOv8 using [ClearML](https://clear.ml/) (open-source!) | Free forever, [Comet](https://bit.ly/yolov8-readme-comet) lets you save YOLOv8 models, resume training, and interactively visualize and debug predictions | Run YOLOv8 inference up to 6x faster with [Neural Magic DeepSparse](https://bit.ly/yolov5-neuralmagic) | - -##
Ultralytics HUB
- -Experience seamless AI with [Ultralytics HUB](https://bit.ly/ultralytics_hub) ⭐, the all-in-one solution for data visualization, YOLOv5 and YOLOv8 🚀 model training and deployment, without any coding. Transform images into actionable insights and bring your AI visions to life with ease using our cutting-edge platform and user-friendly [Ultralytics App](https://ultralytics.com/app_install). Start your journey for **Free** now! - - -Ultralytics HUB preview image - -##
Contribute
+## Validation +[`yolov10n.pt`](https://github.com/jameslahm/yolov10/releases/download/v1.0/yolov10n.pt) [`yolov10s.pt`](https://github.com/jameslahm/yolov10/releases/download/v1.0/yolov10s.pt) [`yolov10m.pt`](https://github.com/jameslahm/yolov10/releases/download/v1.0/yolov10m.pt) [`yolov10b.pt`](https://github.com/jameslahm/yolov10/releases/download/v1.0/yolov10b.pt) [`yolov10l.pt`](https://github.com/jameslahm/yolov10/releases/download/v1.0/yolov10l.pt) [`yolov10x.pt`](https://github.com/jameslahm/yolov10/releases/download/v1.0/yolov10x.pt) +``` +yolo val model=yolov10n/s/m/b/l/x.pt data=coco.yaml batch=256 +``` -We love your input! YOLOv5 and YOLOv8 would not be possible without help from our community. Please see our [Contributing Guide](https://docs.ultralytics.com/help/contributing) to get started, and fill out our [Survey](https://ultralytics.com/survey?utm_source=github&utm_medium=social&utm_campaign=Survey) to send us feedback on your experience. Thank you 🙏 to all our contributors! +## Training +``` +yolo detect train data=coco.yaml model=yolov10n/s/m/b/l/x.yaml epochs=500 batch=256 imgsz=640 device=0,1,2,3,4,5,6,7 +``` - +## Prediction +``` +yolo predict model=yolov10n/s/m/b/l/x.pt +``` - -Ultralytics open-source contributors +## Export +``` +# End-to-End ONNX +yolo export model=yolov10n/s/m/b/l/x.pt format=onnx opset=13 simplify +# Predict with ONNX +yolo predict model=yolov10n/s/m/b/l/x.onnx + +# End-to-End TensorRT +yolo export model=yolov10n/s/m/b/l/x.pt format=engine half=True simplify opset=13 workspace=16 +# Or +trtexec --onnx=yolov10n/s/m/b/l/x.onnx --saveEngine=yolov10n/s/m/b/l/x.engine --fp16 +# Predict with TensorRT +yolo predict model=yolov10n/s/m/b/l/x.engine +``` -##
License
+## Acknowledgement -Ultralytics offers two licensing options to accommodate diverse use cases: +The code base is built with [ultralytics](https://github.com/ultralytics/ultralytics) and [RT-DETR](https://github.com/lyuwenyu/RT-DETR). -- **AGPL-3.0 License**: This [OSI-approved](https://opensource.org/licenses/) open-source license is ideal for students and enthusiasts, promoting open collaboration and knowledge sharing. See the [LICENSE](https://github.com/ultralytics/ultralytics/blob/main/LICENSE) file for more details. -- **Enterprise License**: Designed for commercial use, this license permits seamless integration of Ultralytics software and AI models into commercial goods and services, bypassing the open-source requirements of AGPL-3.0. If your scenario involves embedding our solutions into a commercial offering, reach out through [Ultralytics Licensing](https://ultralytics.com/license). +Thanks for the great implementations! -##
Contact
+## Citation -For Ultralytics bug reports and feature requests please visit [GitHub Issues](https://github.com/ultralytics/ultralytics/issues), and join our [Discord](https://ultralytics.com/discord) community for questions and discussions! +If our code or models help your work, please cite our paper: +```BibTeX -
-
- Ultralytics GitHub - space - Ultralytics LinkedIn - space - Ultralytics Twitter - space - Ultralytics YouTube - space - Ultralytics TikTok - space - Ultralytics Instagram - space - Ultralytics Discord -
+``` diff --git a/README.zh-CN.md b/README.zh-CN.md deleted file mode 100644 index 7af023f49..000000000 --- a/README.zh-CN.md +++ /dev/null @@ -1,297 +0,0 @@ -
-

- - YOLO Vision banner -

- -[中文](https://docs.ultralytics.com/zh/) | [한국어](https://docs.ultralytics.com/ko/) | [日本語](https://docs.ultralytics.com/ja/) | [Русский](https://docs.ultralytics.com/ru/) | [Deutsch](https://docs.ultralytics.com/de/) | [Français](https://docs.ultralytics.com/fr/) | [Español](https://docs.ultralytics.com/es/) | [Português](https://docs.ultralytics.com/pt/) | [हिन्दी](https://docs.ultralytics.com/hi/) | [العربية](https://docs.ultralytics.com/ar/)
- -
- Ultralytics CI - Ultralytics Code Coverage - YOLOv8 Citation - Docker Pulls - Discord -
- Run on Gradient - Open In Colab - Open In Kaggle -
-
- -[Ultralytics](https://ultralytics.com) [YOLOv8](https://github.com/ultralytics/ultralytics) 是一款前沿、最先进(SOTA)的模型,基于先前 YOLO 版本的成功,引入了新功能和改进,进一步提升性能和灵活性。YOLOv8 设计快速、准确且易于使用,使其成为各种物体检测与跟踪、实例分割、图像分类和姿态估计任务的绝佳选择。 - -我们希望这里的资源能帮助您充分利用 YOLOv8。请浏览 YOLOv8 文档 了解详细信息,在 GitHub 上提交问题以获得支持,并加入我们的 Discord 社区进行问题和讨论! - -如需申请企业许可,请在 [Ultralytics Licensing](https://ultralytics.com/license) 处填写表格 - -YOLOv8 performance plots - -
- Ultralytics GitHub - space - Ultralytics LinkedIn - space - Ultralytics Twitter - space - Ultralytics YouTube - space - Ultralytics TikTok - space - Ultralytics Instagram - space - Ultralytics Discord -
-
- -以下是提供的内容的中文翻译: - -##
文档
- -请参阅下面的快速安装和使用示例,以及 [YOLOv8 文档](https://docs.ultralytics.com) 上有关训练、验证、预测和部署的完整文档。 - -
-安装 - -使用Pip在一个[**Python>=3.8**](https://www.python.org/)环境中安装`ultralytics`包,此环境还需包含[**PyTorch>=1.8**](https://pytorch.org/get-started/locally/)。这也会安装所有必要的[依赖项](https://github.com/ultralytics/ultralytics/blob/main/pyproject.toml)。 - -[![PyPI version](https://badge.fury.io/py/ultralytics.svg)](https://badge.fury.io/py/ultralytics) [![Downloads](https://static.pepy.tech/badge/ultralytics)](https://pepy.tech/project/ultralytics) - -```bash -pip install ultralytics -``` - -如需使用包括[Conda](https://anaconda.org/conda-forge/ultralytics)、[Docker](https://hub.docker.com/r/ultralytics/ultralytics)和Git在内的其他安装方法,请参考[快速入门指南](https://docs.ultralytics.com/quickstart)。 - -
- -
-Usage - -### CLI - -YOLOv8 可以在命令行界面(CLI)中直接使用,只需输入 `yolo` 命令: - -```bash -yolo predict model=yolov8n.pt source='https://ultralytics.com/images/bus.jpg' -``` - -`yolo` 可用于各种任务和模式,并接受其他参数,例如 `imgsz=640`。查看 YOLOv8 [CLI 文档](https://docs.ultralytics.com/usage/cli)以获取示例。 - -### Python - -YOLOv8 也可以在 Python 环境中直接使用,并接受与上述 CLI 示例中相同的[参数](https://docs.ultralytics.com/usage/cfg/): - -```python -from ultralytics import YOLO - -# 加载模型 -model = YOLO("yolov8n.yaml") # 从头开始构建新模型 -model = YOLO("yolov8n.pt") # 加载预训练模型(建议用于训练) - -# 使用模型 -model.train(data="coco128.yaml", epochs=3) # 训练模型 -metrics = model.val() # 在验证集上评估模型性能 -results = model("https://ultralytics.com/images/bus.jpg") # 对图像进行预测 -success = model.export(format="onnx") # 将模型导出为 ONNX 格式 -``` - -查看 YOLOv8 [Python 文档](https://docs.ultralytics.com/usage/python)以获取更多示例。 - -
- -### 笔记本 - -Ultralytics 提供了 YOLOv8 的交互式笔记本,涵盖训练、验证、跟踪等内容。每个笔记本都配有 [YouTube](https://youtube.com/ultralytics) 教程,使学习和实现高级 YOLOv8 功能变得简单。 - -| 文档 | 笔记本 | YouTube | -| ------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | -| YOLOv8 训练、验证、预测和导出模式 | 在 Colab 中打开 |
Ultralytics Youtube 视频
| -| Ultralytics HUB 快速开始 | 在 Colab 中打开 |
Ultralytics Youtube 视频
| -| YOLOv8 视频中的多对象跟踪 | 在 Colab 中打开 |
Ultralytics Youtube 视频
| -| YOLOv8 视频中的对象计数 | 在 Colab 中打开 |
Ultralytics Youtube 视频
| -| YOLOv8 视频中的热图 | 在 Colab 中打开 |
Ultralytics Youtube 视频
| -| Ultralytics 数据集浏览器,集成 SQL 和 OpenAI 🚀 New | 在 Colab 中打开 |
Ultralytics Youtube Video
| - -##
模型
- -在[COCO](https://docs.ultralytics.com/datasets/detect/coco)数据集上预训练的YOLOv8 [检测](https://docs.ultralytics.com/tasks/detect),[分割](https://docs.ultralytics.com/tasks/segment)和[姿态](https://docs.ultralytics.com/tasks/pose)模型可以在这里找到,以及在[ImageNet](https://docs.ultralytics.com/datasets/classify/imagenet)数据集上预训练的YOLOv8 [分类](https://docs.ultralytics.com/tasks/classify)模型。所有的检测,分割和姿态模型都支持[追踪](https://docs.ultralytics.com/modes/track)模式。 - -Ultralytics YOLO supported tasks - -所有[模型](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models)在首次使用时会自动从最新的Ultralytics [发布版本](https://github.com/ultralytics/assets/releases)下载。 - -
检测 (COCO) - -查看[检测文档](https://docs.ultralytics.com/tasks/detect/)以获取这些在[COCO](https://docs.ultralytics.com/datasets/detect/coco/)上训练的模型的使用示例,其中包括80个预训练类别。 - -| 模型 | 尺寸
(像素) | mAPval
50-95 | 速度
CPU ONNX
(ms) | 速度
A100 TensorRT
(ms) | 参数
(M) | FLOPs
(B) | -| ------------------------------------------------------------------------------------ | --------------- | -------------------- | --------------------------- | -------------------------------- | -------------- | ----------------- | -| [YOLOv8n](https://github.com/ultralytics/assets/releases/download/v8.1.0/yolov8n.pt) | 640 | 37.3 | 80.4 | 0.99 | 3.2 | 8.7 | -| [YOLOv8s](https://github.com/ultralytics/assets/releases/download/v8.1.0/yolov8s.pt) | 640 | 44.9 | 128.4 | 1.20 | 11.2 | 28.6 | -| [YOLOv8m](https://github.com/ultralytics/assets/releases/download/v8.1.0/yolov8m.pt) | 640 | 50.2 | 234.7 | 1.83 | 25.9 | 78.9 | -| [YOLOv8l](https://github.com/ultralytics/assets/releases/download/v8.1.0/yolov8l.pt) | 640 | 52.9 | 375.2 | 2.39 | 43.7 | 165.2 | -| [YOLOv8x](https://github.com/ultralytics/assets/releases/download/v8.1.0/yolov8x.pt) | 640 | 53.9 | 479.1 | 3.53 | 68.2 | 257.8 | - -- **mAPval** 值是基于单模型单尺度在 [COCO val2017](https://cocodataset.org) 数据集上的结果。
通过 `yolo val detect data=coco.yaml device=0` 复现 -- **速度** 是使用 [Amazon EC2 P4d](https://aws.amazon.com/ec2/instance-types/p4/) 实例对 COCO val 图像进行平均计算的。
通过 `yolo val detect data=coco.yaml batch=1 device=0|cpu` 复现 - -
- -
检测(Open Image V7) - -查看[检测文档](https://docs.ultralytics.com/tasks/detect/)以获取这些在[Open Image V7](https://docs.ultralytics.com/datasets/detect/open-images-v7/)上训练的模型的使用示例,其中包括600个预训练类别。 - -| 模型 | 尺寸
(像素) | mAP验证
50-95 | 速度
CPU ONNX
(毫秒) | 速度
A100 TensorRT
(毫秒) | 参数
(M) | 浮点运算
(B) | -| ----------------------------------------------------------------------------------------- | --------------- | ------------------- | --------------------------- | -------------------------------- | -------------- | ---------------- | -| [YOLOv8n](https://github.com/ultralytics/assets/releases/download/v8.1.0/yolov8n-oiv7.pt) | 640 | 18.4 | 142.4 | 1.21 | 3.5 | 10.5 | -| [YOLOv8s](https://github.com/ultralytics/assets/releases/download/v8.1.0/yolov8s-oiv7.pt) | 640 | 27.7 | 183.1 | 1.40 | 11.4 | 29.7 | -| [YOLOv8m](https://github.com/ultralytics/assets/releases/download/v8.1.0/yolov8m-oiv7.pt) | 640 | 33.6 | 408.5 | 2.26 | 26.2 | 80.6 | -| [YOLOv8l](https://github.com/ultralytics/assets/releases/download/v8.1.0/yolov8l-oiv7.pt) | 640 | 34.9 | 596.9 | 2.43 | 44.1 | 167.4 | -| [YOLOv8x](https://github.com/ultralytics/assets/releases/download/v8.1.0/yolov8x-oiv7.pt) | 640 | 36.3 | 860.6 | 3.56 | 68.7 | 260.6 | - -- **mAP验证** 值适用于在[Open Image V7](https://docs.ultralytics.com/datasets/detect/open-images-v7/)数据集上的单模型单尺度。
通过 `yolo val detect data=open-images-v7.yaml device=0` 以复现。 -- **速度** 在使用[Amazon EC2 P4d](https://aws.amazon.com/ec2/instance-types/p4/)实例对Open Image V7验证图像进行平均测算。
通过 `yolo val detect data=open-images-v7.yaml batch=1 device=0|cpu` 以复现。 - -
- -
分割 (COCO) - -查看[分割文档](https://docs.ultralytics.com/tasks/segment/)以获取这些在[COCO-Seg](https://docs.ultralytics.com/datasets/segment/coco/)上训练的模型的使用示例,其中包括80个预训练类别。 - -| 模型 | 尺寸
(像素) | mAPbox
50-95 | mAPmask
50-95 | 速度
CPU ONNX
(ms) | 速度
A100 TensorRT
(ms) | 参数
(M) | FLOPs
(B) | -| -------------------------------------------------------------------------------------------- | --------------- | -------------------- | --------------------- | --------------------------- | -------------------------------- | -------------- | ----------------- | -| [YOLOv8n-seg](https://github.com/ultralytics/assets/releases/download/v8.1.0/yolov8n-seg.pt) | 640 | 36.7 | 30.5 | 96.1 | 1.21 | 3.4 | 12.6 | -| [YOLOv8s-seg](https://github.com/ultralytics/assets/releases/download/v8.1.0/yolov8s-seg.pt) | 640 | 44.6 | 36.8 | 155.7 | 1.47 | 11.8 | 42.6 | -| [YOLOv8m-seg](https://github.com/ultralytics/assets/releases/download/v8.1.0/yolov8m-seg.pt) | 640 | 49.9 | 40.8 | 317.0 | 2.18 | 27.3 | 110.2 | -| [YOLOv8l-seg](https://github.com/ultralytics/assets/releases/download/v8.1.0/yolov8l-seg.pt) | 640 | 52.3 | 42.6 | 572.4 | 2.79 | 46.0 | 220.5 | -| [YOLOv8x-seg](https://github.com/ultralytics/assets/releases/download/v8.1.0/yolov8x-seg.pt) | 640 | 53.4 | 43.4 | 712.1 | 4.02 | 71.8 | 344.1 | - -- **mAPval** 值是基于单模型单尺度在 [COCO val2017](https://cocodataset.org) 数据集上的结果。
通过 `yolo val segment data=coco-seg.yaml device=0` 复现 -- **速度** 是使用 [Amazon EC2 P4d](https://aws.amazon.com/ec2/instance-types/p4/) 实例对 COCO val 图像进行平均计算的。
通过 `yolo val segment data=coco-seg.yaml batch=1 device=0|cpu` 复现 - -
- -
姿态 (COCO) - -查看[姿态文档](https://docs.ultralytics.com/tasks/pose/)以获取这些在[COCO-Pose](https://docs.ultralytics.com/datasets/pose/coco/)上训练的模型的使用示例,其中包括1个预训练类别,即人。 - -| 模型 | 尺寸
(像素) | mAPpose
50-95 | mAPpose
50 | 速度
CPU ONNX
(ms) | 速度
A100 TensorRT
(ms) | 参数
(M) | FLOPs
(B) | -| ---------------------------------------------------------------------------------------------------- | --------------- | --------------------- | ------------------ | --------------------------- | -------------------------------- | -------------- | ----------------- | -| [YOLOv8n-pose](https://github.com/ultralytics/assets/releases/download/v8.1.0/yolov8n-pose.pt) | 640 | 50.4 | 80.1 | 131.8 | 1.18 | 3.3 | 9.2 | -| [YOLOv8s-pose](https://github.com/ultralytics/assets/releases/download/v8.1.0/yolov8s-pose.pt) | 640 | 60.0 | 86.2 | 233.2 | 1.42 | 11.6 | 30.2 | -| [YOLOv8m-pose](https://github.com/ultralytics/assets/releases/download/v8.1.0/yolov8m-pose.pt) | 640 | 65.0 | 88.8 | 456.3 | 2.00 | 26.4 | 81.0 | -| [YOLOv8l-pose](https://github.com/ultralytics/assets/releases/download/v8.1.0/yolov8l-pose.pt) | 640 | 67.6 | 90.0 | 784.5 | 2.59 | 44.4 | 168.6 | -| [YOLOv8x-pose](https://github.com/ultralytics/assets/releases/download/v8.1.0/yolov8x-pose.pt) | 640 | 69.2 | 90.2 | 1607.1 | 3.73 | 69.4 | 263.2 | -| [YOLOv8x-pose-p6](https://github.com/ultralytics/assets/releases/download/v8.1.0/yolov8x-pose-p6.pt) | 1280 | 71.6 | 91.2 | 4088.7 | 10.04 | 99.1 | 1066.4 | - -- **mAPval** 值是基于单模型单尺度在 [COCO Keypoints val2017](https://cocodataset.org) 数据集上的结果。
通过 `yolo val pose data=coco-pose.yaml device=0` 复现 -- **速度** 是使用 [Amazon EC2 P4d](https://aws.amazon.com/ec2/instance-types/p4/) 实例对 COCO val 图像进行平均计算的。
通过 `yolo val pose data=coco-pose.yaml batch=1 device=0|cpu` 复现 - -
- -
旋转检测 (DOTAv1) - -查看[旋转检测文档](https://docs.ultralytics.com/tasks/obb/)以获取这些在[DOTAv1](https://docs.ultralytics.com/datasets/obb/dota-v2/#dota-v10/)上训练的模型的使用示例,其中包括15个预训练类别。 - -| 模型 | 尺寸
(像素) | mAPtest
50 | 速度
CPU ONNX
(ms) | 速度
A100 TensorRT
(ms) | 参数
(M) | FLOPs
(B) | -| -------------------------------------------------------------------------------------------- | --------------- | ------------------ | --------------------------- | -------------------------------- | -------------- | ----------------- | -| [YOLOv8n-obb](https://github.com/ultralytics/assets/releases/download/v8.1.0/yolov8n-obb.pt) | 1024 | 78.0 | 204.77 | 3.57 | 3.1 | 23.3 | -| [YOLOv8s-obb](https://github.com/ultralytics/assets/releases/download/v8.1.0/yolov8s-obb.pt) | 1024 | 79.5 | 424.88 | 4.07 | 11.4 | 76.3 | -| [YOLOv8m-obb](https://github.com/ultralytics/assets/releases/download/v8.1.0/yolov8m-obb.pt) | 1024 | 80.5 | 763.48 | 7.61 | 26.4 | 208.6 | -| [YOLOv8l-obb](https://github.com/ultralytics/assets/releases/download/v8.1.0/yolov8l-obb.pt) | 1024 | 80.7 | 1278.42 | 11.83 | 44.5 | 433.8 | -| [YOLOv8x-obb](https://github.com/ultralytics/assets/releases/download/v8.1.0/yolov8x-obb.pt) | 1024 | 81.36 | 1759.10 | 13.23 | 69.5 | 676.7 | - -- **mAPval** 值是基于单模型多尺度在 [DOTAv1](https://captain-whu.github.io/DOTA/index.html) 数据集上的结果。
通过 `yolo val obb data=DOTAv1.yaml device=0 split=test` 复现 -- **速度** 是使用 [Amazon EC2 P4d](https://aws.amazon.com/ec2/instance-types/p4/) 实例对 COCO val 图像进行平均计算的。
通过 `yolo val obb data=DOTAv1.yaml batch=1 device=0|cpu` 复现 - -
- -
分类 (ImageNet) - -查看[分类文档](https://docs.ultralytics.com/tasks/classify/)以获取这些在[ImageNet](https://docs.ultralytics.com/datasets/classify/imagenet/)上训练的模型的使用示例,其中包括1000个预训练类别。 - -| 模型 | 尺寸
(像素) | acc
top1 | acc
top5 | 速度
CPU ONNX
(ms) | 速度
A100 TensorRT
(ms) | 参数
(M) | FLOPs
(B) at 640 | -| -------------------------------------------------------------------------------------------- | --------------- | ---------------- | ---------------- | --------------------------- | -------------------------------- | -------------- | ------------------------ | -| [YOLOv8n-cls](https://github.com/ultralytics/assets/releases/download/v8.1.0/yolov8n-cls.pt) | 224 | 69.0 | 88.3 | 12.9 | 0.31 | 2.7 | 4.3 | -| [YOLOv8s-cls](https://github.com/ultralytics/assets/releases/download/v8.1.0/yolov8s-cls.pt) | 224 | 73.8 | 91.7 | 23.4 | 0.35 | 6.4 | 13.5 | -| [YOLOv8m-cls](https://github.com/ultralytics/assets/releases/download/v8.1.0/yolov8m-cls.pt) | 224 | 76.8 | 93.5 | 85.4 | 0.62 | 17.0 | 42.7 | -| [YOLOv8l-cls](https://github.com/ultralytics/assets/releases/download/v8.1.0/yolov8l-cls.pt) | 224 | 76.8 | 93.5 | 163.0 | 0.87 | 37.5 | 99.7 | -| [YOLOv8x-cls](https://github.com/ultralytics/assets/releases/download/v8.1.0/yolov8x-cls.pt) | 224 | 79.0 | 94.6 | 232.0 | 1.01 | 57.4 | 154.8 | - -- **acc** 值是模型在 [ImageNet](https://www.image-net.org/) 数据集验证集上的准确率。
通过 `yolo val classify data=path/to/ImageNet device=0` 复现 -- **速度** 是使用 [Amazon EC2 P4d](https://aws.amazon.com/ec2/instance-types/p4/) 实例对 ImageNet val 图像进行平均计算的。
通过 `yolo val classify data=path/to/ImageNet batch=1 device=0|cpu` 复现 - -
- -##
集成
- -我们与领先的AI平台的关键整合扩展了Ultralytics产品的功能,增强了数据集标签化、训练、可视化和模型管理等任务。探索Ultralytics如何与[Roboflow](https://roboflow.com/?ref=ultralytics)、ClearML、[Comet](https://bit.ly/yolov8-readme-comet)、Neural Magic以及[OpenVINO](https://docs.ultralytics.com/integrations/openvino)合作,优化您的AI工作流程。 - -
- -Ultralytics active learning integrations -
-
- -
- - Roboflow logo - space - - ClearML logo - space - - Comet ML logo - space - - NeuralMagic logo -
- -| Roboflow | ClearML ⭐ NEW | Comet ⭐ NEW | Neural Magic ⭐ NEW | -| :--------------------------------------------------------------------------------: | :--------------------------------------------------------: | :----------------------------------------------------------------------------------: | :-----------------------------------------------------------------------------------: | -| 使用 [Roboflow](https://roboflow.com/?ref=ultralytics) 将您的自定义数据集直接标记并导出至 YOLOv8 进行训练 | 使用 [ClearML](https://clear.ml/)(开源!)自动跟踪、可视化,甚至远程训练 YOLOv8 | 免费且永久,[Comet](https://bit.ly/yolov8-readme-comet) 让您保存 YOLOv8 模型、恢复训练,并以交互式方式查看和调试预测 | 使用 [Neural Magic DeepSparse](https://bit.ly/yolov5-neuralmagic) 使 YOLOv8 推理速度提高多达 6 倍 | - -##
Ultralytics HUB
- -体验 [Ultralytics HUB](https://bit.ly/ultralytics_hub) ⭐ 带来的无缝 AI,这是一个一体化解决方案,用于数据可视化、YOLOv5 和即将推出的 YOLOv8 🚀 模型训练和部署,无需任何编码。通过我们先进的平台和用户友好的 [Ultralytics 应用程序](https://ultralytics.com/app_install),轻松将图像转化为可操作的见解,并实现您的 AI 愿景。现在就开始您的**免费**之旅! - - -Ultralytics HUB preview image - -##
贡献
- -我们喜欢您的参与!没有社区的帮助,YOLOv5 和 YOLOv8 将无法实现。请参阅我们的[贡献指南](https://docs.ultralytics.com/help/contributing)以开始使用,并填写我们的[调查问卷](https://ultralytics.com/survey?utm_source=github&utm_medium=social&utm_campaign=Survey)向我们提供您的使用体验反馈。感谢所有贡献者的支持!🙏 - - - - -Ultralytics open-source contributors - -##
许可证
- -Ultralytics 提供两种许可证选项以适应各种使用场景: - -- **AGPL-3.0 许可证**:这个[OSI 批准](https://opensource.org/licenses/)的开源许可证非常适合学生和爱好者,可以推动开放的协作和知识分享。请查看[LICENSE](https://github.com/ultralytics/ultralytics/blob/main/LICENSE) 文件以了解更多细节。 -- **企业许可证**:专为商业用途设计,该许可证允许将 Ultralytics 的软件和 AI 模型无缝集成到商业产品和服务中,从而绕过 AGPL-3.0 的开源要求。如果您的场景涉及将我们的解决方案嵌入到商业产品中,请通过 [Ultralytics Licensing](https://ultralytics.com/license)与我们联系。 - -##
联系方式
- -对于 Ultralytics 的错误报告和功能请求,请访问 [GitHub Issues](https://github.com/ultralytics/ultralytics/issues),并加入我们的 [Discord](https://ultralytics.com/discord) 社区进行问题和讨论! - -
-
- Ultralytics GitHub - space - Ultralytics LinkedIn - space - Ultralytics Twitter - space - Ultralytics YouTube - space - Ultralytics TikTok - space - Ultralytics Instagram - space - Ultralytics Discord -
diff --git a/figures/latency.svg b/figures/latency.svg new file mode 100644 index 000000000..95fe327bb --- /dev/null +++ b/figures/latency.svg @@ -0,0 +1,2140 @@ + + + + + + + + 2024-05-23T18:13:19.661727 + image/svg+xml + + + Matplotlib v3.6.0, https://matplotlib.org/ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/figures/params.svg b/figures/params.svg new file mode 100644 index 000000000..83a0445c0 --- /dev/null +++ b/figures/params.svg @@ -0,0 +1,2108 @@ + + + + + + + + 2024-05-23T18:13:26.684127 + image/svg+xml + + + Matplotlib v3.6.0, https://matplotlib.org/ + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 000000000..7ce1843e8 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,9 @@ +torch +torchvision +onnx +onnxruntime +pycocotools +PyYAML +scipy +onnxsim +onnxruntime-gpu \ No newline at end of file diff --git a/ultralytics/__init__.py b/ultralytics/__init__.py index d25836a01..8ff1b4fb8 100644 --- a/ultralytics/__init__.py +++ b/ultralytics/__init__.py @@ -3,7 +3,7 @@ __version__ = "8.1.34" from ultralytics.data.explorer.explorer import Explorer -from ultralytics.models import RTDETR, SAM, YOLO, YOLOWorld +from ultralytics.models import RTDETR, SAM, YOLO, 
YOLOWorld, YOLOv10 from ultralytics.models.fastsam import FastSAM from ultralytics.models.nas import NAS from ultralytics.utils import ASSETS, SETTINGS as settings @@ -23,4 +23,5 @@ "download", "settings", "Explorer", + "YOLOv10" ) diff --git a/ultralytics/cfg/__init__.py b/ultralytics/cfg/__init__.py index 4dab8102d..175272ff4 100644 --- a/ultralytics/cfg/__init__.py +++ b/ultralytics/cfg/__init__.py @@ -549,6 +549,10 @@ def entrypoint(debug=""): from ultralytics import SAM model = SAM(model) + elif "yolov10" in stem: + from ultralytics import YOLOv10 + + model = YOLOv10(model) else: from ultralytics import YOLO diff --git a/ultralytics/cfg/models/v10/yolov10b.yaml b/ultralytics/cfg/models/v10/yolov10b.yaml new file mode 100644 index 000000000..a9dc72185 --- /dev/null +++ b/ultralytics/cfg/models/v10/yolov10b.yaml @@ -0,0 +1,40 @@ +# Parameters +nc: 80 # number of classes +scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n' + # [depth, width, max_channels] + b: [0.67, 1.00, 512] + +# YOLOv8.0n backbone +backbone: + # [from, repeats, module, args] + - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2 + - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4 + - [-1, 3, C2f, [128, True]] + - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8 + - [-1, 6, C2f, [256, True]] + - [-1, 1, SCDown, [512, 3, 2]] # 5-P4/16 + - [-1, 6, C2f, [512, True]] + - [-1, 1, SCDown, [1024, 3, 2]] # 7-P5/32 + - [-1, 3, C2fCIB, [1024, True]] + - [-1, 1, SPPF, [1024, 5]] # 9 + - [-1, 1, PSA, [1024]] # 10 + +# YOLOv8.0n head +head: + - [-1, 1, nn.Upsample, [None, 2, "nearest"]] + - [[-1, 6], 1, Concat, [1]] # cat backbone P4 + - [-1, 3, C2fCIB, [512, True]] # 13 + + - [-1, 1, nn.Upsample, [None, 2, "nearest"]] + - [[-1, 4], 1, Concat, [1]] # cat backbone P3 + - [-1, 3, C2f, [256]] # 16 (P3/8-small) + + - [-1, 1, Conv, [256, 3, 2]] + - [[-1, 13], 1, Concat, [1]] # cat head P4 + - [-1, 3, C2fCIB, [512, True]] # 19 (P4/16-medium) + + - [-1, 1, SCDown, [512, 3, 2]] + - [[-1, 10], 1, 
Concat, [1]] # cat head P5 + - [-1, 3, C2fCIB, [1024, True]] # 22 (P5/32-large) + + - [[16, 19, 22], 1, v10Detect, [nc]] # Detect(P3, P4, P5) diff --git a/ultralytics/cfg/models/v10/yolov10l.yaml b/ultralytics/cfg/models/v10/yolov10l.yaml new file mode 100644 index 000000000..047de262b --- /dev/null +++ b/ultralytics/cfg/models/v10/yolov10l.yaml @@ -0,0 +1,40 @@ +# Parameters +nc: 80 # number of classes +scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n' + # [depth, width, max_channels] + l: [1.00, 1.00, 512] # YOLOv8l summary: 365 layers, 43691520 parameters, 43691504 gradients, 165.7 GFLOPs + +# YOLOv8.0n backbone +backbone: + # [from, repeats, module, args] + - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2 + - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4 + - [-1, 3, C2f, [128, True]] + - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8 + - [-1, 6, C2f, [256, True]] + - [-1, 1, SCDown, [512, 3, 2]] # 5-P4/16 + - [-1, 6, C2f, [512, True]] + - [-1, 1, SCDown, [1024, 3, 2]] # 7-P5/32 + - [-1, 3, C2fCIB, [1024, True]] + - [-1, 1, SPPF, [1024, 5]] # 9 + - [-1, 1, PSA, [1024]] # 10 + +# YOLOv8.0n head +head: + - [-1, 1, nn.Upsample, [None, 2, "nearest"]] + - [[-1, 6], 1, Concat, [1]] # cat backbone P4 + - [-1, 3, C2fCIB, [512, True]] # 13 + + - [-1, 1, nn.Upsample, [None, 2, "nearest"]] + - [[-1, 4], 1, Concat, [1]] # cat backbone P3 + - [-1, 3, C2f, [256]] # 16 (P3/8-small) + + - [-1, 1, Conv, [256, 3, 2]] + - [[-1, 13], 1, Concat, [1]] # cat head P4 + - [-1, 3, C2fCIB, [512, True]] # 19 (P4/16-medium) + + - [-1, 1, SCDown, [512, 3, 2]] + - [[-1, 10], 1, Concat, [1]] # cat head P5 + - [-1, 3, C2fCIB, [1024, True]] # 22 (P5/32-large) + + - [[16, 19, 22], 1, v10Detect, [nc]] # Detect(P3, P4, P5) diff --git a/ultralytics/cfg/models/v10/yolov10m.yaml b/ultralytics/cfg/models/v10/yolov10m.yaml new file mode 100644 index 000000000..5bdb5bf55 --- /dev/null +++ b/ultralytics/cfg/models/v10/yolov10m.yaml @@ -0,0 +1,43 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 
license +# YOLOv8 object detection model with P3-P5 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect + +# Parameters +nc: 80 # number of classes +scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n' + # [depth, width, max_channels] + m: [0.67, 0.75, 768] # YOLOv8m summary: 295 layers, 25902640 parameters, 25902624 gradients, 79.3 GFLOPs + +# YOLOv8.0n backbone +backbone: + # [from, repeats, module, args] + - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2 + - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4 + - [-1, 3, C2f, [128, True]] + - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8 + - [-1, 6, C2f, [256, True]] + - [-1, 1, SCDown, [512, 3, 2]] # 5-P4/16 + - [-1, 6, C2f, [512, True]] + - [-1, 1, SCDown, [1024, 3, 2]] # 7-P5/32 + - [-1, 3, C2fCIB, [1024, True]] + - [-1, 1, SPPF, [1024, 5]] # 9 + - [-1, 1, PSA, [1024]] # 10 + +# YOLOv8.0n head +head: + - [-1, 1, nn.Upsample, [None, 2, "nearest"]] + - [[-1, 6], 1, Concat, [1]] # cat backbone P4 + - [-1, 3, C2f, [512]] # 13 + + - [-1, 1, nn.Upsample, [None, 2, "nearest"]] + - [[-1, 4], 1, Concat, [1]] # cat backbone P3 + - [-1, 3, C2f, [256]] # 16 (P3/8-small) + + - [-1, 1, Conv, [256, 3, 2]] + - [[-1, 13], 1, Concat, [1]] # cat head P4 + - [-1, 3, C2fCIB, [512, True]] # 19 (P4/16-medium) + + - [-1, 1, SCDown, [512, 3, 2]] + - [[-1, 10], 1, Concat, [1]] # cat head P5 + - [-1, 3, C2fCIB, [1024, True]] # 22 (P5/32-large) + + - [[16, 19, 22], 1, v10Detect, [nc]] # Detect(P3, P4, P5) diff --git a/ultralytics/cfg/models/v10/yolov10n.yaml b/ultralytics/cfg/models/v10/yolov10n.yaml new file mode 100644 index 000000000..1ee7437e7 --- /dev/null +++ b/ultralytics/cfg/models/v10/yolov10n.yaml @@ -0,0 +1,40 @@ +# Parameters +nc: 80 # number of classes +scales: # model compound scaling constants, i.e. 
'model=yolov8n.yaml' will call yolov8.yaml with scale 'n' + # [depth, width, max_channels] + n: [0.33, 0.25, 1024] + +# YOLOv8.0n backbone +backbone: + # [from, repeats, module, args] + - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2 + - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4 + - [-1, 3, C2f, [128, True]] + - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8 + - [-1, 6, C2f, [256, True]] + - [-1, 1, SCDown, [512, 3, 2]] # 5-P4/16 + - [-1, 6, C2f, [512, True]] + - [-1, 1, SCDown, [1024, 3, 2]] # 7-P5/32 + - [-1, 3, C2f, [1024, True]] + - [-1, 1, SPPF, [1024, 5]] # 9 + - [-1, 1, PSA, [1024]] # 10 + +# YOLOv8.0n head +head: + - [-1, 1, nn.Upsample, [None, 2, "nearest"]] + - [[-1, 6], 1, Concat, [1]] # cat backbone P4 + - [-1, 3, C2f, [512]] # 13 + + - [-1, 1, nn.Upsample, [None, 2, "nearest"]] + - [[-1, 4], 1, Concat, [1]] # cat backbone P3 + - [-1, 3, C2f, [256]] # 16 (P3/8-small) + + - [-1, 1, Conv, [256, 3, 2]] + - [[-1, 13], 1, Concat, [1]] # cat head P4 + - [-1, 3, C2f, [512]] # 19 (P4/16-medium) + + - [-1, 1, SCDown, [512, 3, 2]] + - [[-1, 10], 1, Concat, [1]] # cat head P5 + - [-1, 3, C2fCIB, [1024, True, True]] # 22 (P5/32-large) + + - [[16, 19, 22], 1, v10Detect, [nc]] # Detect(P3, P4, P5) diff --git a/ultralytics/cfg/models/v10/yolov10s.yaml b/ultralytics/cfg/models/v10/yolov10s.yaml new file mode 100644 index 000000000..c61e08cdc --- /dev/null +++ b/ultralytics/cfg/models/v10/yolov10s.yaml @@ -0,0 +1,39 @@ +# Parameters +nc: 80 # number of classes +scales: # model compound scaling constants, i.e. 
'model=yolov8n.yaml' will call yolov8.yaml with scale 'n' + # [depth, width, max_channels] + s: [0.33, 0.50, 1024] + +backbone: + # [from, repeats, module, args] + - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2 + - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4 + - [-1, 3, C2f, [128, True]] + - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8 + - [-1, 6, C2f, [256, True]] + - [-1, 1, SCDown, [512, 3, 2]] # 5-P4/16 + - [-1, 6, C2f, [512, True]] + - [-1, 1, SCDown, [1024, 3, 2]] # 7-P5/32 + - [-1, 3, C2fCIB, [1024, True, True]] + - [-1, 1, SPPF, [1024, 5]] # 9 + - [-1, 1, PSA, [1024]] # 10 + +# YOLOv8.0n head +head: + - [-1, 1, nn.Upsample, [None, 2, "nearest"]] + - [[-1, 6], 1, Concat, [1]] # cat backbone P4 + - [-1, 3, C2f, [512]] # 13 + + - [-1, 1, nn.Upsample, [None, 2, "nearest"]] + - [[-1, 4], 1, Concat, [1]] # cat backbone P3 + - [-1, 3, C2f, [256]] # 16 (P3/8-small) + + - [-1, 1, Conv, [256, 3, 2]] + - [[-1, 13], 1, Concat, [1]] # cat head P4 + - [-1, 3, C2f, [512]] # 19 (P4/16-medium) + + - [-1, 1, SCDown, [512, 3, 2]] + - [[-1, 10], 1, Concat, [1]] # cat head P5 + - [-1, 3, C2fCIB, [1024, True, True]] # 22 (P5/32-large) + + - [[16, 19, 22], 1, v10Detect, [nc]] # Detect(P3, P4, P5) diff --git a/ultralytics/cfg/models/v10/yolov10x.yaml b/ultralytics/cfg/models/v10/yolov10x.yaml new file mode 100644 index 000000000..ab5fc8f06 --- /dev/null +++ b/ultralytics/cfg/models/v10/yolov10x.yaml @@ -0,0 +1,40 @@ +# Parameters +nc: 80 # number of classes +scales: # model compound scaling constants, i.e. 
'model=yolov8n.yaml' will call yolov8.yaml with scale 'n' + # [depth, width, max_channels] + x: [1.00, 1.25, 512] + +# YOLOv8.0n backbone +backbone: + # [from, repeats, module, args] + - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2 + - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4 + - [-1, 3, C2f, [128, True]] + - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8 + - [-1, 6, C2f, [256, True]] + - [-1, 1, SCDown, [512, 3, 2]] # 5-P4/16 + - [-1, 6, C2fCIB, [512, True]] + - [-1, 1, SCDown, [1024, 3, 2]] # 7-P5/32 + - [-1, 3, C2fCIB, [1024, True]] + - [-1, 1, SPPF, [1024, 5]] # 9 + - [-1, 1, PSA, [1024]] # 10 + +# YOLOv8.0n head +head: + - [-1, 1, nn.Upsample, [None, 2, "nearest"]] + - [[-1, 6], 1, Concat, [1]] # cat backbone P4 + - [-1, 3, C2fCIB, [512, True]] # 13 + + - [-1, 1, nn.Upsample, [None, 2, "nearest"]] + - [[-1, 4], 1, Concat, [1]] # cat backbone P3 + - [-1, 3, C2f, [256]] # 16 (P3/8-small) + + - [-1, 1, Conv, [256, 3, 2]] + - [[-1, 13], 1, Concat, [1]] # cat head P4 + - [-1, 3, C2fCIB, [512, True]] # 19 (P4/16-medium) + + - [-1, 1, SCDown, [512, 3, 2]] + - [[-1, 10], 1, Concat, [1]] # cat head P5 + - [-1, 3, C2fCIB, [1024, True]] # 22 (P5/32-large) + + - [[16, 19, 22], 1, v10Detect, [nc]] # Detect(P3, P4, P5) diff --git a/ultralytics/engine/exporter.py b/ultralytics/engine/exporter.py index 859106a1a..1fa3f2e10 100644 --- a/ultralytics/engine/exporter.py +++ b/ultralytics/engine/exporter.py @@ -67,7 +67,7 @@ from ultralytics.data.dataset import YOLODataset from ultralytics.data.utils import check_det_dataset from ultralytics.nn.autobackend import check_class_names, default_class_names -from ultralytics.nn.modules import C2f, Detect, RTDETRDecoder +from ultralytics.nn.modules import C2f, Detect, RTDETRDecoder, v10Detect from ultralytics.nn.tasks import DetectionModel, SegmentationModel, WorldModel from ultralytics.utils import ( ARM64, @@ -229,6 +229,9 @@ def __call__(self, model=None): m.dynamic = self.args.dynamic m.export = True m.format = self.args.format + if isinstance(m, v10Detect): 
+ m.max_det = self.args.max_det + elif isinstance(m, C2f) and not any((saved_model, pb, tflite, edgetpu, tfjs)): # EdgeTPU does not support FlexSplitV while split provides cleaner ONNX graph m.forward = m.forward_split diff --git a/ultralytics/engine/trainer.py b/ultralytics/engine/trainer.py index 29d6f1d02..841ec120b 100644 --- a/ultralytics/engine/trainer.py +++ b/ultralytics/engine/trainer.py @@ -425,7 +425,8 @@ def _do_train(self, world_size=1): self.ema.update_attr(self.model, include=["yaml", "nc", "args", "names", "stride", "class_weights"]) # Validation - if self.args.val or final_epoch or self.stopper.possible_stop or self.stop: + if (self.args.val and (((epoch+1) % 10 == 0) or (self.epochs - epoch) <= 10)) \ + or final_epoch or self.stopper.possible_stop or self.stop: self.metrics, self.fitness = self.validate() self.save_metrics(metrics={**self.label_loss_items(self.tloss), **self.metrics, **self.lr}) self.stop |= self.stopper(epoch + 1, self.fitness) or final_epoch diff --git a/ultralytics/engine/validator.py b/ultralytics/engine/validator.py index 17666e385..aa329a419 100644 --- a/ultralytics/engine/validator.py +++ b/ultralytics/engine/validator.py @@ -196,10 +196,16 @@ def __call__(self, trainer=None, model=None): self.check_stats(stats) self.speed = dict(zip(self.speed.keys(), (x.t / len(self.dataloader.dataset) * 1e3 for x in dt))) self.finalize_metrics() - self.print_results() + # self.print_results() self.run_callbacks("on_val_end") if self.training: model.float() + assert(self.args.save_json and self.jdict) + with open(str(self.save_dir / "predictions.json"), "w") as f: + LOGGER.info(f"Saving {f.name}...") + json.dump(self.jdict, f) # flatten and save + stats = self.eval_json(stats) # update stats + stats['fitness'] = stats['metrics/mAP50-95(B)'] results = {**stats, **trainer.label_loss_items(self.loss.cpu() / len(self.dataloader), prefix="val")} return {k: round(float(v), 5) for k, v in results.items()} # return results as 5 decimal place 
floats else: diff --git a/ultralytics/models/__init__.py b/ultralytics/models/__init__.py index b9b6eb357..42de3fbac 100644 --- a/ultralytics/models/__init__.py +++ b/ultralytics/models/__init__.py @@ -3,5 +3,6 @@ from .rtdetr import RTDETR from .sam import SAM from .yolo import YOLO, YOLOWorld +from .yolov10 import YOLOv10 -__all__ = "YOLO", "RTDETR", "SAM", "YOLOWorld" # allow simpler import +__all__ = "YOLO", "RTDETR", "SAM", "YOLOWorld", "YOLOv10" # allow simpler import diff --git a/ultralytics/models/yolo/detect/val.py b/ultralytics/models/yolo/detect/val.py index 8226cd694..5550ec3ec 100644 --- a/ultralytics/models/yolo/detect/val.py +++ b/ultralytics/models/yolo/detect/val.py @@ -67,7 +67,7 @@ def init_metrics(self, model): val = self.data.get(self.args.split, "") # validation path self.is_coco = isinstance(val, str) and "coco" in val and val.endswith(f"{os.sep}val2017.txt") # is COCO self.class_map = converter.coco80_to_coco91_class() if self.is_coco else list(range(1000)) - self.args.save_json |= self.is_coco and not self.training # run on final val if training COCO + self.args.save_json |= self.is_coco # run on final val if training COCO self.names = model.names self.nc = len(model.names) self.metrics.names = self.names diff --git a/ultralytics/models/yolov10/__init__.py b/ultralytics/models/yolov10/__init__.py new file mode 100644 index 000000000..97f137f9c --- /dev/null +++ b/ultralytics/models/yolov10/__init__.py @@ -0,0 +1,5 @@ +from .model import YOLOv10 +from .predict import YOLOv10DetectionPredictor +from .val import YOLOv10DetectionValidator + +__all__ = "YOLOv10DetectionPredictor", "YOLOv10DetectionValidator", "YOLOv10" diff --git a/ultralytics/models/yolov10/model.py b/ultralytics/models/yolov10/model.py new file mode 100644 index 000000000..e1c3e28c7 --- /dev/null +++ b/ultralytics/models/yolov10/model.py @@ -0,0 +1,18 @@ +from ..yolo import YOLO +from ultralytics.nn.tasks import YOLOv10DetectionModel +from .val import YOLOv10DetectionValidator 
+from .predict import YOLOv10DetectionPredictor +from .train import YOLOv10DetectionTrainer + +class YOLOv10(YOLO): + @property + def task_map(self): + """Map head to model, trainer, validator, and predictor classes.""" + return { + "detect": { + "model": YOLOv10DetectionModel, + "trainer": YOLOv10DetectionTrainer, + "validator": YOLOv10DetectionValidator, + "predictor": YOLOv10DetectionPredictor, + }, + } \ No newline at end of file diff --git a/ultralytics/models/yolov10/predict.py b/ultralytics/models/yolov10/predict.py new file mode 100644 index 000000000..00de85377 --- /dev/null +++ b/ultralytics/models/yolov10/predict.py @@ -0,0 +1,38 @@ +from ultralytics.models.yolo.detect import DetectionPredictor +import torch +from ultralytics.utils import ops +from ultralytics.engine.results import Results + + +class YOLOv10DetectionPredictor(DetectionPredictor): + def postprocess(self, preds, img, orig_imgs): + if isinstance(preds, dict): + preds = preds["one2one"] + + if isinstance(preds, (list, tuple)): + preds = preds[0] + + if preds.shape[-1] == 6: + pass + else: + preds = preds.transpose(-1, -2) + bboxes, scores, labels = ops.v10postprocess(preds, self.args.max_det) + bboxes = ops.xywh2xyxy(bboxes) + preds = torch.cat([bboxes, scores.unsqueeze(-1), labels.unsqueeze(-1)], dim=-1) + + mask = preds[..., 4] > self.args.conf + + b, _, c = preds.shape + preds = preds.view(-1, preds.shape[-1])[mask.view(-1)] + preds = preds.view(b, -1, c) + + if not isinstance(orig_imgs, list): # input images are a torch.Tensor, not a list + orig_imgs = ops.convert_torch2numpy_batch(orig_imgs) + + results = [] + for i, pred in enumerate(preds): + orig_img = orig_imgs[i] + pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], orig_img.shape) + img_path = self.batch[0][i] + results.append(Results(orig_img, path=img_path, names=self.model.names, boxes=pred)) + return results diff --git a/ultralytics/models/yolov10/train.py b/ultralytics/models/yolov10/train.py new file mode 100644 index 
000000000..7305bcab5
--- /dev/null
+++ b/ultralytics/models/yolov10/train.py
@@ -0,0 +1,30 @@
+from ultralytics.models.yolo.detect import DetectionTrainer
+from .val import YOLOv10DetectionValidator
+from ultralytics.nn.tasks import YOLOv10DetectionModel
+from copy import copy
+from ultralytics.utils import RANK
+
+
+class YOLOv10DetectionTrainer(DetectionTrainer):
+    """Trainer that builds YOLOv10 detection models and validators."""
+
+    def get_validator(self):
+        """Set the loss names of both head branches and return a YOLOv10DetectionValidator."""
+        # "_om" / "_oo" suffixes: one-to-many and one-to-one branch losses.
+        self.loss_names = ("box_om", "cls_om", "dfl_om", "box_oo", "cls_oo", "dfl_oo")
+        return YOLOv10DetectionValidator(
+            self.test_loader, save_dir=self.save_dir, args=copy(self.args), _callbacks=self.callbacks
+        )
+
+    def get_model(self, cfg=None, weights=None, verbose=True):
+        """Build a YOLOv10DetectionModel and optionally load weights into it.
+
+        Args:
+            cfg: Model configuration forwarded to ``YOLOv10DetectionModel``.
+            weights: Weights passed to ``model.load`` when truthy.
+            verbose: Verbosity flag; only forwarded as true when ``RANK == -1``.
+        """
+        model = YOLOv10DetectionModel(cfg, nc=self.data["nc"], verbose=verbose and RANK == -1)
+        if weights:
+            model.load(weights)
+        return model
diff --git a/ultralytics/models/yolov10/val.py b/ultralytics/models/yolov10/val.py
new file mode 100644
index 000000000..bbe119922
--- /dev/null
+++ b/ultralytics/models/yolov10/val.py
@@ -0,0 +1,36 @@
+from ultralytics.models.yolo.detect import DetectionValidator
+from ultralytics.utils import ops
+import torch
+
+
+class YOLOv10DetectionValidator(DetectionValidator):
+    """Validator that decodes the one-to-one output of YOLOv10 models."""
+
+    def __init__(self, *args, **kwargs):
+        """Initialise the parent validator, then OR ``is_coco`` into ``args.save_json``."""
+        super().__init__(*args, **kwargs)
+        self.args.save_json |= self.is_coco
+
+    def postprocess(self, preds):
+        """Decode raw predictions into rows of box (xyxy), score and label.
+
+        Args:
+            preds: Model output. A dict is reduced to its ``"one2one"`` entry and a
+                list/tuple to its first element.
+
+        Returns:
+            Tensor with one row of 6 values per kept detection, per image.
+        """
+        if isinstance(preds, dict):
+            preds = preds["one2one"]
+
+        if isinstance(preds, (list, tuple)):
+            preds = preds[0]
+
+        preds = preds.transpose(-1, -2)
+        # Derive the class count from the tensor instead of relying on the
+        # default of v10postprocess, so any number of classes is accepted.
+        nc = preds.shape[-1] - 4
+        boxes, scores, labels = ops.v10postprocess(preds, self.args.max_det, nc)
+        bboxes = ops.xywh2xyxy(boxes)
+        return torch.cat([bboxes, scores.unsqueeze(-1), labels.unsqueeze(-1)], dim=-1)
\ No newline at end of file
diff --git a/ultralytics/nn/modules/__init__.py b/ultralytics/nn/modules/__init__.py
index d785c008c..4a99bf595 100644
--- a/ultralytics/nn/modules/__init__.py
+++ b/ultralytics/nn/modules/__init__.py
@@ -46,6 +46,10 @@
CBFuse,
     CBLinear,
     Silence,
+    PSA,
+    C2fCIB,
+    SCDown,
+    RepVGGDW
 )
 from .conv import (
     CBAM,
@@ -62,7 +66,7 @@
     RepConv,
     SpatialAttention,
 )
-from .head import OBB, Classify, Detect, Pose, RTDETRDecoder, Segment, WorldDetect
+from .head import OBB, Classify, Detect, Pose, RTDETRDecoder, Segment, WorldDetect, v10Detect
 from .transformer import (
     AIFI,
     MLP,
@@ -135,4 +139,9 @@
     "CBFuse",
     "CBLinear",
     "Silence",
+    "PSA",
+    "C2fCIB",
+    "SCDown",
+    "RepVGGDW",
+    "v10Detect"
 )
diff --git a/ultralytics/nn/modules/block.py b/ultralytics/nn/modules/block.py
index a263b6035..b76a95957 100644
--- a/ultralytics/nn/modules/block.py
+++ b/ultralytics/nn/modules/block.py
@@ -7,6 +7,7 @@
 
 from .conv import Conv, DWConv, GhostConv, LightConv, RepConv, autopad
 from .transformer import TransformerBlock
+from ultralytics.utils.torch_utils import fuse_conv_and_bn
 
 __all__ = (
     "DFL",
@@ -696,3 +697,163 @@ def forward(self, xs):
         res = [F.interpolate(x[self.idx[i]], size=target_size, mode="nearest") for i, x in enumerate(xs[:-1])]
         out = torch.sum(torch.stack(res + xs[-1:]), dim=0)
         return out
+
+
+class RepVGGDW(torch.nn.Module):
+    """Depthwise 7x7 and 3x3 conv branches that can be fused into a single 7x7 conv."""
+
+    def __init__(self, ed) -> None:
+        """Create both depthwise branches for ``ed`` channels."""
+        super().__init__()
+        self.conv = Conv(ed, ed, 7, 1, 3, g=ed, act=False)
+        self.conv1 = Conv(ed, ed, 3, 1, 1, g=ed, act=False)
+        self.dim = ed
+        self.act = nn.SiLU()
+
+    def forward(self, x):
+        """Sum both branches and apply the activation."""
+        return self.act(self.conv(x) + self.conv1(x))
+
+    def forward_fuse(self, x):
+        """Forward pass for use after ``fuse()`` has merged the branches."""
+        return self.act(self.conv(x))
+
+    @torch.no_grad()
+    def fuse(self):
+        """Fuse conv and BN of each branch, then merge the 3x3 branch into the 7x7 one."""
+        if not hasattr(self, "conv1"):  # already fused, nothing left to merge
+            return
+        conv = fuse_conv_and_bn(self.conv.conv, self.conv.bn)
+        conv1 = fuse_conv_and_bn(self.conv1.conv, self.conv1.bn)
+
+        # Zero-pad the 3x3 kernel by 2 on every side so it matches the 7x7 kernel.
+        conv1_w = torch.nn.functional.pad(conv1.weight, [2, 2, 2, 2])
+
+        conv.weight.data.copy_(conv.weight + conv1_w)
+        conv.bias.data.copy_(conv.bias + conv1.bias)
+
+        self.conv = conv
+        del self.conv1
+
+
+class CIB(nn.Module):
+    """Bottleneck made of alternating depthwise 3x3 and pointwise 1x1 convolutions."""
+
+    def __init__(self, c1, c2, shortcut=True, e=0.5, lk=False):
+        """Build the convolution stack.
+
+        Args:
+            c1: Input channels.
+            c2: Output channels.
+            shortcut: Add the input to the output; only applied when ``c1 == c2``.
+            e: Hidden channels are ``2 * int(c2 * e)``.
+            lk: Use ``RepVGGDW`` instead of the middle depthwise 3x3 conv.
+        """
+        super().__init__()
+        c_ = int(c2 * e)  # hidden channels
+        self.cv1 = nn.Sequential(
+            Conv(c1, c1, 3, g=c1),
+            Conv(c1, 2 * c_, 1),
+            Conv(2 * c_, 2 * c_, 3, g=2 * c_) if not lk else RepVGGDW(2 * c_),
+            Conv(2 * c_, c2, 1),
+            Conv(c2, c2, 3, g=c2),
+        )
+
+        self.add = shortcut and c1 == c2
+
+    def forward(self, x):
+        """Apply the convolution stack, adding the input when the shortcut is enabled."""
+        return x + self.cv1(x) if self.add else self.cv1(x)
+
+
+class C2fCIB(C2f):
+    """C2f variant whose inner blocks are ``CIB`` modules."""
+
+    def __init__(self, c1, c2, n=1, shortcut=False, lk=False, g=1, e=0.5):
+        """Initialise C2f, then replace ``self.m`` with ``n`` CIB blocks (``lk`` is passed to each)."""
+        super().__init__(c1, c2, n, shortcut, g, e)
+        self.m = nn.ModuleList(CIB(self.c, self.c, shortcut, e=1.0, lk=lk) for _ in range(n))
+
+
+class Attention(nn.Module):
+    """Multi-head self-attention over the H*W positions of a feature map."""
+
+    def __init__(self, dim, num_heads=8, attn_ratio=0.5):
+        """Create the qkv, projection and ``pe`` convolutions.
+
+        Args:
+            dim: Input and output channels.
+            num_heads: Number of attention heads.
+            attn_ratio: Query/key size as a fraction of the per-head value size.
+        """
+        super().__init__()
+        self.num_heads = num_heads
+        self.head_dim = dim // num_heads
+        self.key_dim = int(self.head_dim * attn_ratio)
+        self.scale = self.key_dim ** -0.5
+        nh_kd = self.key_dim * num_heads
+        h = dim + nh_kd * 2
+        self.qkv = Conv(dim, h, 1, act=False)
+        self.proj = Conv(dim, dim, 1, act=False)
+        self.pe = Conv(dim, dim, 3, 1, g=dim, act=False)
+
+    def forward(self, x):
+        """Attend over all spatial positions of ``x`` and project the result."""
+        B, _, H, W = x.shape
+        N = H * W
+        qkv = self.qkv(x)
+        q, k, v = qkv.view(B, self.num_heads, -1, N).split([self.key_dim, self.key_dim, self.head_dim], dim=2)
+
+        attn = (q.transpose(-2, -1) @ k) * self.scale
+        attn = attn.softmax(dim=-1)
+        # ``pe`` is a depthwise 3x3 conv over the values, added to the attention output.
+        x = (v @ attn.transpose(-2, -1)).view(B, -1, H, W) + self.pe(v.reshape(B, -1, H, W))
+        x = self.proj(x)
+        return x
+
+
+class PSA(nn.Module):
+    """Split the channels in two and refine one half with attention and a feed-forward block."""
+
+    def __init__(self, c1, c2, e=0.5):
+        """Build the block.
+
+        Args:
+            c1: Input channels.
+            c2: Output channels; must equal ``c1``.
+            e: Fraction of ``c1`` used for each of the two halves.
+        """
+        super().__init__()
+        assert c1 == c2
+        self.c = int(c1 * e)
+        self.cv1 = Conv(c1, 2 * self.c, 1, 1)
+        self.cv2 = Conv(2 * self.c, c1, 1)
+
+        # One head per 64 channels, with at least one head so narrow layers do not
+        # hit a division by zero inside Attention.
+        self.attn = Attention(self.c, attn_ratio=0.5, num_heads=max(1, self.c // 64))
+        self.ffn = nn.Sequential(
+            Conv(self.c, self.c * 2, 1),
+            Conv(self.c * 2, self.c, 1, act=False),
+        )
+
+    def forward(self, x):
+        """Apply attention and the feed-forward block, both residual, to one half of the channels."""
+        a, b = self.cv1(x).split((self.c, self.c), dim=1)
+        b = b + self.attn(b)
+        b = b + self.ffn(b)
+        return self.cv2(torch.cat((a, b), 1))
+
+
+class SCDown(nn.Module):
+    """Pointwise 1x1 conv followed by a depthwise conv with kernel ``k`` and stride ``s``."""
+
+    def __init__(self, c1, c2, k, s):
+        """Create the pointwise (c1 -> c2) and depthwise (kernel ``k``, stride ``s``) convolutions."""
+        super().__init__()
+        self.cv1 = Conv(c1, c2, 1, 1)
+        self.cv2 = Conv(c2, c2, k=k, s=s, g=c2, act=False)
+
+    def forward(self, x):
+        """Apply the pointwise then the depthwise convolution."""
+        return self.cv2(self.cv1(x))
\ No newline at end of file
diff --git a/ultralytics/nn/modules/head.py b/ultralytics/nn/modules/head.py
index 9cd794e4d..54b59d6cb 100644
--- a/ultralytics/nn/modules/head.py
+++ b/ultralytics/nn/modules/head.py
@@ -12,6 +12,8 @@
 from .conv import Conv
 from .transformer import MLP, DeformableTransformerDecoder, DeformableTransformerDecoderLayer
 from .utils import bias_init_with_prob, linear_init
+import copy
+from ultralytics.utils import ops
 
 __all__ = "Detect", "Segment", "Pose", "Classify", "OBB", "RTDETRDecoder"
 
@@ -40,13 +42,7 @@ def __init__(self, nc=80, ch=()):
         self.cv3 = nn.ModuleList(nn.Sequential(Conv(x, c3, 3), Conv(c3, c3, 3), nn.Conv2d(c3, self.nc, 1)) for x in ch)
         self.dfl = DFL(self.reg_max) if self.reg_max > 1 else nn.Identity()
 
-    def forward(self, x):
-        """Concatenates and returns predicted bounding boxes and class probabilities."""
-        for i in range(self.nl):
-            x[i] = torch.cat((self.cv2[i](x[i]), self.cv3[i](x[i])), 1)
-        if self.training:  # Training path
-            return x
-
+    def inference(self, x):
         # Inference path
         shape = x[0].shape  # BCHW
         x_cat = torch.cat([xi.view(shape[0], self.no, -1) for xi in x], 2)
@@ -74,6 +70,21 @@ def forward(self, x):
         y = torch.cat((dbox, cls.sigmoid()), 1)
         return y if self.export else (y, x)
 
+    def forward_feat(self, x, cv2, cv3):
+        y =
[]
+        for i in range(self.nl):
+            y.append(torch.cat((cv2[i](x[i]), cv3[i](x[i])), 1))
+        return y
+
+    def forward(self, x):
+        """Concatenates and returns predicted bounding boxes and class probabilities."""
+        y = self.forward_feat(x, self.cv2, self.cv3)
+
+        if self.training:
+            return y
+
+        return self.inference(y)
+
     def bias_init(self):
         """Initialize Detect() biases, WARNING: requires stride availability."""
         m = self  # self.model[-1]  # Detect() module
@@ -85,6 +96,8 @@ def bias_init(self):
 
     def decode_bboxes(self, bboxes, anchors):
         """Decode bounding boxes."""
+        if self.export:
+            return dist2bbox(bboxes, anchors, xywh=False, dim=1)
         return dist2bbox(bboxes, anchors, xywh=True, dim=1)
 
 
@@ -480,3 +493,65 @@ def _reset_parameters(self):
         xavier_uniform_(self.query_pos_head.layers[1].weight)
         for layer in self.input_proj:
             xavier_uniform_(layer[0].weight)
+
+
+class v10Detect(Detect):
+    """Detect head with an extra one-to-one branch next to the regular one-to-many branch."""
+
+    max_det = -1  # has to be set to a real value before the head is exported
+
+    def __init__(self, nc=80, ch=()):
+        """Build the one-to-many branch and a deep copy of it for the one-to-one branch.
+
+        Args:
+            nc: Number of classes.
+            ch: Channels of each input feature map.
+        """
+        super().__init__(nc, ch)
+        c3 = max(ch[0], min(self.nc, 100))  # channels
+        self.cv3 = nn.ModuleList(
+            nn.Sequential(
+                nn.Sequential(Conv(x, x, 3, g=x), Conv(x, c3, 1)),
+                nn.Sequential(Conv(c3, c3, 3, g=c3), Conv(c3, c3, 1)),
+                nn.Conv2d(c3, self.nc, 1),
+            )
+            for x in ch
+        )
+
+        self.one2one_cv2 = copy.deepcopy(self.cv2)
+        self.one2one_cv3 = copy.deepcopy(self.cv3)
+
+    def forward(self, x):
+        """Run both branches, or only the one-to-one branch when exporting.
+
+        Returns:
+            Without export: a dict with keys ``"one2many"`` and ``"one2one"``; the
+            one-to-one entry holds raw feature maps in training mode and the output
+            of ``inference()`` otherwise.
+            With export: a tensor of boxes, scores and labels for the ``max_det``
+            best detections per image.
+        """
+        # The one-to-one branch sees detached features, so its loss does not
+        # back-propagate into the layers that produced ``x``.
+        one2one = self.forward_feat([xi.detach() for xi in x], self.one2one_cv2, self.one2one_cv3)
+
+        if not self.export:
+            one2many = super().forward(x)
+            if not self.training:
+                one2one = self.inference(one2one)
+            return {"one2many": one2many, "one2one": one2one}
+
+        assert self.max_det != -1
+        one2one = self.inference(one2one)
+        # Hand over the head's own class count rather than relying on the
+        # default of v10postprocess.
+        boxes, scores, labels = ops.v10postprocess(one2one.permute(0, 2, 1), self.max_det, self.nc)
+        return torch.cat([boxes, scores.unsqueeze(-1), labels.unsqueeze(-1)], dim=-1)
+
+    def bias_init(self):
+        """Run the parent bias init, then set the one-to-one branch biases; requires stride availability."""
+        super().bias_init()
+        m = self  # self.model[-1]  # Detect() module
+        for a, b, s in zip(m.one2one_cv2, m.one2one_cv3, m.stride):  # from
+            a[-1].bias.data[:] = 1.0  # box
+            b[-1].bias.data[: m.nc] = math.log(5 / m.nc / (640 / s) ** 2)  # cls (.01 objects, 80 classes, 640 img)
diff --git a/ultralytics/nn/tasks.py b/ultralytics/nn/tasks.py
index f116ed2cf..99b21525e 100644
--- a/ultralytics/nn/tasks.py
+++ b/ultralytics/nn/tasks.py
@@ -49,10 +49,15 @@
     CBFuse,
     CBLinear,
     Silence,
+    C2fCIB,
+    PSA,
+    SCDown,
+    RepVGGDW,
+    v10Detect
 )
 from ultralytics.utils import DEFAULT_CFG_DICT, DEFAULT_CFG_KEYS, LOGGER, colorstr, emojis, yaml_load
 from ultralytics.utils.checks import check_requirements, check_suffix, check_yaml
-from ultralytics.utils.loss import v8ClassificationLoss, v8DetectionLoss, v8OBBLoss, v8PoseLoss, v8SegmentationLoss
+from ultralytics.utils.loss import v8ClassificationLoss, v8DetectionLoss, v8OBBLoss, v8PoseLoss, v8SegmentationLoss, v10DetectLoss
 from ultralytics.utils.plotting import feature_visualization
 from ultralytics.utils.torch_utils import (
     fuse_conv_and_bn,
@@ -191,6 +196,9 @@ def fuse(self, verbose=True):
                 if isinstance(m, RepConv):
                     m.fuse_convs()
                     m.forward = m.forward_fuse  # update forward
+                if isinstance(m, RepVGGDW):
+                    m.fuse()
+                    m.forward = m.forward_fuse
             self.info(verbose=verbose)
 
         return self
@@ -294,6 +302,8 @@ def __init__(self, cfg="yolov8n.yaml", ch=3, nc=None, verbose=True):  # model, i
             s = 256  # 2x min stride
             m.inplace = self.inplace
             forward = lambda x: self.forward(x)[0] if isinstance(m, (Segment, Pose, OBB)) else self.forward(x)
+            if isinstance(m, v10Detect):
+                forward = lambda x: self.forward(x)["one2many"]
             m.stride = torch.tensor([s / x.shape[-2] for x in forward(torch.zeros(1, ch, s, s))])  # forward
             self.stride = m.stride
m.bias_init() # only run once @@ -627,6 +637,9 @@ def predict(self, x, profile=False, visualize=False, augment=False, embed=None): return torch.unbind(torch.cat(embeddings, 1), dim=0) return x +class YOLOv10DetectionModel(DetectionModel): + def init_criterion(self): + return v10DetectLoss(self) class Ensemble(nn.ModuleList): """Ensemble of models.""" @@ -869,6 +882,9 @@ def parse_model(d, ch, verbose=True): # model_dict, input_channels(3) DWConvTranspose2d, C3x, RepC3, + PSA, + SCDown, + C2fCIB }: c1, c2 = ch[f], args[0] if c2 != nc: # if c2 not equal to number of classes (i.e. for Classify() output) @@ -880,7 +896,7 @@ def parse_model(d, ch, verbose=True): # model_dict, input_channels(3) ) # num heads args = [c1, c2, *args[1:]] - if m in (BottleneckCSP, C1, C2, C2f, C2fAttn, C3, C3TR, C3Ghost, C3x, RepC3): + if m in (BottleneckCSP, C1, C2, C2f, C2fAttn, C3, C3TR, C3Ghost, C3x, RepC3, C2fCIB): args.insert(2, n) # number of repeats n = 1 elif m is AIFI: @@ -897,7 +913,7 @@ def parse_model(d, ch, verbose=True): # model_dict, input_channels(3) args = [ch[f]] elif m is Concat: c2 = sum(ch[x] for x in f) - elif m in {Detect, WorldDetect, Segment, Pose, OBB, ImagePoolingAttn}: + elif m in {Detect, WorldDetect, Segment, Pose, OBB, ImagePoolingAttn, v10Detect}: args.append([ch[x] for x in f]) if m is Segment: args[2] = make_divisible(min(args[2], max_channels) * width, 8) @@ -936,7 +952,10 @@ def yaml_model_load(path): LOGGER.warning(f"WARNING ⚠️ Ultralytics YOLO P6 models now use -p6 suffix. Renaming {path.stem} to {new_stem}.") path = path.with_name(new_stem + path.suffix) - unified_path = re.sub(r"(\d+)([nslmx])(.+)?$", r"\1\3", str(path)) # i.e. yolov8x.yaml -> yolov8.yaml + if "v10" not in str(path): + unified_path = re.sub(r"(\d+)([nsblmx])(.+)?$", r"\1\3", str(path)) # i.e. 
yolov8x.yaml -> yolov8.yaml + else: + unified_path = path yaml_file = check_yaml(unified_path, hard=False) or check_yaml(path) d = yaml_load(yaml_file) # model dict d["scale"] = guess_model_scale(path) @@ -959,7 +978,7 @@ def guess_model_scale(model_path): with contextlib.suppress(AttributeError): import re - return re.search(r"yolov\d+([nslmx])", Path(model_path).stem).group(1) # n, s, m, l, or x + return re.search(r"yolov\d+([nsblmx])", Path(model_path).stem).group(1) # n, s, m, l, or x return "" @@ -982,7 +1001,7 @@ def cfg2task(cfg): m = cfg["head"][-1][-2].lower() # output module name if m in {"classify", "classifier", "cls", "fc"}: return "classify" - if m == "detect": + if m == "detect" or m == "v10detect": return "detect" if m == "segment": return "segment" @@ -1014,7 +1033,7 @@ def cfg2task(cfg): return "pose" elif isinstance(m, OBB): return "obb" - elif isinstance(m, (Detect, WorldDetect)): + elif isinstance(m, (Detect, WorldDetect, v10Detect)): return "detect" # Guess from model filename diff --git a/ultralytics/utils/loss.py b/ultralytics/utils/loss.py index 360a292ab..d0ca9c392 100644 --- a/ultralytics/utils/loss.py +++ b/ultralytics/utils/loss.py @@ -147,7 +147,7 @@ def forward(self, pred_kpts, gt_kpts, kpt_mask, area): class v8DetectionLoss: """Criterion class for computing training losses.""" - def __init__(self, model): # model must be de-paralleled + def __init__(self, model, tal_topk=10): # model must be de-paralleled """Initializes v8DetectionLoss with the model, defining model-related properties and BCE loss function.""" device = next(model.parameters()).device # get model device h = model.args # hyperparameters @@ -163,7 +163,7 @@ def __init__(self, model): # model must be de-paralleled self.use_dfl = m.reg_max > 1 - self.assigner = TaskAlignedAssigner(topk=10, num_classes=self.nc, alpha=0.5, beta=6.0) + self.assigner = TaskAlignedAssigner(topk=tal_topk, num_classes=self.nc, alpha=0.5, beta=6.0) self.bbox_loss = BboxLoss(m.reg_max - 1, 
use_dfl=self.use_dfl).to(device)
         self.proj = torch.arange(m.reg_max, dtype=torch.float, device=device)
 
@@ -713,3 +713,23 @@ def bbox_decode(self, anchor_points, pred_dist, pred_angle):
         b, a, c = pred_dist.shape  # batch, anchors, channels
         pred_dist = pred_dist.view(b, a, 4, c // 4).softmax(3).matmul(self.proj.type(pred_dist.dtype))
         return torch.cat((dist2rbox(pred_dist, pred_angle, anchor_points), pred_angle), dim=-1)
+
+
+class v10DetectLoss:
+    """Sum of two ``v8DetectionLoss`` criteria, one per branch of the v10 head output."""
+
+    def __init__(self, model):
+        """Create both criteria; they differ only in ``tal_topk`` (10 for one2many, 1 for one2one)."""
+        self.one2many = v8DetectionLoss(model, tal_topk=10)
+        self.one2one = v8DetectionLoss(model, tal_topk=1)
+
+    def __call__(self, preds, batch):
+        """Return the summed loss and the concatenated loss items of both branches.
+
+        Args:
+            preds: Dict holding the ``"one2many"`` and ``"one2one"`` head outputs.
+            batch: Training batch, passed unchanged to both criteria.
+        """
+        loss_one2many = self.one2many(preds["one2many"], batch)
+        loss_one2one = self.one2one(preds["one2one"], batch)
+        return loss_one2many[0] + loss_one2one[0], torch.cat((loss_one2many[1], loss_one2one[1]))
diff --git a/ultralytics/utils/ops.py b/ultralytics/utils/ops.py
index 439f94409..a539fe50f 100644
--- a/ultralytics/utils/ops.py
+++ b/ultralytics/utils/ops.py
@@ -847,3 +847,36 @@ def clean_str(s):
         (str): a string with special characters replaced by an underscore _
     """
     return re.sub(pattern="[|@#!¡·$€%&()=?¿^*;:,¨´><+]", repl="_", string=s)
+
+
+def v10postprocess(preds, max_det, nc=None):
+    """Keep the ``max_det`` highest-scoring (anchor, class) pairs of every image.
+
+    Args:
+        preds: Tensor of shape (batch, anchors, 4 + nc): 4 box values followed by
+            ``nc`` class scores per anchor.
+        max_det: Maximum number of detections kept per image.
+        nc: Number of classes; inferred from the last dimension of ``preds`` when
+            omitted.
+
+    Returns:
+        Tuple ``(boxes, scores, labels)`` with shapes (batch, k, 4), (batch, k) and
+        (batch, k), ordered by descending score, where ``k = min(max_det, anchors)``.
+    """
+    if nc is None:
+        nc = preds.shape[-1] - 4
+    assert 4 + nc == preds.shape[-1]
+    boxes, scores = preds.split([4, nc], dim=-1)
+    k = min(max_det, preds.shape[1])  # topk raises when asked for more entries than exist
+
+    # Stage 1: keep the k anchors with the highest best-class score.
+    index = scores.amax(dim=-1).topk(k, dim=-1)[1].unsqueeze(-1)
+    boxes = torch.gather(boxes, dim=1, index=index.repeat(1, 1, boxes.shape[-1]))
+    scores = torch.gather(scores, dim=1, index=index.repeat(1, 1, scores.shape[-1]))
+
+    # Stage 2: keep the k best (anchor, class) pairs among those anchors.
+    scores, index = torch.topk(scores.flatten(1), k, dim=-1)
+    labels = index % nc
+    index = index // nc
+    boxes = boxes.gather(dim=1, index=index.unsqueeze(-1).repeat(1, 1, boxes.shape[-1]))
+    return boxes, scores, labels
\ No newline at end of file
diff --git a/ultralytics/utils/tal.py b/ultralytics/utils/tal.py
index 9cee05008..b11c2b2c0 100644
---
a/ultralytics/utils/tal.py +++ b/ultralytics/utils/tal.py @@ -308,7 +308,8 @@ def make_anchors(feats, strides, grid_cell_offset=0.5): def dist2bbox(distance, anchor_points, xywh=True, dim=-1): """Transform distance(ltrb) to box(xywh or xyxy).""" - lt, rb = distance.chunk(2, dim) + assert(distance.shape[dim] == 4) + lt, rb = distance.split([2, 2], dim) x1y1 = anchor_points - lt x2y2 = anchor_points + rb if xywh: diff --git a/ultralytics/utils/torch_utils.py b/ultralytics/utils/torch_utils.py index 77d8cc8cc..d476e1f84 100644 --- a/ultralytics/utils/torch_utils.py +++ b/ultralytics/utils/torch_utils.py @@ -310,10 +310,11 @@ def get_flops(model, imgsz=640): imgsz = [imgsz, imgsz] # expand if int/float try: # Use stride size for input tensor - stride = max(int(model.stride.max()), 32) if hasattr(model, "stride") else 32 # max stride - im = torch.empty((1, p.shape[1], stride, stride), device=p.device) # input image in BCHW format - flops = thop.profile(deepcopy(model), inputs=[im], verbose=False)[0] / 1e9 * 2 # stride GFLOPs - return flops * imgsz[0] / stride * imgsz[1] / stride # imgsz GFLOPs + # stride = max(int(model.stride.max()), 32) if hasattr(model, "stride") else 32 # max stride + # im = torch.empty((1, p.shape[1], stride, stride), device=p.device) # input image in BCHW format + # flops = thop.profile(deepcopy(model), inputs=[im], verbose=False)[0] / 1e9 * 2 # stride GFLOPs + # return flops * imgsz[0] / stride * imgsz[1] / stride # imgsz GFLOPs + raise Exception except Exception: # Use actual image size for input tensor (i.e. required for RTDETR models) im = torch.empty((1, p.shape[1], *imgsz), device=p.device) # input image in BCHW format