diff --git a/tools/train_net.py b/tools/train_net.py
new file mode 100644
index 00000000..c5f81dad
--- /dev/null
+++ b/tools/train_net.py
@@ -0,0 +1,10 @@
+from yolort.trainer import run
+
+
+def main():
+    """Start training with the YOLOv5s config at image size 224 from the ``yolov5s.pt`` weights."""
+    run(cfg="yolort/v5/models/yolov5s.yaml", imgsz=224, weights="yolov5s.pt")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/yolort/trainer/__init__.py b/yolort/trainer/__init__.py
index 34724c90..31efbf3d 100644
--- a/yolort/trainer/__init__.py
+++ b/yolort/trainer/__init__.py
@@ -1,5 +1,7 @@
 # Copyright (c) 2021, yolort team. All rights reserved.
 
-from .lightning_task import DefaultTask
+# from .lightning_task import DefaultTask
 
-__all__ = ["DefaultTask"]
+# __all__ = ["DefaultTask"]
+
+from .train import run
diff --git a/yolort/trainer/train.py b/yolort/trainer/train.py
new file mode 100644
index 00000000..b0e4235e
--- /dev/null
+++ b/yolort/trainer/train.py
@@ -0,0 +1,802 @@
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
+"""
+Train a YOLOv5 model on a custom dataset.
+
+Models and datasets download automatically from the latest YOLOv5 release.
+Models: https://github.com/ultralytics/yolov5/tree/master/models
+Datasets: https://github.com/ultralytics/yolov5/tree/master/data
+Tutorial: https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data
+
+Usage:
+    # from pretrained (RECOMMENDED)
+    $ python -m yolort.trainer.train --data coco128.yaml --weights yolov5s.pt --img 640
+    # from scratch
+    $ python -m yolort.trainer.train --data coco128.yaml --weights '' --cfg yolov5s.yaml --img 640
+"""
+
+import argparse
+import math
+import os
+import random
+import sys
+import time
+from copy import deepcopy
+from datetime import datetime
+from pathlib import Path
+
+sys.path.insert(0, "./yolov5")
+import numpy as np
+import torch
+import torch.distributed as dist
+import torch.nn as nn
+import yaml
+from torch.cuda import amp
+from torch.nn.parallel import DistributedDataParallel as DDP
+from torch.optim import SGD, Adam, AdamW, lr_scheduler
+from tqdm import tqdm
+
+FILE = Path(__file__).resolve()
+from yolort.v5.models.yolo import Model
+from yolort.v5.utils.autoanchor import check_anchors
+from yolort.v5.utils.downloads import attempt_download
+from yolort.v5.utils.general import (
+    LOGGER,
+    check_file,
+    check_img_size,
+    check_suffix,
+    check_yaml,
+    colorstr,
+    get_latest_run,
+    increment_path,
+    init_seeds,
+    intersect_dicts,
+    labels_to_class_weights,
+    labels_to_image_weights,
+    methods,
+    one_cycle,
+    print_args,
+    print_mutation,
+    strip_optimizer,
+)
+
+# from yolort.v5.utils.loggers.wandb.wandb_utils import check_wandb_resume
+from yolort.v5.utils.loss import ComputeLoss
+from yolort.v5.utils.metrics import fitness
+from yolort.v5.utils.plots import plot_evolve, plot_labels
+from yolort.v5.utils.torch_utils import (
+    EarlyStopping,
+    ModelEMA,
+    de_parallel,
+    select_device,
+    torch_distributed_zero_first,
+)
+
+from . import val  # for end-of-epoch mAP
+from .utils import (
+    Loggers,
+    attempt_load,
+    check_dataset,
+    check_git_status,
+    check_requirements,
+    check_train_batch_size,
+    create_dataloader,
+    Callbacks,
+    linear_lr,
+)
+
+LOCAL_RANK = int(os.getenv("LOCAL_RANK", -1))  # https://pytorch.org/docs/stable/elastic/run.html
+RANK = int(os.getenv("RANK", -1))  # -1 when the variable is unset; DDP wrapping is skipped in that case
+WORLD_SIZE = int(os.getenv("WORLD_SIZE", 1))  # 1 when the variable is unset
+
+sys.path.insert(0, "yolort/v5")  # NOTE(review): CWD-relative path; purpose not visible here — confirm
+
+
+def train(hyp, opt, device, callbacks):  # path/to/hyp.yaml or hyp dictionary
+    """Train a YOLOv5 model and return the results of the last validation pass.
+
+    Args:
+        hyp: Path to a hyperparameter YAML file, or an already-loaded hyperparameter dict.
+            The dict is mutated: weight decay and the box/cls/obj gains are rescaled.
+        opt: Options namespace (see ``parse_opt``); ``opt.save_dir`` must already be set.
+        device: ``torch.device`` on which the model is trained.
+        callbacks: Hook registry; logger methods are registered on it when RANK is -1 or 0.
+
+    Returns:
+        The ``results`` tuple (P, R, mAP@.5, mAP@.5-.95, val box/obj/cls loss) of the most
+        recent validation, or ``None`` if a callback requested a stop in the middle of an epoch.
+
+    Raises:
+        AssertionError: If names or labels disagree with ``nc``, or a resume has nothing left to do.
+    """
+    save_dir = Path(opt.save_dir)
+    epochs = opt.epochs
+    batch_size = opt.batch_size
+    weights = opt.weights
+    single_cls = opt.single_cls
+    evolve = opt.evolve
+    data = opt.data
+    cfg = opt.cfg
+    resume = opt.resume
+    noval = opt.noval
+    nosave = opt.nosave
+    workers = opt.workers
+    freeze = opt.freeze
+
+    # Directories
+    w = save_dir / "weights"  # weights dir
+    (w.parent if evolve else w).mkdir(parents=True, exist_ok=True)  # make dir
+    last, best = w / "last.pt", w / "best.pt"
+
+    # Hyperparameters
+    if isinstance(hyp, str):
+        with open(hyp, errors="ignore") as f:
+            hyp = yaml.safe_load(f)  # load hyps dict
+    LOGGER.info(colorstr("hyperparameters: ") + ", ".join(f"{k}={v}" for k, v in hyp.items()))
+
+    # Save run settings
+    if not evolve:
+        with open(save_dir / "hyp.yaml", "w") as f:
+            yaml.safe_dump(hyp, f, sort_keys=False)
+        with open(save_dir / "opt.yaml", "w") as f:
+            yaml.safe_dump(vars(opt), f, sort_keys=False)
+
+    # Loggers
+    data_dict = None
+    if RANK in [-1, 0]:
+        loggers = Loggers(save_dir, weights, opt, hyp, LOGGER)  # loggers instance
+        if loggers.wandb:
+            data_dict = loggers.wandb.data_dict
+            if resume:
+                weights, epochs, hyp, batch_size = opt.weights, opt.epochs, opt.hyp, opt.batch_size
+
+        # Register actions
+        for k in methods(loggers):
+            callbacks.register_action(k, callback=getattr(loggers, k))
+
+    # Config
+    plots = not evolve  # create plots
+    cuda = device.type != "cpu"
+    init_seeds(1 + RANK)
+    with torch_distributed_zero_first(LOCAL_RANK):
+        data_dict = data_dict or check_dataset(data)  # check if None
+    train_path, val_path = data_dict["train"], data_dict["val"]
+    nc = 1 if single_cls else int(data_dict["nc"])  # number of classes
+    names = ["item"] if single_cls and len(data_dict["names"]) != 1 else data_dict["names"]  # class names
+    assert len(names) == nc, f"{len(names)} names found for nc={nc} dataset in {data}"  # check
+    is_coco = isinstance(val_path, str) and val_path.endswith("coco/val2017.txt")  # COCO dataset
+
+    # Model
+    check_suffix(weights, ".pt")  # check weights
+
+    pretrained = weights.endswith(".pt")
+    if pretrained:
+        with torch_distributed_zero_first(LOCAL_RANK):
+            weights = attempt_download(weights)  # download if not found locally
+
+        ckpt = torch.load(weights, map_location="cpu")  # load checkpoint to CPU to avoid CUDA memory leak
+        # create model
+
+        model = Model(cfg or ckpt["model"].yaml, ch=3, nc=nc, anchors=hyp.get("anchors")).to(device)
+        exclude = ["anchor"] if (cfg or hyp.get("anchors")) and not resume else []  # exclude keys
+        csd = ckpt["model"].float().state_dict()  # checkpoint state_dict as FP32
+        csd = intersect_dicts(csd, model.state_dict(), exclude=exclude)  # intersect
+        model.load_state_dict(csd, strict=False)  # load
+        LOGGER.info(f"Transferred {len(csd)}/{len(model.state_dict())} items from {weights}")  # report
+    else:
+        model = Model(cfg, ch=3, nc=nc, anchors=hyp.get("anchors")).to(device)  # create
+
+    # Freeze
+    freeze = [f"model.{x}." for x in (freeze if len(freeze) > 1 else range(freeze[0]))]  # layers to freeze
+    for k, v in model.named_parameters():
+        v.requires_grad = True  # train all layers
+        if any(x in k for x in freeze):
+            LOGGER.info(f"freezing {k}")
+            v.requires_grad = False
+
+    # Image size
+    gs = max(int(model.stride.max()), 32)  # grid size (max stride)
+    imgsz = check_img_size(opt.imgsz, gs, floor=gs * 2)  # verify imgsz is gs-multiple
+
+    # Batch size
+    if RANK == -1 and batch_size == -1:  # single-GPU only, estimate best batch size
+        batch_size = check_train_batch_size(model, imgsz)
+        loggers.on_params_update({"batch_size": batch_size})
+
+    # Optimizer
+    nbs = 64  # nominal batch size
+    accumulate = max(round(nbs / batch_size), 1)  # accumulate loss before optimizing
+    hyp["weight_decay"] *= batch_size * accumulate / nbs  # scale weight_decay
+    LOGGER.info(f"Scaled weight_decay = {hyp['weight_decay']}")
+
+    g0, g1, g2 = [], [], []  # optimizer parameter groups
+    for v in model.modules():
+        if hasattr(v, "bias") and isinstance(v.bias, nn.Parameter):  # bias
+            g2.append(v.bias)
+        if isinstance(v, nn.BatchNorm2d):  # weight (no decay)
+            g0.append(v.weight)
+        elif hasattr(v, "weight") and isinstance(v.weight, nn.Parameter):  # weight (with decay)
+            g1.append(v.weight)
+
+    if opt.optimizer == "Adam":
+        optimizer = Adam(g0, lr=hyp["lr0"], betas=(hyp["momentum"], 0.999))  # adjust beta1 to momentum
+    elif opt.optimizer == "AdamW":
+        optimizer = AdamW(g0, lr=hyp["lr0"], betas=(hyp["momentum"], 0.999))  # adjust beta1 to momentum
+    else:
+        optimizer = SGD(g0, lr=hyp["lr0"], momentum=hyp["momentum"], nesterov=True)
+
+    # add g1 with weight_decay
+    optimizer.add_param_group({"params": g1, "weight_decay": hyp["weight_decay"]})
+    optimizer.add_param_group({"params": g2})  # add g2 (biases)
+    LOGGER.info(
+        f"{colorstr('optimizer:')} {type(optimizer).__name__} with parameter groups "
+        f"{len(g0)} weight (no decay), {len(g1)} weight, {len(g2)} bias"
+    )
+    del g0, g1, g2
+
+    # Scheduler
+    if opt.linear_lr:
+        # lf = lambda x: (1 - x / (epochs - 1)) * (1.0 - hyp["lrf"]) + hyp["lrf"]  # linear
+        lf = linear_lr(hyp["lrf"], epochs)  # bound to lf: LambdaLR and the warmup below both call it
+    else:
+        lf = one_cycle(1, hyp["lrf"], epochs)  # cosine 1->hyp['lrf']
+    scheduler = lr_scheduler.LambdaLR(
+        optimizer, lr_lambda=lf
+    )  # plot_lr_scheduler(optimizer, scheduler, epochs)
+
+    # EMA
+    ema = ModelEMA(model) if RANK in [-1, 0] else None
+
+    # Resume
+    start_epoch, best_fitness = 0, 0.0
+    if pretrained:
+        # Optimizer
+        if ckpt["optimizer"] is not None:
+            optimizer.load_state_dict(ckpt["optimizer"])
+            best_fitness = ckpt["best_fitness"]
+
+        # EMA
+        if ema and ckpt.get("ema"):
+            ema.ema.load_state_dict(ckpt["ema"].float().state_dict())
+            ema.updates = ckpt["updates"]
+
+        # Epochs
+        start_epoch = ckpt["epoch"] + 1
+        if resume:
+            assert start_epoch > 0, f"{weights} training to {epochs} epochs is finished, nothing to resume."
+        if epochs < start_epoch:
+            LOGGER.info(
+                f"{weights} has been trained for {ckpt['epoch']} epochs. "
+                f"Fine-tuning for {epochs} more epochs."
+            )
+            epochs += ckpt["epoch"]  # finetune additional epochs
+
+        del ckpt, csd
+
+    # DP mode
+    if cuda and RANK == -1 and torch.cuda.device_count() > 1:
+        LOGGER.warning(
+            "WARNING: DP not recommended, use torch.distributed.run for best DDP Multi-GPU results.\n"
+            "See Multi-GPU Tutorial at https://github.com/ultralytics/yolov5/issues/475 to get started."
+        )
+        model = torch.nn.DataParallel(model)
+
+    # SyncBatchNorm
+    if opt.sync_bn and cuda and RANK != -1:
+        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model).to(device)
+        LOGGER.info("Using SyncBatchNorm()")
+
+    # Trainloader
+    train_loader, dataset = create_dataloader(
+        train_path,
+        imgsz,
+        batch_size // WORLD_SIZE,
+        gs,
+        single_cls,
+        hyp=hyp,
+        augment=True,
+        cache=opt.cache,
+        rect=opt.rect,
+        rank=LOCAL_RANK,
+        workers=workers,
+        image_weights=opt.image_weights,
+        quad=opt.quad,
+        prefix=colorstr("train: "),
+        shuffle=True,
+    )
+    mlc = int(np.concatenate(dataset.labels, 0)[:, 0].max())  # max label class
+    nb = len(train_loader)  # number of batches
+    assert mlc < nc, f"Label class {mlc} exceeds nc={nc} in {data}. Possible class labels are 0-{nc - 1}"
+
+    # Process 0
+    if RANK in [-1, 0]:
+        val_loader = create_dataloader(
+            val_path,
+            imgsz,
+            batch_size // WORLD_SIZE * 2,
+            gs,
+            single_cls,
+            hyp=hyp,
+            cache=None if noval else opt.cache,
+            rect=True,
+            rank=-1,
+            workers=workers,
+            pad=0.5,
+            prefix=colorstr("val: "),
+        )[0]
+
+        if not resume:
+            labels = np.concatenate(dataset.labels, 0)
+            # c = torch.tensor(labels[:, 0])  # classes
+            # cf = torch.bincount(c.long(), minlength=nc) + 1.  # frequency
+            # model._initialize_biases(cf.to(device))
+            if plots:
+                plot_labels(labels, names, save_dir)
+
+            # Anchors
+            if not opt.noautoanchor:
+                check_anchors(dataset, model=model, thr=hyp["anchor_t"], imgsz=imgsz)
+            model.half().float()  # pre-reduce anchor precision
+
+        callbacks.run("on_pretrain_routine_end")
+
+    # DDP mode
+    if cuda and RANK != -1:
+        model = DDP(model, device_ids=[LOCAL_RANK], output_device=LOCAL_RANK)
+
+    # Model attributes
+    nl = de_parallel(model).model[-1].nl  # number of detection layers (to scale hyps)
+    hyp["box"] *= 3 / nl  # scale to layers
+    hyp["cls"] *= nc / 80 * 3 / nl  # scale to classes and layers
+    hyp["obj"] *= (imgsz / 640) ** 2 * 3 / nl  # scale to image size and layers
+    hyp["label_smoothing"] = opt.label_smoothing
+    model.nc = nc  # attach number of classes to model
+    model.hyp = hyp  # attach hyperparameters to model
+    # attach class weights
+    model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device) * nc
+    model.names = names
+
+    # Start training
+    t0 = time.time()
+    nw = max(
+        round(hyp["warmup_epochs"] * nb), 1000
+    )  # number of warmup iterations, max(3 epochs, 1k iterations)
+    # nw = min(nw, (epochs - start_epoch) / 2 * nb)  # limit warmup to < 1/2 of training
+    last_opt_step = -1
+    maps = np.zeros(nc)  # mAP per class
+    results = (0, 0, 0, 0, 0, 0, 0)  # P, R, mAP@.5, mAP@.5-.95, val_loss(box, obj, cls)
+    scheduler.last_epoch = start_epoch - 1  # do not move
+    scaler = amp.GradScaler(enabled=cuda)
+    stopper = EarlyStopping(patience=opt.patience)
+    compute_loss = ComputeLoss(model)  # init loss class
+    LOGGER.info(
+        f"Image sizes {imgsz} train, {imgsz} val\n"
+        f"Using {train_loader.num_workers * WORLD_SIZE} dataloader workers\n"
+        f"Logging results to {colorstr('bold', save_dir)}\n"
+        f"Starting training for {epochs} epochs..."
+    )
+    for epoch in range(start_epoch, epochs):  # epoch
+        model.train()
+
+        # Update image weights (optional, single-GPU only)
+        if opt.image_weights:
+            cw = model.class_weights.cpu().numpy() * (1 - maps) ** 2 / nc  # class weights
+            iw = labels_to_image_weights(dataset.labels, nc=nc, class_weights=cw)  # image weights
+            dataset.indices = random.choices(range(dataset.n), weights=iw, k=dataset.n)  # rand weighted idx
+
+        # Update mosaic border (optional)
+        # b = int(random.uniform(0.25 * imgsz, 0.75 * imgsz + gs) // gs * gs)
+        # dataset.mosaic_border = [b - imgsz, -b]  # height, width borders
+
+        mloss = torch.zeros(3, device=device)  # mean losses
+        if RANK != -1:
+            train_loader.sampler.set_epoch(epoch)
+        pbar = enumerate(train_loader)
+        LOGGER.info(("\n" + "%10s" * 7) % ("Epoch", "gpu_mem", "box", "obj", "cls", "labels", "img_size"))
+        if RANK in [-1, 0]:
+            pbar = tqdm(pbar, total=nb, bar_format="{l_bar}{bar:10}{r_bar}{bar:-10b}")  # progress bar
+        optimizer.zero_grad()
+        for i, (
+            imgs,
+            targets,
+            paths,
+            _,
+        ) in pbar:  # batch
+            ni = i + nb * epoch  # number integrated batches (since train start)
+            imgs = imgs.to(device, non_blocking=True).float() / 255  # uint8 to float32, 0-255 to 0.0-1.0
+
+            # Warmup
+            if ni <= nw:
+                xi = [0, nw]  # x interp
+                # compute_loss.gr = np.interp(ni, xi, [0.0, 1.0])  # iou loss ratio (obj_loss = 1.0 or iou)
+                accumulate = max(1, np.interp(ni, xi, [1, nbs / batch_size]).round())
+                for j, x in enumerate(optimizer.param_groups):
+                    # bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0
+                    x["lr"] = np.interp(
+                        ni, xi, [hyp["warmup_bias_lr"] if j == 2 else 0.0, x["initial_lr"] * lf(epoch)]
+                    )
+                    if "momentum" in x:
+                        x["momentum"] = np.interp(ni, xi, [hyp["warmup_momentum"], hyp["momentum"]])
+
+            # Multi-scale
+            if opt.multi_scale:
+                # randrange() needs integer bounds (float arguments raise TypeError on Python 3.12+)
+                sz = random.randrange(int(imgsz * 0.5), int(imgsz * 1.5) + gs) // gs * gs  # size
+                sf = sz / max(imgs.shape[2:])  # scale factor
+                if sf != 1:
+                    ns = [math.ceil(x * sf / gs) * gs for x in imgs.shape[2:]]  # new shape (gs-multiple)
+                    imgs = nn.functional.interpolate(imgs, size=ns, mode="bilinear", align_corners=False)
+
+            # Forward
+            with amp.autocast(enabled=cuda):
+                pred = model(imgs)  # forward
+                loss, loss_items = compute_loss(pred, targets.to(device))  # loss scaled by batch_size
+                if RANK != -1:
+                    loss *= WORLD_SIZE  # gradient averaged between devices in DDP mode
+                if opt.quad:
+                    loss *= 4.0
+
+            # Backward
+            scaler.scale(loss).backward()
+
+            # Optimize
+            if ni - last_opt_step >= accumulate:
+                scaler.step(optimizer)  # optimizer.step
+                scaler.update()
+                optimizer.zero_grad()
+                if ema:
+                    ema.update(model)
+                last_opt_step = ni
+
+            # Log
+            if RANK in [-1, 0]:
+                mloss = (mloss * i + loss_items) / (i + 1)  # update mean losses
+                # in (GB) metric
+                mem = f"{torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0:.3g}G"
+                pbar.set_description(
+                    ("%10s" * 2 + "%10.4g" * 5)
+                    % (f"{epoch}/{epochs - 1}", mem, *mloss, targets.shape[0], imgs.shape[-1])
+                )
+                callbacks.run("on_train_batch_end", ni, model, imgs, targets, paths, plots, opt.sync_bn)
+                if callbacks.stop_training:
+                    return
+            # end batch
+
+        # Scheduler
+        lr = [x["lr"] for x in optimizer.param_groups]  # for loggers
+        scheduler.step()
+
+        if RANK in [-1, 0]:
+            # mAP
+            callbacks.run("on_train_epoch_end", epoch=epoch)
+            ema.update_attr(model, include=["yaml", "nc", "hyp", "names", "stride", "class_weights"])
+            final_epoch = (epoch + 1 == epochs) or stopper.possible_stop
+            if not noval or final_epoch:  # Calculate mAP
+                results, maps, _ = val.run(
+                    data_dict,
+                    batch_size=batch_size // WORLD_SIZE * 2,
+                    imgsz=imgsz,
+                    model=ema.ema,
+                    single_cls=single_cls,
+                    dataloader=val_loader,
+                    save_dir=save_dir,
+                    plots=False,
+                    callbacks=callbacks,
+                    compute_loss=compute_loss,
+                )
+
+            # Update best mAP
+            # weighted combination of [P, R, mAP@.5, mAP@.5-.95]
+            fi = fitness(np.array(results).reshape(1, -1))
+            if fi > best_fitness:
+                best_fitness = fi
+            log_vals = list(mloss) + list(results) + lr
+            callbacks.run("on_fit_epoch_end", log_vals, epoch, best_fitness, fi)
+
+            # Save model
+            if (not nosave) or (final_epoch and not evolve):  # if save
+                ckpt = {
+                    "epoch": epoch,
+                    "best_fitness": best_fitness,
+                    "model": deepcopy(de_parallel(model)).half(),
+                    "ema": deepcopy(ema.ema).half(),
+                    "updates": ema.updates,
+                    "optimizer": optimizer.state_dict(),
+                    "wandb_id": loggers.wandb.wandb_run.id if loggers.wandb else None,
+                    "date": datetime.now().isoformat(),
+                }
+
+                # Save last, best and delete
+                torch.save(ckpt, last)
+                if best_fitness == fi:
+                    torch.save(ckpt, best)
+                if (epoch > 0) and (opt.save_period > 0) and (epoch % opt.save_period == 0):
+                    torch.save(ckpt, w / f"epoch{epoch}.pt")
+                del ckpt
+                callbacks.run("on_model_save", last, epoch, final_epoch, best_fitness, fi)
+
+            # Stop Single-GPU
+            if RANK == -1 and stopper(epoch=epoch, fitness=fi):
+                break
+
+            # Stop DDP TODO: known issues https://github.com/ultralytics/yolov5/pull/4576
+            # stop = stopper(epoch=epoch, fitness=fi)
+            # if RANK == 0:
+            #    dist.broadcast_object_list([stop], 0)  # broadcast 'stop' to all ranks
+
+        # Stop DDP
+        # with torch_distributed_zero_first(RANK):
+        # if stop:
+        #    break  # must break all DDP ranks
+
+        # end epoch
+    # end training
+    if RANK in [-1, 0]:
+        LOGGER.info(f"\n{epoch - start_epoch + 1} epochs completed in {(time.time() - t0) / 3600:.3f} hours.")
+        for f in last, best:
+            if f.exists():
+                strip_optimizer(f)  # strip optimizers
+                if f is best:
+                    LOGGER.info(f"\nValidating {f}...")
+                    results, _, _ = val.run(
+                        data_dict,
+                        batch_size=batch_size // WORLD_SIZE * 2,
+                        imgsz=imgsz,
+                        model=attempt_load(f, device).half(),
+                        iou_thres=0.65 if is_coco else 0.60,  # best pycocotools results at 0.65
+                        single_cls=single_cls,
+                        dataloader=val_loader,
+                        save_dir=save_dir,
+                        save_json=is_coco,
+                        verbose=True,
+                        plots=True,
+                        callbacks=callbacks,
+                        compute_loss=compute_loss,
+                    )  # val best model with plots
+                    if is_coco:
+                        callbacks.run(
+                            "on_fit_epoch_end", list(mloss) + list(results) + lr, epoch, best_fitness, fi
+                        )
+
+        callbacks.run("on_train_end", last, best, plots, epoch, results)
+        LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}")
+
+    torch.cuda.empty_cache()
+    return results
+
+
+def parse_opt(known=False):
+    """Build the training CLI parser and parse ``sys.argv``; ``known=True`` ignores unknown arguments."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--weights", type=str, default="", help="initial weights path")
+    parser.add_argument("--cfg", type=str, default="", help="model.yaml path")
+    parser.add_argument("--data", type=str, default="yolort/v5/data/coco128.yaml", help="dataset.yaml path")
+    parser.add_argument(
+        "--hyp", type=str, default="yolort/v5/data/hyps/hyp.scratch.yaml", help="hyperparameters path"
+    )
+    parser.add_argument("--epochs", type=int, default=100)
+    parser.add_argument(
+        "--batch-size", type=int, default=4, help="total batch size for all GPUs, -1 for autobatch"
+    )
+    parser.add_argument(
+        "--imgsz", "--img", "--img-size", type=int, default=640, help="train, val image size (pixels)"
+    )
+    parser.add_argument("--rect", action="store_true", help="rectangular training")
+    parser.add_argument("--resume", nargs="?", const=True, default=False, help="resume most recent training")
+    parser.add_argument("--nosave", action="store_true", help="only save final checkpoint")
+    parser.add_argument("--noval", action="store_true", help="only validate final epoch")
+    parser.add_argument("--noautoanchor", action="store_true", help="disable AutoAnchor")
+    parser.add_argument(
+        "--evolve", type=int, nargs="?", const=300, help="evolve hyperparameters for x generations"
+    )
+    parser.add_argument("--bucket", type=str, default="", help="gsutil bucket")
+    parser.add_argument(
+        "--cache", type=str, nargs="?", const="ram", help='--cache images in "ram" (default) or "disk"'
+    )
+    parser.add_argument(
+        "--image-weights", action="store_true", help="use weighted image selection for training"
+    )
+    parser.add_argument("--device", default="", help="cuda device, i.e. 0 or 0,1,2,3 or cpu")
+    parser.add_argument("--multi-scale", action="store_true", help="vary img-size +/- 50%%")
+    parser.add_argument("--single-cls", action="store_true", help="train multi-class data as single-class")
+    parser.add_argument(
+        "--optimizer", type=str, choices=["SGD", "Adam", "AdamW"], default="SGD", help="optimizer"
+    )
+    parser.add_argument(
+        "--sync-bn", action="store_true", help="use SyncBatchNorm, only available in DDP mode"
+    )
+    parser.add_argument(
+        "--workers", type=int, default=8, help="max dataloader workers (per RANK in DDP mode)"
+    )
+    parser.add_argument("--project", default="runs/train", help="save to project/name")  # delete ROOT /
+    parser.add_argument("--name", default="exp", help="save to project/name")
+    parser.add_argument("--exist-ok", action="store_true", help="existing project/name ok, do not increment")
+    parser.add_argument("--quad", action="store_true", help="quad dataloader")
+    parser.add_argument("--linear-lr", action="store_true", help="linear LR")
+    parser.add_argument("--label-smoothing", type=float, default=0.0, help="Label smoothing epsilon")
+    parser.add_argument(
+        "--patience", type=int, default=100, help="EarlyStopping patience (epochs without improvement)"
+    )
+    parser.add_argument(
+        "--freeze", nargs="+", type=int, default=[0], help="Freeze layers: backbone=10, first3=0 1 2"
+    )
+    parser.add_argument(
+        "--save-period", type=int, default=-1, help="Save checkpoint every x epochs (disabled if < 1)"
+    )
+    parser.add_argument("--local_rank", type=int, default=-1, help="DDP parameter, do not modify")
+
+    # Weights & Biases arguments
+    parser.add_argument("--entity", default=None, help="W&B: Entity")
+    parser.add_argument(
+        "--upload_dataset", nargs="?", const=True, default=False, help='W&B: Upload data, "val" option'
+    )
+    parser.add_argument(
+        "--bbox_interval", type=int, default=-1, help="W&B: Set bounding-box image logging interval"
+    )
+    parser.add_argument(
+        "--artifact_alias", type=str, default="latest", help="W&B: Version of dataset artifact to use"
+    )
+
+    return parser.parse_known_args()[0] if known else parser.parse_args()
+
+
+def main(opt, callbacks=None):
+    """Validate options, set up the device/DDP, then run plain training or hyperparameter evolution."""
+    callbacks = Callbacks() if callbacks is None else callbacks  # fresh registry per call, never shared
+    # Checks
+    if RANK in [-1, 0]:
+        print_args(FILE.stem, opt)
+        check_git_status()
+        check_requirements(exclude=["thop"])
+
+    # Resume
+    # if opt.resume and not check_wandb_resume(opt) and not opt.evolve:  # resume an interrupted run
+    if opt.resume and not opt.evolve:  # resume an interrupted run
+        ckpt = opt.resume if isinstance(opt.resume, str) else get_latest_run()  # specified or most recent
+        assert os.path.isfile(ckpt), "ERROR: --resume checkpoint does not exist"
+        with open(Path(ckpt).parent.parent / "opt.yaml", errors="ignore") as f:
+            opt = argparse.Namespace(**yaml.safe_load(f))  # replace
+        opt.cfg, opt.weights, opt.resume = "", ckpt, True  # reinstate
+        LOGGER.info(f"Resuming training from {ckpt}")
+    else:
+        opt.data, opt.cfg, opt.hyp, opt.weights, opt.project = (
+            check_file(opt.data),
+            check_yaml(opt.cfg),
+            check_yaml(opt.hyp),
+            str(opt.weights),
+            str(opt.project),
+        )  # checks
+        assert len(opt.cfg) or len(opt.weights), "either --cfg or --weights must be specified"
+        if opt.evolve:
+            if opt.project == "runs/train":  # if default project name, rename to runs/evolve
+                opt.project = "runs/evolve"
+            opt.exist_ok, opt.resume = opt.resume, False  # pass resume to exist_ok and disable resume
+        opt.save_dir = str(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok))
+
+    # DDP mode
+    device = select_device(opt.device, batch_size=opt.batch_size)
+    if LOCAL_RANK != -1:
+        msg = "is not compatible with YOLOv5 Multi-GPU DDP training"
+        assert not opt.image_weights, f"--image-weights {msg}"
+        assert not opt.evolve, f"--evolve {msg}"
+        assert opt.batch_size != -1, f"AutoBatch with --batch-size -1 {msg}, please pass a valid --batch-size"
+        assert (
+            opt.batch_size % WORLD_SIZE == 0
+        ), f"--batch-size {opt.batch_size} must be multiple of WORLD_SIZE"
+        assert torch.cuda.device_count() > LOCAL_RANK, "insufficient CUDA devices for DDP command"
+        torch.cuda.set_device(LOCAL_RANK)
+        device = torch.device("cuda", LOCAL_RANK)
+        dist.init_process_group(backend="nccl" if dist.is_nccl_available() else "gloo")
+
+    # Train
+    if not opt.evolve:
+        train(opt.hyp, opt, device, callbacks)
+        if WORLD_SIZE > 1 and RANK == 0:
+            LOGGER.info("Destroying process group... ")
+            dist.destroy_process_group()
+
+    # Evolve hyperparameters (optional)
+    else:
+        # Hyperparameter evolution metadata (mutation scale 0-1, lower_limit, upper_limit)
+        meta = {
+            "lr0": (1, 1e-5, 1e-1),  # initial learning rate (SGD=1E-2, Adam=1E-3)
+            "lrf": (1, 0.01, 1.0),  # final OneCycleLR learning rate (lr0 * lrf)
+            "momentum": (0.3, 0.6, 0.98),  # SGD momentum/Adam beta1
+            "weight_decay": (1, 0.0, 0.001),  # optimizer weight decay
+            "warmup_epochs": (1, 0.0, 5.0),  # warmup epochs (fractions ok)
+            "warmup_momentum": (1, 0.0, 0.95),  # warmup initial momentum
+            "warmup_bias_lr": (1, 0.0, 0.2),  # warmup initial bias lr
+            "box": (1, 0.02, 0.2),  # box loss gain
+            "cls": (1, 0.2, 4.0),  # cls loss gain
+            "cls_pw": (1, 0.5, 2.0),  # cls BCELoss positive_weight
+            "obj": (1, 0.2, 4.0),  # obj loss gain (scale with pixels)
+            "obj_pw": (1, 0.5, 2.0),  # obj BCELoss positive_weight
+            "iou_t": (0, 0.1, 0.7),  # IoU training threshold
+            "anchor_t": (1, 2.0, 8.0),  # anchor-multiple threshold
+            "anchors": (2, 2.0, 10.0),  # anchors per output grid (0 to ignore)
+            "fl_gamma": (0, 0.0, 2.0),  # focal loss gamma (efficientDet default gamma=1.5)
+            "hsv_h": (1, 0.0, 0.1),  # image HSV-Hue augmentation (fraction)
+            "hsv_s": (1, 0.0, 0.9),  # image HSV-Saturation augmentation (fraction)
+            "hsv_v": (1, 0.0, 0.9),  # image HSV-Value augmentation (fraction)
+            "degrees": (1, 0.0, 45.0),  # image rotation (+/- deg)
+            "translate": (1, 0.0, 0.9),  # image translation (+/- fraction)
+            "scale": (1, 0.0, 0.9),  # image scale (+/- gain)
+            "shear": (1, 0.0, 10.0),  # image shear (+/- deg)
+            "perspective": (0, 0.0, 0.001),  # image perspective (+/- fraction), range 0-0.001
+            "flipud": (1, 0.0, 1.0),  # image flip up-down (probability)
+            "fliplr": (0, 0.0, 1.0),  # image flip left-right (probability)
+            "mosaic": (1, 0.0, 1.0),  # image mosaic (probability)
+            "mixup": (1, 0.0, 1.0),  # image mixup (probability)
+            "copy_paste": (1, 0.0, 1.0),  # segment copy-paste (probability)
+        }
+
+        with open(opt.hyp, errors="ignore") as f:
+            hyp = yaml.safe_load(f)  # load hyps dict
+            if "anchors" not in hyp:  # anchors commented in hyp.yaml
+                hyp["anchors"] = 3
+        opt.noval, opt.nosave, save_dir = True, True, Path(opt.save_dir)  # only val/save final epoch
+        # ei = [isinstance(x, (int, float)) for x in hyp.values()]  # evolvable indices
+        evolve_yaml, evolve_csv = save_dir / "hyp_evolve.yaml", save_dir / "evolve.csv"
+        if opt.bucket:
+            os.system(f"gsutil cp gs://{opt.bucket}/evolve.csv {save_dir}")  # download evolve.csv if exists
+
+        for _ in range(opt.evolve):  # generations to evolve
+            if evolve_csv.exists():  # if evolve.csv exists: select best hyps and mutate
+                # Select parent(s)
+                parent = "single"  # parent selection method: 'single' or 'weighted'
+                x = np.loadtxt(evolve_csv, ndmin=2, delimiter=",", skiprows=1)
+                n = min(5, len(x))  # number of previous results to consider
+                x = x[np.argsort(-fitness(x))][:n]  # top n mutations
+                w = fitness(x) - fitness(x).min() + 1e-6  # weights (sum > 0)
+                if parent == "single" or len(x) == 1:
+                    # x = x[random.randint(0, n - 1)]  # random selection
+                    x = x[random.choices(range(n), weights=w)[0]]  # weighted selection
+                elif parent == "weighted":
+                    x = (x * w.reshape(n, 1)).sum(0) / w.sum()  # weighted combination
+
+                # Mutate
+                mp, s = 0.8, 0.2  # mutation probability, sigma
+                npr = np.random
+                npr.seed(int(time.time()))
+                g = np.array([meta[k][0] for k in hyp.keys()])  # gains 0-1
+                ng = len(meta)
+                v = np.ones(ng)
+                while all(v == 1):  # mutate until a change occurs (prevent duplicates)
+                    v = (g * (npr.random(ng) < mp) * npr.randn(ng) * npr.random() * s + 1).clip(0.3, 3.0)
+                for i, k in enumerate(hyp.keys()):  # plt.hist(v.ravel(), 300)
+                    hyp[k] = float(x[i + 7] * v[i])  # mutate
+
+            # Constrain to limits
+            for k, v in meta.items():
+                hyp[k] = max(hyp[k], v[1])  # lower limit
+                hyp[k] = min(hyp[k], v[2])  # upper limit
+                hyp[k] = round(hyp[k], 5)  # significant digits
+
+            # Train mutation
+            results = train(hyp.copy(), opt, device, callbacks)
+            callbacks = Callbacks()
+            # Write mutation results
+            print_mutation(results, hyp.copy(), save_dir, opt.bucket)
+
+        # Plot results
+        plot_evolve(evolve_csv)
+        LOGGER.info(
+            f"Hyperparameter evolution finished\n"
+            f"Results saved to {colorstr('bold', save_dir)}\n"
+            f"Use best hyperparameters example: $ python train.py --hyp {evolve_yaml}"
+        )
+
+
+def run(**kwargs):
+    """Train from Python code: parsed CLI defaults are overridden by ``kwargs``, then ``main`` runs."""
+    # Usage: from yolort.trainer import run; run(data='coco128.yaml', imgsz=320, weights='yolov5m.pt')
+    opt = parse_opt(True)  # known=True so unrelated entries in sys.argv are ignored
+    vars(opt).update(kwargs)  # same effect as setattr() per item on an argparse.Namespace
+
+    main(opt)
+    return opt
+
+
+if __name__ == "__main__":
+    opt = parse_opt()  # strict parsing: unknown command-line arguments are an error
+    main(opt)
diff --git a/yolort/trainer/utils.py b/yolort/trainer/utils.py
new file mode 100644
index 00000000..fb9b9da1
--- /dev/null
+++ b/yolort/trainer/utils.py
@@ -0,0 +1,1495 @@
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
+"""
+Training utilities: auto-batch size estimation, dataset and dataloader helpers, and checkpoint loading.
+"""
+
+import contextlib
+import glob
+import hashlib
+import os
+import platform
+import random
+import warnings
+from copy import deepcopy
+from itertools import repeat
+from multiprocessing.pool import Pool, ThreadPool
+from pathlib import Path
+from subprocess import check_output
+from threading import Thread
+from zipfile import ZipFile
+
+import cv2
+import matplotlib.pyplot as plt
+import numpy as np
+import pkg_resources as pkg
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import yaml
+from PIL import Image, ImageOps
+from torch.cuda import amp
+from torch.utils.data import DataLoader, Dataset, dataloader, distributed
+from torch.utils.tensorboard import SummaryWriter
+from tqdm import tqdm
+from yolort.v5.models.common import Conv
+from yolort.v5.models.experimental import Ensemble
+from yolort.v5.utils.augmentations import (
+    Albumentations,
+    augment_hsv,
+    copy_paste,
+    letterbox,
+    mixup,
+    random_perspective,
+)
+from yolort.v5.utils.downloads import attempt_download
+from yolort.v5.utils.general import (
+    LOGGER,
+    check_version,
+    colorstr,
+    segments2boxes,
+    xyn2xy,
+    xywhn2xyxy,
+    xyxy2xywhn,
+)
+from yolort.v5.utils.plots import plot_images, plot_results
+from yolort.v5.utils.torch_utils import de_parallel, profile, torch_distributed_zero_first
+
+FILE = Path(__file__).resolve()  # absolute path of this module
+ROOT = FILE.parents[1]  # parent of the directory that contains this file
+DATASETS_DIR = ROOT.parent / "datasets"  # "datasets" directory next to ROOT
+# Number of multiprocessing threads: at least 1, at most 8, leaving one CPU free when possible.
+# os.cpu_count() may return None when the count cannot be determined, so fall back to 1.
+NUM_THREADS = min(8, max(1, (os.cpu_count() or 1) - 1))
+HELP_URL = "https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data"
+# acceptable image suffixes
+IMG_FORMATS = ["bmp", "jpg", "jpeg", "png", "tif", "tiff", "dng", "webp", "mpo"]
+# acceptable video suffixes
+VID_FORMATS = ["mov", "avi", "mp4", "mpg", "mpeg", "m4v", "wmv", "mkv"]
+WORLD_SIZE = int(os.getenv("WORLD_SIZE", 1))  # DDP world size from the environment (1 when unset)
+
+LOGGERS = ("csv", "tb", "wandb")  # text-file, TensorBoard, Weights & Biases
+RANK = int(os.getenv("RANK", -1))  # process rank from the environment (-1 when unset)
+
+
+# Optional Weights & Biases integration, resolved at import time. After this block `wandb` is
+# either the imported module or None (import failed, the assert below failed, or login failed).
+try:
+    import wandb
+
+    assert hasattr(wandb, "__version__")  # verify package import not local dir
+    # Login is only attempted for wandb >= 0.12.2 and when RANK is 0 or -1.
+    if pkg.parse_version(wandb.__version__) >= pkg.parse_version("0.12.2") and RANK in [0, -1]:
+        try:
+            wandb_login_success = wandb.login(timeout=30)
+        except wandb.errors.UsageError:  # known non-TTY terminal issue
+            wandb_login_success = False
+        if not wandb_login_success:
+            wandb = None  # treat a failed login the same as a missing package
+except (ImportError, AssertionError):
+    wandb = None
+
+
+def check_train_batch_size(model, imgsz=640):
+    """
+    Estimate a training batch size for ``model`` at image size ``imgsz``.
+
+    A deep copy of the model is switched to train mode and passed to
+    ``autobatch`` inside an ``amp.autocast()`` context, so the caller's model
+    object is not the one being profiled.
+    """
+    with amp.autocast():
+        trial_model = deepcopy(model)
+        trial_model = trial_model.train()
+        return autobatch(trial_model, imgsz)  # compute optimal batch size
+
+
+def autobatch(model, imgsz=640, fraction=0.9, batch_size=16):
+    """
+    Estimate the batch size that uses ``fraction`` of the free CUDA memory.
+
+    Usage:
+        import torch
+        from utils.autobatch import autobatch
+        model = torch.hub.load('ultralytics/yolov5', 'yolov5s', autoshape=False)
+        print(autobatch(model))
+
+    Args:
+        model: module whose first parameter determines the device to profile on.
+        imgsz: square image size used for the zero-filled profiling inputs.
+        fraction: share of the free memory estimate the batch may occupy.
+        batch_size: value returned unchanged on CPU and whenever no usable
+            estimate can be computed.
+
+    Returns:
+        int: the estimated batch size, or ``batch_size`` as a fallback.
+    """
+    prefix = colorstr("AutoBatch: ")
+    LOGGER.info(f"{prefix}Computing optimal batch size for --imgsz {imgsz}")
+    device = next(model.parameters()).device  # get model device
+    if device.type == "cpu":
+        LOGGER.info(f"{prefix}CUDA not detected, using default CPU batch-size {batch_size}")
+        return batch_size
+
+    d = str(device).upper()  # 'CUDA:0'
+    properties = torch.cuda.get_device_properties(device)  # device properties
+    t = properties.total_memory / 1024 ** 3  # (GiB)
+    r = torch.cuda.memory_reserved(device) / 1024 ** 3  # (GiB)
+    a = torch.cuda.memory_allocated(device) / 1024 ** 3  # (GiB)
+    f = t - (r + a)  # free inside reserved
+    LOGGER.info(
+        f"{prefix}{d} ({properties.name}) {t:.2f}G total, {r:.2f}G reserved, "
+        f"{a:.2f}G allocated, {f:.2f}G free"
+    )
+
+    batch_sizes = [1, 2, 4, 8, 16]
+    try:
+        img = [torch.zeros(b, 3, imgsz, imgsz) for b in batch_sizes]
+        y = profile(img, model, n=3, device=device)
+    except Exception as e:
+        LOGGER.warning(f"{prefix}{e}")
+        y = []  # profiling failed outright; handled by the fallback below
+
+    y = [x[2] for x in y if x]  # memory [2]
+    if len(y) < 2:
+        # A first-degree fit needs at least two measurements.
+        LOGGER.warning(f"{prefix}WARNING: profiling failed, using default batch-size {batch_size}")
+        return batch_size
+
+    batch_sizes = batch_sizes[: len(y)]
+    p = np.polyfit(batch_sizes, y, deg=1)  # first degree polynomial fit
+    if p[0] <= 0:
+        # Memory that does not grow with batch size cannot be extrapolated (and would divide by zero).
+        LOGGER.warning(f"{prefix}WARNING: unusable memory fit, using default batch-size {batch_size}")
+        return batch_size
+    b = int((f * fraction - p[1]) / p[0])  # y intercept (optimal batch size)
+    if b < 1:
+        LOGGER.warning(f"{prefix}WARNING: estimated batch-size {b} < 1, using default batch-size {batch_size}")
+        return batch_size
+    LOGGER.info(f"{prefix}Using batch-size {b} for {d} {t * fraction:.2f}G/{t:.2f}G ({fraction * 100:.0f}%)")
+    return b
+
+
+def create_dataloader(
+    path,
+    imgsz,
+    batch_size,
+    stride,
+    single_cls=False,
+    hyp=None,
+    augment=False,
+    cache=False,
+    pad=0.0,
+    rect=False,
+    rank=-1,
+    workers=8,
+    image_weights=False,
+    quad=False,
+    prefix="",
+    shuffle=False,
+):
+    """
+    Build a ``LoadImagesAndLabels`` dataset and a loader over it.
+
+    Returns:
+        tuple: ``(loader, dataset)``. The loader is a plain ``DataLoader`` when
+        ``image_weights`` is truthy, otherwise an ``InfiniteDataLoader``.
+    """
+    if shuffle and rect:
+        shuffle = False
+        LOGGER.warning("WARNING: --rect is incompatible with DataLoader shuffle, setting shuffle=False")
+
+    with torch_distributed_zero_first(rank):  # init dataset *.cache only once if DDP
+        dataset_options = dict(
+            augment=augment,  # augmentation
+            hyp=hyp,  # hyperparameters
+            rect=rect,  # rectangular batches
+            cache_images=cache,
+            single_cls=single_cls,
+            stride=int(stride),
+            pad=pad,
+            image_weights=image_weights,
+            prefix=prefix,
+        )
+        dataset = LoadImagesAndLabels(path, imgsz, batch_size, **dataset_options)
+
+    # The batch can never be larger than the dataset itself
+    batch_size = min(batch_size, len(dataset))
+
+    # Worker count: limited by CPUs per CUDA device, by the batch size (0 for batch size <= 1)
+    # and by the requested maximum
+    cuda_devices = torch.cuda.device_count()
+    cpus_per_device = os.cpu_count() // max(cuda_devices, 1)
+    batch_cap = batch_size if batch_size > 1 else 0
+    num_workers = min(cpus_per_device, batch_cap, workers)
+
+    if rank == -1:
+        sampler = None
+    else:
+        sampler = distributed.DistributedSampler(dataset, shuffle=shuffle)
+
+    # only DataLoader allows for attribute updates
+    loader_cls = DataLoader if image_weights else InfiniteDataLoader
+    if quad:
+        collate = LoadImagesAndLabels.collate_fn4
+    else:
+        collate = LoadImagesAndLabels.collate_fn
+
+    loader = loader_cls(
+        dataset,
+        batch_size=batch_size,
+        shuffle=shuffle and sampler is None,
+        num_workers=num_workers,
+        sampler=sampler,
+        pin_memory=True,
+        collate_fn=collate,
+    )
+    return loader, dataset
+
+
+def exif_size(img):
+    """
+    Return the size of a PIL image as (width, height), corrected for EXIF orientation.
+
+    Args:
+        img: object exposing ``size`` as (width, height) and a ``_getexif()`` method.
+
+    Returns:
+        tuple: ``img.size`` with width and height swapped when the EXIF
+        orientation value is 6 or 8, otherwise ``img.size`` unchanged.
+    """
+    # The tag id is kept local so the lookup does not depend on a module-level name.
+    orientation_tag = 274  # EXIF "Orientation" tag id (0x0112)
+    s = img.size  # (width, height)
+    try:
+        exif = img._getexif()
+        rotation = dict(exif.items()).get(orientation_tag) if exif else None
+    except Exception:
+        # Best effort: an image without readable EXIF data keeps its stored size.
+        rotation = None
+    if rotation in (6, 8):  # rotation 270 or 90: width and height trade places
+        s = (s[1], s[0])
+    return s
+
+
+def img2label_paths(img_paths):
+    """
+    Derive label file paths from image file paths.
+
+    For each path the last ``<sep>images<sep>`` component is replaced with
+    ``<sep>labels<sep>`` and everything after the final dot is replaced with
+    ``txt``. A path without a dot simply gets ``.txt`` appended.
+    """
+    images_dir = f"{os.sep}images{os.sep}"  # /images/ substring
+    labels_dir = f"{os.sep}labels{os.sep}"  # /labels/ substring
+    label_paths = []
+    for img_path in img_paths:
+        swapped = labels_dir.join(img_path.rsplit(images_dir, 1))
+        stem = swapped.rsplit(".", 1)[0]
+        label_paths.append(stem + ".txt")
+    return label_paths
+
+
+def get_hash(paths):
+    """
+    Return one MD5 hex digest for a list of paths (files or dirs).
+
+    The digest covers the summed ``os.path.getsize`` of the paths that exist,
+    followed by all path strings concatenated in order.
+    """
+    total_bytes = 0
+    for p in paths:
+        if os.path.exists(p):
+            total_bytes += os.path.getsize(p)
+    digest = hashlib.md5(str(total_bytes).encode())  # hash sizes
+    digest.update("".join(paths).encode())  # hash paths
+    return digest.hexdigest()
+
+
+def verify_image_label(args):
+    """
+    Verify one image-label pair.
+
+    Args:
+        args: tuple ``(im_file, lb_file, prefix)``; ``prefix`` is prepended to
+            warning messages.
+
+    Returns:
+        On success the tuple ``(im_file, lb, shape, segments, nm, nf, ne, nc, msg)``
+        where ``lb`` is a float32 array with 5 columns, ``shape`` is the value
+        returned by ``exif_size`` and ``nm``/``nf``/``ne``/``nc`` are 0/1 flags
+        for label missing/found/empty and image-or-label corrupt.
+        On any exception a list with the first four entries set to None,
+        ``nc`` set to 1 and ``msg`` describing the error.
+    """
+    im_file, lb_file, prefix = args
+    # number (missing, found, empty, corrupt), message, segments
+    nm, nf, ne, nc, msg, segments = (0, 0, 0, 0, "", [])
+    try:
+        # verify images
+        im = Image.open(im_file)
+        im.verify()  # PIL verify
+        shape = exif_size(im)  # image size
+        assert (shape[0] > 9) & (shape[1] > 9), f"image size {shape} <10 pixels"
+        assert im.format.lower() in IMG_FORMATS, f"invalid image format {im.format}"
+        if im.format.lower() in ("jpg", "jpeg"):
+            with open(im_file, "rb") as f:
+                f.seek(-2, 2)  # the last two bytes hold the JPEG end-of-image marker
+                if f.read() != b"\xff\xd9":  # corrupt JPEG
+                    ImageOps.exif_transpose(Image.open(im_file)).save(
+                        im_file, "JPEG", subsampling=0, quality=100
+                    )
+                    msg = f"{prefix}WARNING: {im_file}: corrupt JPEG restored and saved"
+
+        # verify labels
+        if os.path.isfile(lb_file):
+            nf = 1  # label found
+            with open(lb_file) as f:
+                lb = [x.split() for x in f.read().strip().splitlines() if len(x)]
+                if any(len(x) > 8 for x in lb):  # is segment
+                    classes = np.array([x[0] for x in lb], dtype=np.float32)
+                    segments = [np.array(x[1:], dtype=np.float32).reshape(-1, 2) for x in lb]  # (cls, xy1...)
+                    lb = np.concatenate((classes.reshape(-1, 1), segments2boxes(segments)), 1)  # (cls, xywh)
+                lb = np.array(lb, dtype=np.float32)
+            nl = len(lb)
+            if nl:
+                assert lb.shape[1] == 5, f"labels require 5 columns, {lb.shape[1]} columns detected"
+                assert (lb >= 0).all(), f"negative label values {lb[lb < 0]}"
+                assert (
+                    lb[:, 1:] <= 1
+                ).all(), f"non-normalized or out of bounds coordinates {lb[:, 1:][lb[:, 1:] > 1]}"
+                _, i = np.unique(lb, axis=0, return_index=True)
+                if len(i) < nl:  # duplicate row check
+                    lb = lb[i]  # remove duplicates
+                    if segments:
+                        # `segments` is a Python list, so it cannot be indexed with the index
+                        # array `i` directly; select the matching entries one by one.
+                        segments = [segments[x] for x in i]
+                    msg = f"{prefix}WARNING: {im_file}: {nl - len(i)} duplicate labels removed"
+            else:
+                ne = 1  # label empty
+                lb = np.zeros((0, 5), dtype=np.float32)
+        else:
+            nm = 1  # label missing
+            lb = np.zeros((0, 5), dtype=np.float32)
+        return im_file, lb, shape, segments, nm, nf, ne, nc, msg
+    except Exception as e:
+        # Deliberate catch-all: any failure marks this pair as corrupt instead of aborting the scan.
+        nc = 1
+        msg = f"{prefix}WARNING: {im_file}: ignoring corrupt image/label: {e}"
+        return [None, None, None, None, nm, nf, ne, nc, msg]
+
+
+class LoadImagesAndLabels(Dataset):
+    # YOLOv5 train_loader/val_loader, loads images and labels for training and validation
+    cache_version = 0.6  # dataset labels *.cache version
+
+    def __init__(
+        self,
+        path,
+        img_size=640,
+        batch_size=16,
+        augment=False,
+        hyp=None,
+        rect=False,
+        image_weights=False,
+        cache_images=False,
+        single_cls=False,
+        stride=32,
+        pad=0.0,
+        prefix="",
+    ):
+        """
+        Index the images under ``path``, load or build the label cache, and
+        optionally cache the images themselves.
+
+        Args:
+            path: a directory (searched recursively), a text file listing image
+                paths (entries starting with "./" are resolved against the
+                file's parent), or a list of either.
+            img_size: target size; images are resized so their longer side matches it.
+            batch_size: used to assign each image a batch index.
+            augment: enables augmentation, and mosaic unless ``rect`` is active.
+            hyp: hyperparameter mapping read during augmentation.
+            rect: sort images by aspect ratio and compute one shape per batch;
+                forced off when ``image_weights`` is truthy.
+            image_weights: stored on the instance; see ``rect``.
+            cache_images: falsy disables caching, ``"disk"`` stores ``.npy``
+                files in a sibling ``<dir>_npy`` directory, any other truthy
+                value keeps the images in RAM.
+            single_cls: set the class column of every label to 0.
+            stride: batch shapes are rounded up to a multiple of this value.
+            pad: added to the batch shape (in stride units) before rounding up.
+            prefix: string prepended to log and error messages.
+
+        Raises:
+            Exception: when no images can be loaded from ``path``.
+            AssertionError: when ``augment`` is set but no labels were found.
+        """
+        self.img_size = img_size
+        self.augment = augment
+        self.hyp = hyp
+        self.image_weights = image_weights
+        self.rect = False if image_weights else rect
+        self.mosaic = (
+            self.augment and not self.rect
+        )  # load 4 images at a time into a mosaic (only during training)
+        self.mosaic_border = [-img_size // 2, -img_size // 2]
+        self.stride = stride
+        self.path = path
+        self.albumentations = Albumentations() if augment else None
+
+        try:
+            f = []  # image files
+            for p in path if isinstance(path, list) else [path]:
+                p = Path(p)  # os-agnostic
+                if p.is_dir():  # dir
+                    f += glob.glob(str(p / "**" / "*.*"), recursive=True)
+                elif p.is_file():  # file
+                    with open(p) as t:
+                        t = t.read().strip().splitlines()
+                        parent = str(p.parent) + os.sep
+                        f += [
+                            x.replace("./", parent) if x.startswith("./") else x for x in t
+                        ]  # local to global path
+                else:
+                    raise Exception(f"{prefix}{p} does not exist")
+            self.img_files = sorted(
+                x.replace("/", os.sep) for x in f if x.split(".")[-1].lower() in IMG_FORMATS
+            )
+            assert self.img_files, f"{prefix}No images found"
+        except Exception as e:
+            raise Exception(f"{prefix}Error loading data from {path}: {e}\nSee {HELP_URL}") from e
+
+        # Check cache
+        self.label_files = img2label_paths(self.img_files)  # labels
+        # `p` is the last entry visited by the loop above
+        cache_path = (p if p.is_file() else Path(self.label_files[0]).parent).with_suffix(".cache")
+        try:
+            cache, exists = np.load(cache_path, allow_pickle=True).item(), True  # load dict
+
+            assert cache["version"] == self.cache_version  # same version
+            assert cache["hash"] == get_hash(self.label_files + self.img_files)  # same hash
+        except Exception:
+            # Missing, unreadable, outdated or stale cache: rebuild it
+            cache, exists = self.cache_labels(cache_path, prefix), False  # cache
+
+        # Display cache
+        nf, nm, ne, nc, n = cache.pop("results")  # found, missing, empty, corrupt, total
+        if exists:
+            d = (
+                f"Scanning '{cache_path}' images and labels... {nf} found, {nm} missing, "
+                f"{ne} empty, {nc} corrupt"
+            )
+            tqdm(None, desc=prefix + d, total=n, initial=n)  # display cache results
+            if cache["msgs"]:
+                LOGGER.info("\n".join(cache["msgs"]))  # display warnings
+        assert (
+            nf > 0 or not augment
+        ), f"{prefix}No labels in {cache_path}. Can not train without labels. See {HELP_URL}"
+
+        # Read cache
+        for k in ("hash", "version", "msgs"):
+            cache.pop(k)  # remove bookkeeping items so only per-image entries remain
+        labels, shapes, segments = zip(*cache.values())
+        self.labels = list(labels)
+        self.shapes = np.array(shapes, dtype=np.float64)
+        self.segments = list(segments)  # a list, so entries can be replaced below
+        self.img_files = list(cache.keys())  # update
+        self.label_files = img2label_paths(cache.keys())  # update
+        n = len(shapes)  # number of images
+        # Builtin int: the np.int alias was removed from NumPy
+        bi = np.floor(np.arange(n) / batch_size).astype(int)  # batch index
+        nb = bi[-1] + 1  # number of batches
+        self.batch = bi  # batch index of image
+        self.n = n
+        self.indices = range(n)
+
+        # Update labels
+        include_class = []  # filter labels to include only these classes (optional)
+        include_class_array = np.array(include_class).reshape(1, -1)
+        for i, (label, segment) in enumerate(zip(self.labels, self.segments)):
+            if include_class:
+                j = (label[:, 0:1] == include_class_array).any(1)
+                self.labels[i] = label[j]
+                if segment:
+                    # `segment` is a list of per-object arrays: filter it element-wise
+                    self.segments[i] = [seg for seg, keep in zip(segment, j) if keep]
+            if single_cls:  # single-class training, merge all classes into 0
+                # Segments hold only xy points (no class column), so only labels change
+                self.labels[i][:, 0] = 0
+
+        # Rectangular Training
+        if self.rect:
+            # Sort by aspect ratio
+            s = self.shapes  # wh
+            ar = s[:, 1] / s[:, 0]  # aspect ratio
+            irect = ar.argsort()
+            self.img_files = [self.img_files[i] for i in irect]
+            self.label_files = [self.label_files[i] for i in irect]
+            self.labels = [self.labels[i] for i in irect]
+            self.segments = [self.segments[i] for i in irect]  # keep aligned with labels
+            self.shapes = s[irect]  # wh
+            ar = ar[irect]
+
+            # Set training image shapes
+            shapes = [[1, 1]] * nb
+            for i in range(nb):
+                ari = ar[bi == i]
+                mini, maxi = ari.min(), ari.max()
+                if maxi < 1:
+                    shapes[i] = [maxi, 1]
+                elif mini > 1:
+                    shapes[i] = [1, 1 / mini]
+
+            self.batch_shapes = np.ceil(np.array(shapes) * img_size / stride + pad).astype(int) * stride
+
+        # Cache images into RAM/disk for faster training (WARNING: large datasets may exceed system resources)
+        self.imgs, self.img_npy = [None] * n, [None] * n
+        if cache_images:
+            if cache_images == "disk":
+                self.im_cache_dir = Path(Path(self.img_files[0]).parent.as_posix() + "_npy")
+                self.img_npy = [self.im_cache_dir / Path(f).with_suffix(".npy").name for f in self.img_files]
+                self.im_cache_dir.mkdir(parents=True, exist_ok=True)
+            gb = 0  # running total of cached bytes (reported in GB below)
+            self.img_hw0, self.img_hw = [None] * n, [None] * n
+            # The context manager shuts the worker threads down once caching is done
+            with ThreadPool(NUM_THREADS) as pool:
+                results = pool.imap(self.load_image, range(n))
+                pbar = tqdm(enumerate(results), total=n)
+                for i, x in pbar:
+                    if cache_images == "disk":
+                        if not self.img_npy[i].exists():
+                            np.save(self.img_npy[i].as_posix(), x[0])
+                        gb += self.img_npy[i].stat().st_size
+                    else:  # 'ram'
+                        (
+                            self.imgs[i],
+                            self.img_hw0[i],
+                            self.img_hw[i],
+                        ) = x  # im, hw_orig, hw_resized = load_image(self, i)
+                        gb += self.imgs[i].nbytes
+                    pbar.desc = f"{prefix}Caching images ({gb / 1E9:.1f}GB {cache_images})"
+                pbar.close()
+
+    def cache_labels(self, path=Path("./labels.cache"), prefix=""):
+        """
+        Verify every image/label pair, collect the results and try to save them to ``path``.
+
+        Returns the cache dict: one ``[labels, shape, segments]`` entry per usable
+        image file plus the ``hash``, ``results``, ``msgs`` and ``version`` keys.
+        A failed save is logged as a warning and the dict is still returned.
+        """
+        desc = f"{prefix}Scanning '{path.parent / path.stem}' images and labels..."
+        entries = {}  # image file -> [labels, shape, segments]
+        msgs = []  # warnings collected from the workers
+        nm = nf = ne = nc = 0  # number missing, found, empty, corrupt
+        with Pool(NUM_THREADS) as pool:
+            jobs = zip(self.img_files, self.label_files, repeat(prefix))
+            pbar = tqdm(pool.imap(verify_image_label, jobs), desc=desc, total=len(self.img_files))
+            for im_file, lb, shape, segments, d_nm, d_nf, d_ne, d_nc, msg in pbar:
+                nm, nf, ne, nc = nm + d_nm, nf + d_nf, ne + d_ne, nc + d_nc
+                if msg:
+                    msgs.append(msg)
+                if im_file:
+                    entries[im_file] = [lb, shape, segments]
+                pbar.desc = f"{desc}{nf} found, {nm} missing, {ne} empty, {nc} corrupt"
+
+        pbar.close()
+        if msgs:
+            LOGGER.info("\n".join(msgs))
+        if nf == 0:
+            LOGGER.warning(f"{prefix}WARNING: No labels found in {path}. See {HELP_URL}")
+
+        # Bookkeeping entries stored next to the per-image entries
+        entries.update(
+            hash=get_hash(self.label_files + self.img_files),
+            results=(nf, nm, ne, nc, len(self.img_files)),
+            msgs=msgs,  # warnings
+            version=self.cache_version,  # cache version
+        )
+        try:
+            np.save(path, entries)  # save cache for next time
+            path.with_suffix(".cache.npy").rename(path)  # remove .npy suffix
+            LOGGER.info(f"{prefix}New cache created: {path}")
+        except Exception as e:
+            # not writeable
+            LOGGER.warning(f"{prefix}WARNING: Cache directory {path.parent} is not writeable: {e}")
+        return entries
+
+    def __len__(self):
+        """Return the number of image files in the dataset."""
+        return len(self.img_files)
+
+    # def __iter__(self):
+    #     self.count = -1
+    #     print('ran dataset iter')
+    #     #self.shuffled_vector = np.random.permutation(self.nF) if self.augment else np.arange(self.nF)
+    #     return self
+
+    def __getitem__(self, index):
+        """
+        Load one sample, applying mosaic/letterbox and augmentation as configured.
+
+        Returns:
+            tuple: ``(img, labels_out, path, shapes)`` where ``img`` is a tensor
+            built from the CHW image array, ``labels_out`` is an ``(nl, 6)``
+            tensor whose column 0 is left at zero and whose columns 1: hold the
+            labels, ``path`` is the image file path, and ``shapes`` is None for
+            mosaic samples or ``((h0, w0), ((h / h0, w / w0), pad))`` otherwise.
+        """
+        index = self.indices[index]  # linear, shuffled, or image_weights
+
+        hyp = self.hyp
+        # Mosaic is only drawn when enabled on the dataset, with probability hyp["mosaic"]
+        mosaic = self.mosaic and random.random() < hyp["mosaic"]
+        if mosaic:
+            # Load mosaic
+            img, labels = self.load_mosaic(index)
+            shapes = None
+
+            # MixUp augmentation
+            if random.random() < hyp["mixup"]:
+                img, labels = mixup(img, labels, *self.load_mosaic(random.randint(0, self.n - 1)))
+
+        else:
+            # Load image
+            img, (h0, w0), (h, w) = self.load_image(index)
+
+            # Letterbox
+            shape = (
+                self.batch_shapes[self.batch[index]] if self.rect else self.img_size
+            )  # final letterboxed shape
+            img, ratio, pad = letterbox(img, shape, auto=False, scaleup=self.augment)
+            shapes = (h0, w0), ((h / h0, w / w0), pad)  # for COCO mAP rescaling
+
+            labels = self.labels[index].copy()
+            if labels.size:  # normalized xywh to pixel xyxy format
+                labels[:, 1:] = xywhn2xyxy(
+                    labels[:, 1:], ratio[0] * w, ratio[1] * h, padw=pad[0], padh=pad[1]
+                )
+
+            if self.augment:
+                img, labels = random_perspective(
+                    img,
+                    labels,
+                    degrees=hyp["degrees"],
+                    translate=hyp["translate"],
+                    scale=hyp["scale"],
+                    shear=hyp["shear"],
+                    perspective=hyp["perspective"],
+                )
+
+        nl = len(labels)  # number of labels
+        if nl:
+            # Convert boxes back to xywh normalized by the current image size
+            labels[:, 1:5] = xyxy2xywhn(labels[:, 1:5], w=img.shape[1], h=img.shape[0], clip=True, eps=1e-3)
+
+        if self.augment:
+            # Albumentations
+            img, labels = self.albumentations(img, labels)
+            nl = len(labels)  # update after albumentations
+
+            # HSV color-space
+            augment_hsv(img, hgain=hyp["hsv_h"], sgain=hyp["hsv_s"], vgain=hyp["hsv_v"])
+
+            # Flip up-down
+            if random.random() < hyp["flipud"]:
+                img = np.flipud(img)
+                if nl:
+                    labels[:, 2] = 1 - labels[:, 2]  # mirror label column 2 to match
+
+            # Flip left-right
+            if random.random() < hyp["fliplr"]:
+                img = np.fliplr(img)
+                if nl:
+                    labels[:, 1] = 1 - labels[:, 1]  # mirror label column 1 to match
+
+            # Cutouts
+            # labels = cutout(img, labels, p=0.5)
+            # nl = len(labels)  # update after cutout
+
+        # Column 0 stays zero here; collate_fn writes the image index into it
+        labels_out = torch.zeros((nl, 6))
+        if nl:
+            labels_out[:, 1:] = torch.from_numpy(labels)
+
+        # Convert
+        img = img.transpose((2, 0, 1))[::-1]  # HWC to CHW, BGR to RGB
+        img = np.ascontiguousarray(img)  # contiguous copy after the transposed/reversed view
+
+        return torch.from_numpy(img), labels_out, self.img_files[index], shapes
+
+    def load_image(self, i):
+        """
+        Load the image at dataset index ``i``.
+
+        Returns ``(im, hw_original, hw_resized)``. An image cached in RAM is
+        returned together with its stored sizes; otherwise the image comes from
+        its ``.npy`` file when that exists, else from the image file, and is
+        resized so that its longer side equals ``self.img_size``.
+        """
+        cached = self.imgs[i]
+        if cached is not None:  # cached in RAM
+            return self.imgs[i], self.img_hw0[i], self.img_hw[i]  # im, hw_original, hw_resized
+
+        npy = self.img_npy[i]
+        if npy and npy.exists():  # load npy
+            im = np.load(npy)
+        else:  # read image
+            f = self.img_files[i]
+            im = cv2.imread(f)  # BGR
+            assert im is not None, f"Image Not Found {f}"
+
+        h0, w0 = im.shape[:2]  # orig hw
+        r = self.img_size / max(h0, w0)  # ratio
+        if r != 1:  # if sizes are not equal
+            # INTER_AREA only when shrinking without augmentation, INTER_LINEAR otherwise
+            if self.augment or r > 1:
+                interpolation = cv2.INTER_LINEAR
+            else:
+                interpolation = cv2.INTER_AREA
+            new_size = (int(w0 * r), int(h0 * r))
+            im = cv2.resize(im, new_size, interpolation=interpolation)
+        return im, (h0, w0), im.shape[:2]  # im, hw_original, hw_resized
+
+    def load_mosaic(self, index):
+        """
+        YOLOv5 4-mosaic loader. Loads 1 image + 3 random images into a 4-image mosaic.
+
+        The four images are placed around a randomly drawn center on a
+        ``2s x 2s`` canvas filled with 114 (``s = self.img_size``). Labels and
+        segments are shifted by each tile's offset, clipped to the canvas, and
+        the result goes through ``copy_paste`` and ``random_perspective``.
+
+        Returns:
+            tuple: ``(img4, labels4)`` as returned by ``random_perspective``.
+        """
+        labels4, segments4 = [], []
+        s = self.img_size
+        yc, xc = (int(random.uniform(-x, 2 * s + x)) for x in self.mosaic_border)  # mosaic center y, x
+        indices = [index] + random.choices(self.indices, k=3)  # 3 additional image indices
+        random.shuffle(indices)
+        # NOTE: the loop variable below reuses (and overwrites) the `index` argument
+        for i, index in enumerate(indices):
+            # Load image
+            img, _, (h, w) = self.load_image(index)
+
+            # place img in img4
+            # (x1a, y1a, x2a, y2a) is the target region on the canvas,
+            # (x1b, y1b, x2b, y2b) the matching source region of the tile image
+            if i == 0:  # top left
+                img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8)  # base image with 4 tiles
+                x1a, y1a, x2a, y2a = (
+                    max(xc - w, 0),
+                    max(yc - h, 0),
+                    xc,
+                    yc,
+                )  # xmin, ymin, xmax, ymax (large image)
+                x1b, y1b, x2b, y2b = (
+                    w - (x2a - x1a),
+                    h - (y2a - y1a),
+                    w,
+                    h,
+                )  # xmin, ymin, xmax, ymax (small image)
+            elif i == 1:  # top right
+                x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc
+                x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
+            elif i == 2:  # bottom left
+                x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h)
+                x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h)
+            elif i == 3:  # bottom right
+                x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h)
+                x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)
+
+            img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b]  # img4[ymin:ymax, xmin:xmax]
+            # Offset of the tile's origin on the canvas, used to shift its labels
+            padw = x1a - x1b
+            padh = y1a - y1b
+
+            # Labels
+            labels, segments = self.labels[index].copy(), self.segments[index].copy()
+            if labels.size:
+                labels[:, 1:] = xywhn2xyxy(
+                    labels[:, 1:], w, h, padw, padh
+                )  # normalized xywh to pixel xyxy format
+                segments = [xyn2xy(x, w, h, padw, padh) for x in segments]
+            labels4.append(labels)
+            segments4.extend(segments)
+
+        # Concat/clip labels
+        labels4 = np.concatenate(labels4, 0)
+        for x in (labels4[:, 1:], *segments4):
+            np.clip(x, 0, 2 * s, out=x)  # clip when using random_perspective()
+        # img4, labels4 = replicate(img4, labels4)  # replicate
+
+        # Augment
+        img4, labels4, segments4 = copy_paste(img4, labels4, segments4, p=self.hyp["copy_paste"])
+        img4, labels4 = random_perspective(
+            img4,
+            labels4,
+            segments4,
+            degrees=self.hyp["degrees"],
+            translate=self.hyp["translate"],
+            scale=self.hyp["scale"],
+            shear=self.hyp["shear"],
+            perspective=self.hyp["perspective"],
+            border=self.mosaic_border,
+        )  # border to remove
+
+        return img4, labels4
+
+    def load_mosaic9(self, index):
+        """
+        YOLOv5 9-mosaic loader. Loads 1 image + 8 random images into a 9-image mosaic.
+
+        Tiles are placed on a ``3s x 3s`` canvas filled with 114
+        (``s = self.img_size``), starting with the center tile. A ``2s x 2s``
+        window at a random offset is then cut out, labels and segments are
+        shifted and clipped to match, and ``random_perspective`` is applied.
+
+        Returns:
+            tuple: ``(img9, labels9)`` as returned by ``random_perspective``.
+        """
+        labels9, segments9 = [], []
+        s = self.img_size
+        indices = [index] + random.choices(self.indices, k=8)  # 8 additional image indices
+        random.shuffle(indices)
+        hp, wp = -1, -1  # height, width previous
+        # NOTE: the loop variable below reuses (and overwrites) the `index` argument
+        for i, index in enumerate(indices):
+            # Load image
+            img, _, (h, w) = self.load_image(index)
+
+            # place img in img9
+            # h0, w0 are the center tile's size; hp, wp the previously placed tile's size
+            if i == 0:  # center
+                img9 = np.full((s * 3, s * 3, img.shape[2]), 114, dtype=np.uint8)  # base image with 9 tiles
+                h0, w0 = h, w
+                c = s, s, s + w, s + h  # xmin, ymin, xmax, ymax (base) coordinates
+            elif i == 1:  # top
+                c = s, s - h, s + w, s
+            elif i == 2:  # top right
+                c = s + wp, s - h, s + wp + w, s
+            elif i == 3:  # right
+                c = s + w0, s, s + w0 + w, s + h
+            elif i == 4:  # bottom right
+                c = s + w0, s + hp, s + w0 + w, s + hp + h
+            elif i == 5:  # bottom
+                c = s + w0 - w, s + h0, s + w0, s + h0 + h
+            elif i == 6:  # bottom left
+                c = s + w0 - wp - w, s + h0, s + w0 - wp, s + h0 + h
+            elif i == 7:  # left
+                c = s - w, s + h0 - h, s, s + h0
+            elif i == 8:  # top left
+                c = s - w, s + h0 - hp - h, s, s + h0 - hp
+
+            padx, pady = c[:2]  # unclamped tile origin, used to shift the labels
+            x1, y1, x2, y2 = (max(x, 0) for x in c)  # allocate coords
+
+            # Labels
+            labels, segments = self.labels[index].copy(), self.segments[index].copy()
+            if labels.size:
+                labels[:, 1:] = xywhn2xyxy(
+                    labels[:, 1:], w, h, padx, pady
+                )  # normalized xywh to pixel xyxy format
+                segments = [xyn2xy(x, w, h, padx, pady) for x in segments]
+            labels9.append(labels)
+            segments9.extend(segments)
+
+            # Image
+            img9[y1:y2, x1:x2] = img[y1 - pady :, x1 - padx :]  # img9[ymin:ymax, xmin:xmax]
+            hp, wp = h, w  # height, width previous
+
+        # Offset
+        yc, xc = (int(random.uniform(0, s)) for _ in self.mosaic_border)  # window offset y, x
+        img9 = img9[yc : yc + 2 * s, xc : xc + 2 * s]
+
+        # Concat/clip labels
+        labels9 = np.concatenate(labels9, 0)
+        labels9[:, [1, 3]] -= xc
+        labels9[:, [2, 4]] -= yc
+        c = np.array([xc, yc])  # centers
+        segments9 = [x - c for x in segments9]
+
+        for x in (labels9[:, 1:], *segments9):
+            np.clip(x, 0, 2 * s, out=x)  # clip when using random_perspective()
+        # img9, labels9 = replicate(img9, labels9)  # replicate
+
+        # Augment (unlike load_mosaic, no copy_paste step is applied here)
+        img9, labels9 = random_perspective(
+            img9,
+            labels9,
+            segments9,
+            degrees=self.hyp["degrees"],
+            translate=self.hyp["translate"],
+            scale=self.hyp["scale"],
+            shear=self.hyp["shear"],
+            perspective=self.hyp["perspective"],
+            border=self.mosaic_border,
+        )  # border to remove
+
+        return img9, labels9
+
+    @staticmethod
+    def collate_fn(batch):
+        """
+        Merge a list of ``(img, label, path, shapes)`` samples into one batch.
+
+        Column 0 of each sample's label tensor is overwritten in place with the
+        sample's position in the batch; images are stacked and labels concatenated.
+        """
+        images, targets, paths, shapes = zip(*batch)  # transposed
+        for position, target in enumerate(targets):
+            target[:, 0] = position  # add target image index for build_targets()
+        stacked_images = torch.stack(images, 0)
+        merged_targets = torch.cat(targets, 0)
+        return stacked_images, merged_targets, paths, shapes
+
+    @staticmethod
+    def collate_fn4(batch):
+        """
+        Merge samples four at a time into a batch a quarter of the size.
+
+        For each group of four consecutive samples, with probability 0.5 the
+        first image is upsampled 2x (bilinear) and its labels are kept;
+        otherwise the four images are tiled 2x2 and their labels are offset and
+        rescaled to match. Paths and shapes are taken from the first ``n``
+        samples of the batch.
+        """
+        img, label, path, shapes = zip(*batch)  # transposed
+        n = len(shapes) // 4
+        img4, label4, path4, shapes4 = [], [], path[:n], shapes[:n]
+
+        # Label offsets for tiles: `ho` adds 1 to column 3, `wo` adds 1 to column 2
+        ho = torch.tensor([[0.0, 0, 0, 1, 0, 0]])
+        wo = torch.tensor([[0.0, 0, 1, 0, 0, 0]])
+        s = torch.tensor([[1, 1, 0.5, 0.5, 0.5, 0.5]])  # scale
+        for i in range(n):  # zidane torch.zeros(16,3,720,1280)  # BCHW
+            i *= 4  # index of the first sample in this group of four
+            if random.random() < 0.5:
+                im = F.interpolate(
+                    img[i].unsqueeze(0).float(), scale_factor=2.0, mode="bilinear", align_corners=False
+                )[0].type(img[i].type())
+                lb = label[i]
+            else:
+                # Concatenate pairs along dim 1, then the two pairs along dim 2
+                im = torch.cat(
+                    (torch.cat((img[i], img[i + 1]), 1), torch.cat((img[i + 2], img[i + 3]), 1)), 2
+                )
+                lb = (
+                    torch.cat((label[i], label[i + 1] + ho, label[i + 2] + wo, label[i + 3] + ho + wo), 0) * s
+                )
+            img4.append(im)
+            label4.append(lb)
+
+        for i, lb in enumerate(label4):
+            lb[:, 0] = i  # add target image index for build_targets()
+
+        return torch.stack(img4, 0), torch.cat(label4, 0), path4, shapes4
+
+
+class InfiniteDataLoader(dataloader.DataLoader):
+    """Dataloader that reuses workers
+
+    Uses same syntax as vanilla DataLoader. The batch sampler is wrapped in a
+    ``_RepeatSampler`` and a single underlying iterator is created once and
+    reused by every ``__iter__`` call.
+    """
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        # object.__setattr__ is used to replace the attribute directly on the instance
+        object.__setattr__(self, "batch_sampler", _RepeatSampler(self.batch_sampler))
+        # Created after the sampler swap, so this iterator draws from the repeating sampler
+        self.iterator = super().__iter__()
+
+    def __len__(self):
+        # Length of the wrapped (original) batch sampler
+        return len(self.batch_sampler.sampler)
+
+    def __iter__(self):
+        # Yield len(self) batches from the shared, never-ending iterator
+        for i in range(len(self)):
+            yield next(self.iterator)
+
+
+class _RepeatSampler:
+    """Wrap a sampler so that iterating over it never terminates.
+
+    Args:
+        sampler (Sampler): the sampler to cycle through again and again
+    """
+
+    def __init__(self, sampler):
+        self.sampler = sampler
+
+    def __iter__(self):
+        # Restart the wrapped sampler each time it is exhausted
+        while True:
+            for item in self.sampler:
+                yield item
+
+
+def attempt_load(weights, map_location=None, inplace=True, fuse=True):
+    """Load one checkpoint, or several checkpoints as an ensemble.
+
+    Args:
+        weights: a single checkpoint reference or a list of them (weights=[a,b,c],
+            weights=[a] or weights=a)
+        map_location: forwarded to torch.load
+        inplace: value written to the ``inplace`` attribute of activation/Detect/Model modules
+        fuse: call ``fuse()`` on every loaded model before putting it in eval mode
+
+    Returns:
+        The model itself when exactly one was loaded, otherwise the Ensemble.
+    """
+    from models.yolo import Detect, Model
+
+    weight_list = weights if isinstance(weights, list) else [weights]
+    model = Ensemble()
+    for w in weight_list:
+        # NOTE: torch.load unpickles the file -- only load trusted checkpoints
+        ckpt = torch.load(attempt_download(w), map_location=map_location)
+        key = "ema" if ckpt.get("ema") else "model"  # prefer the EMA weights when present
+        net = ckpt[key].float()  # FP32 model
+        if fuse:
+            net = net.fuse()
+        model.append(net.eval())
+
+    # Compatibility updates
+    inplace_types = [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU, Detect, Model]
+    for m in model.modules():
+        module_type = type(m)
+        if module_type in inplace_types:
+            m.inplace = inplace  # pytorch 1.7.0 compatibility
+            if module_type is Detect and not isinstance(m.anchor_grid, list):
+                # new Detect Layer compatibility
+                delattr(m, "anchor_grid")
+                setattr(m, "anchor_grid", [torch.zeros(1)] * m.nl)
+        elif module_type is Conv:
+            m._non_persistent_buffers_set = set()  # pytorch 1.6.0 compatibility
+
+    if len(model) == 1:
+        return model[-1]  # single model
+
+    print(f"Ensemble created with {weights}\n")
+    model.names = model[-1].names
+    max_strides = torch.tensor([m.stride.max() for m in model])
+    model.stride = model[torch.argmax(max_strides).int()].stride  # max stride
+    return model  # ensemble
+
+
+def check_dataset(data, autodownload=True):
+    """Resolve a dataset description into a dictionary, downloading the data if needed.
+
+    Args:
+        data: path to a ``*.zip`` archive, path to a dataset YAML file, or an
+            already-parsed dataset dictionary
+        autodownload: run the dataset's ``download`` entry when the ``val`` paths are missing
+
+    Returns:
+        The dataset dictionary with ``train``/``val``/``test`` rewritten relative to the
+        resolved root and ``names`` filled in when absent.
+
+    Raises:
+        AssertionError: the dictionary has no ``nc`` key.
+        Exception: ``val`` paths are missing and no download is possible or allowed.
+    """
+    # Download and/or unzip dataset if not found locally
+    # Usage: https://github.com/ultralytics/yolov5/releases/download/v1.0/coco128_with_yaml.zip
+
+    # Download (optional)
+    extract_dir = ""
+    if isinstance(data, (str, Path)) and str(data).endswith(".zip"):  # i.e. gs://bucket/dir/coco128.zip
+        download(data, dir=DATASETS_DIR, unzip=True, delete=False, curl=False, threads=1)
+        # Use the first YAML found anywhere under the extracted archive directory
+        data = next((DATASETS_DIR / Path(data).stem).rglob("*.yaml"))
+        extract_dir, autodownload = data.parent, False
+
+    # Read yaml (optional)
+    if isinstance(data, (str, Path)):
+        with open(data, errors="ignore") as f:
+            data = yaml.safe_load(f)  # dictionary
+
+    # Resolve paths
+    path = Path(extract_dir or data.get("path") or "")  # optional 'path' default to '.'
+    if not path.is_absolute():
+        path = (ROOT / path).resolve()
+    for k in "train", "val", "test":
+        if data.get(k):  # prepend path
+            data[k] = str(path / data[k]) if isinstance(data[k], str) else [str(path / x) for x in data[k]]
+
+    # Parse yaml
+    assert "nc" in data, "Dataset 'nc' key missing."
+    if "names" not in data:
+        data["names"] = [f"class{i}" for i in range(data["nc"])]  # assign class names if missing
+    train, val, test, s = (data.get(x) for x in ("train", "val", "test", "download"))
+    if val:
+        val = [Path(x).resolve() for x in (val if isinstance(val, list) else [val])]  # val path
+        if not all(x.exists() for x in val):
+            LOGGER.info("\nDataset not found, missing paths: %s" % [str(x) for x in val if not x.exists()])
+            if s and autodownload:  # download script
+                # NOTE(review): `s` comes straight from the dataset YAML and is executed
+                # below via os.system()/exec() -- only use dataset files from trusted sources.
+                root = path.parent if "path" in data else ".."  # unzip directory i.e. '../'
+                if s.startswith("http") and s.endswith(".zip"):  # URL
+                    f = Path(s).name  # filename
+                    LOGGER.info(f"Downloading {s} to {f}...")
+                    torch.hub.download_url_to_file(s, f)
+                    Path(root).mkdir(parents=True, exist_ok=True)  # create root
+                    ZipFile(f).extractall(path=root)  # unzip
+                    Path(f).unlink()  # remove zip
+                    r = None  # success
+                elif s.startswith("bash "):  # bash script
+                    LOGGER.info(f"Running {s} ...")
+                    r = os.system(s)  # exit status, 0 on success
+                else:  # python script
+                    r = exec(s, {"yaml": data})  # return None
+                # The paths are not re-checked after the download; only the status is logged
+                LOGGER.info(
+                    f"Dataset autodownload {f'success, saved to {root}' if r in (0, None) else 'failure'}\n"
+                )
+            else:
+                raise Exception("Dataset not found.")
+
+    return data  # dictionary
+
+
+def download(url, dir=".", unzip=True, delete=True, curl=False, threads=1):
+    """Download (or move) one or more files into ``dir`` and optionally unpack them.
+
+    Used in data.yaml for autodownload.
+
+    Args:
+        url: a single URL/path or an iterable of them; an entry that is an existing
+            local file is moved into ``dir`` instead of being downloaded
+        dir: destination directory, created when missing
+        unzip: unpack ``.zip`` / ``.gz`` files after fetching them
+        delete: remove the archive once it has been unpacked
+        curl: download with the ``curl`` executable instead of torch.hub
+        threads: number of worker threads; values above 1 fetch entries concurrently
+    """
+    import subprocess  # local import: only needed by this function
+
+    def download_one(url, dir):
+        # Fetch a single file into ``dir`` and unpack it when requested
+        f = dir / Path(url).name  # filename
+        if Path(url).is_file():  # exists in current path
+            Path(url).rename(f)  # move to dir
+        elif not f.exists():
+            LOGGER.info(f"Downloading {url} to {f}...")
+            if curl:
+                # Argument list instead of a shell string: no quoting or injection issues.
+                # Retries and resumes on failure, as before.
+                subprocess.run(["curl", "-L", str(url), "-o", str(f), "--retry", "9", "-C", "-"])
+            else:
+                torch.hub.download_url_to_file(url, f, progress=True)  # torch download
+        if unzip and f.suffix in (".zip", ".gz"):
+            LOGGER.info(f"Unzipping {f}...")
+            if f.suffix == ".zip":
+                with ZipFile(f) as archive:  # close the archive handle when done
+                    archive.extractall(path=dir)
+            elif f.suffix == ".gz":
+                subprocess.run(["tar", "xfz", str(f), "--directory", str(f.parent)])
+            if delete:
+                f.unlink()  # remove archive
+
+    dir = Path(dir)
+    dir.mkdir(parents=True, exist_ok=True)  # make directory
+    # Normalize first so a single URL string is never iterated character by character
+    urls = [url] if isinstance(url, (str, Path)) else list(url)
+    if threads > 1:
+        pool = ThreadPool(threads)
+        pool.imap(lambda u: download_one(u, dir), urls)  # multi-threaded
+        pool.close()
+        pool.join()
+    else:
+        for u in urls:
+            download_one(u, dir)
+
+
+class WorkingDirectory(contextlib.ContextDecorator):
+    """Temporarily switch the process working directory.
+
+    Usage: ``@WorkingDirectory(dir)`` decorator or ``with WorkingDirectory(dir):``
+    context manager. The directory to return to is captured when the object is
+    constructed, not when the context is entered.
+    """
+
+    def __init__(self, new_dir):
+        self.cwd = Path.cwd().resolve()  # directory restored on exit
+        self.dir = new_dir  # directory entered on __enter__
+
+    def __enter__(self):
+        os.chdir(self.dir)
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        os.chdir(self.cwd)
+
+
+def try_except(func):
+    """Decorator that prints any Exception raised by ``func`` instead of propagating it.
+
+    Usage: ``@try_except``.
+
+    Returns:
+        A wrapper that returns whatever ``func`` returns, or None when ``func``
+        raised an Exception (the exception is printed and swallowed).
+    """
+    from functools import wraps  # local import keeps the block self-contained
+
+    @wraps(func)  # keep the wrapped function's name and docstring
+    def handler(*args, **kwargs):
+        try:
+            return func(*args, **kwargs)  # pass the result through to the caller
+        except Exception as e:
+            print(e)
+            return None
+
+    return handler
+
+
+def is_docker():
+    """Report whether ``/workspace`` exists, the marker used here for a Docker container."""
+    workspace = Path("/workspace")
+    return workspace.exists()  # or Path('/.dockerenv').exists()
+
+
+def check_online():
+    """Check internet connectivity by opening a TCP connection to 1.1.1.1:443.
+
+    Returns:
+        True when the connection succeeds within 5 seconds, False on any OSError.
+    """
+    import socket
+
+    try:
+        # The context manager closes the probe socket instead of leaking it
+        with socket.create_connection(("1.1.1.1", 443), 5):
+            return True
+    except OSError:
+        return False
+
+
+def emojis(str=""):
+    """Return ``str`` unchanged, except on Windows where non-ASCII characters are dropped."""
+    if platform.system() != "Windows":
+        return str
+    return str.encode().decode("ascii", "ignore")
+
+
+@try_except
+@WorkingDirectory(ROOT)
+def check_git_status():
+    """Log whether the checkout in ROOT is behind ``origin/master``.
+
+    Runs with ROOT as working directory. The check is skipped (the assertion message
+    is printed by ``try_except``) when ROOT is not a git repository, when running in
+    Docker, or when offline.
+    """
+    # Recommend 'git pull' if code is out of date
+    msg = ", for updates see https://github.com/ultralytics/yolov5"
+    s = colorstr("github: ")  # string
+    assert Path(".git").exists(), s + "skipping check (not a git repository)" + msg
+    assert not is_docker(), s + "skipping check (Docker image)" + msg
+    assert check_online(), s + "skipping check (offline)" + msg
+
+    cmd = "git fetch && git config --get remote.origin.url"
+    url = check_output(cmd, shell=True, timeout=5).decode().strip()  # git fetch
+    if url.endswith(".git"):
+        # Drop the suffix only: str.rstrip(".git") would strip any trailing run of the
+        # characters '.', 'g', 'i', 't' and mangle URLs such as ".../digit.git"
+        url = url[: -len(".git")]
+    branch = check_output("git rev-parse --abbrev-ref HEAD", shell=True).decode().strip()  # checked out
+    n = int(check_output(f"git rev-list {branch}..origin/master --count", shell=True))  # commits behind
+    if n > 0:
+        s += (
+            f"⚠️ YOLOv5 is out of date by {n} commit{'s' * (n > 1)}. "
+            f"Use `git pull` or `git clone {url}` to update."
+        )
+    else:
+        s += f"up to date with {url} ✅"
+    LOGGER.info(emojis(s))  # emoji-safe
+
+
+def check_python(minimum="3.6.2"):
+    """Compare the running Python version against ``minimum`` via check_version (hard=True)."""
+    running = platform.python_version()
+    check_version(running, minimum, name="Python ", hard=True)
+
+
+@try_except
+def check_requirements(requirements=ROOT / "requirements.txt", exclude=(), install=True):
+    """Check that installed packages satisfy ``requirements`` and optionally install missing ones.
+
+    Args:
+        requirements: path to a requirements ``*.txt`` file, or a list/tuple of
+            requirement strings
+        exclude: package names to skip
+        install: attempt ``pip install`` for every unmet requirement; when False only
+            a message is logged
+
+    Any exception escaping this function is printed and swallowed by ``try_except``.
+    """
+    # Check installed dependencies meet requirements (pass *.txt file or list of packages)
+    prefix = colorstr("red", "bold", "requirements:")
+    check_python()  # check python version
+    if isinstance(requirements, (str, Path)):  # requirements.txt file
+        file = Path(requirements)
+        assert file.exists(), f"{prefix} {file.resolve()} not found, check failed."
+        with file.open() as f:
+            # `pkg` is presumably pkg_resources -- TODO confirm against the file's imports
+            requirements = [
+                f"{x.name}{x.specifier}" for x in pkg.parse_requirements(f) if x.name not in exclude
+            ]
+    else:  # list or tuple of packages
+        requirements = [x for x in requirements if x not in exclude]
+
+    n = 0  # number of packages updates
+    for r in requirements:
+        try:
+            pkg.require(r)
+        except Exception:  # DistributionNotFound or VersionConflict if requirements not met
+            s = f"{prefix} {r} not found and is required by YOLOv5"
+            if install:
+                LOGGER.info(f"{s}, attempting auto-update...")
+                try:
+                    assert check_online(), f"'pip install {r}' skipped (offline)"
+                    LOGGER.info(check_output(f"pip install '{r}'", shell=True).decode())
+                    n += 1
+                except Exception as e:
+                    # Installation problems are logged, the remaining requirements are still checked
+                    LOGGER.warning(f"{prefix} {e}")
+            else:
+                LOGGER.info(f"{s}. Please install and rerun your command.")
+
+    if n:  # if packages updated
+        # `file` only exists as a local when a requirements file path was passed in
+        source = file.resolve() if "file" in locals() else requirements
+        s = (
+            f"{prefix} {n} package{'s' * (n > 1)} updated per {source}\n"
+            f"{prefix} ⚠️ {colorstr('bold', 'Restart runtime or rerun command for updates to take effect')}\n"
+        )
+        LOGGER.info(emojis(s))
+
+
+class Loggers:
+    """Fan training callbacks out to the CSV, TensorBoard and W&B loggers.
+
+    The W&B logger construction is commented out in ``__init__``, so ``self.wandb`` is
+    always None here and every ``if self.wandb`` branch below is currently inactive.
+
+    Args:
+        save_dir: run directory (path-like); results.csv, plots and TensorBoard events live there
+        weights: checkpoint path, loaded only to look up a W&B run id when resuming
+        opt: training options; ``evolve``, ``resume`` and ``save_period`` are read
+        hyp: hyperparameters, copied onto ``opt.hyp`` in the W&B branch
+        logger: console logger used for info messages
+        include: names of the loggers to enable
+    """
+
+    def __init__(self, save_dir=None, weights=None, opt=None, hyp=None, logger=None, include=LOGGERS):
+        self.save_dir = save_dir
+        self.weights = weights
+        self.opt = opt
+        self.hyp = hyp
+        self.logger = logger  # for printing results to console
+        self.include = include
+        self.keys = [
+            "train/box_loss",
+            "train/obj_loss",
+            "train/cls_loss",  # train loss
+            "metrics/precision",
+            "metrics/recall",
+            "metrics/mAP_0.5",
+            "metrics/mAP_0.5:0.95",  # metrics
+            "val/box_loss",
+            "val/obj_loss",
+            "val/cls_loss",  # val loss
+            "x/lr0",
+            "x/lr1",
+            "x/lr2",
+        ]  # params
+        self.best_keys = [
+            "best/epoch",
+            "best/precision",
+            "best/recall",
+            "best/mAP_0.5",
+            "best/mAP_0.5:0.95",
+        ]
+        for k in LOGGERS:
+            setattr(self, k, None)  # init empty logger dictionary
+        self.csv = True  # always log to csv
+
+        # Message
+        if not wandb:
+            prefix = colorstr("Weights & Biases: ")
+            s = (
+                f"{prefix}run 'pip install wandb' to automatically track and "
+                "visualize YOLOv5 🚀 runs (RECOMMENDED)"
+            )
+            print(emojis(s))
+
+        # TensorBoard
+        s = self.save_dir
+        if "tb" in self.include and not self.opt.evolve:
+            prefix = colorstr("TensorBoard: ")
+            self.logger.info(
+                f"{prefix}Start with 'tensorboard --logdir {s.parent}', view at http://localhost:6006/"
+            )
+            self.tb = SummaryWriter(str(s))
+
+        # W&B
+        if wandb and "wandb" in self.include:
+            wandb_artifact_resume = isinstance(self.opt.resume, str) and self.opt.resume.startswith(
+                "wandb-artifact://"
+            )
+            # The run id is looked up but unused while WandbLogger stays disabled below
+            _ = (
+                torch.load(self.weights).get("wandb_id")
+                if self.opt.resume and not wandb_artifact_resume
+                else None
+            )
+            self.opt.hyp = self.hyp  # add hyperparameters
+            # self.wandb = WandbLogger(self.opt, run_id)
+            self.wandb = None
+        else:
+            self.wandb = None
+
+    def on_pretrain_routine_end(self):
+        # Callback runs on pre-train routine end
+        paths = self.save_dir.glob("*labels*.jpg")  # training labels
+        if self.wandb:
+            self.wandb.log({"Labels": [wandb.Image(str(x), caption=x.name) for x in paths]})
+
+    def on_train_batch_end(self, ni, model, imgs, targets, paths, plots, sync_bn):
+        # Callback runs on train batch end
+        if plots:
+            # self.tb stays None when TensorBoard is not enabled, so guard it before use.
+            # tb.add_graph() --sync known issue https://github.com/ultralytics/yolov5/issues/3754
+            if ni == 0 and self.tb and not sync_bn:
+                with warnings.catch_warnings():
+                    warnings.simplefilter("ignore")  # suppress jit trace warning
+                    self.tb.add_graph(torch.jit.trace(de_parallel(model), imgs[0:1], strict=False), [])
+            if ni < 3:
+                f = self.save_dir / f"train_batch{ni}.jpg"  # filename
+                Thread(target=plot_images, args=(imgs, targets, paths, f), daemon=True).start()
+            if self.wandb and ni == 10:
+                files = sorted(self.save_dir.glob("train*.jpg"))
+                self.wandb.log(
+                    {"Mosaics": [wandb.Image(str(f), caption=f.name) for f in files if f.exists()]}
+                )
+
+    def on_train_epoch_end(self, epoch):
+        # Callback runs on train epoch end
+        if self.wandb:
+            self.wandb.current_epoch = epoch + 1
+
+    def on_val_image_end(self, pred, predn, path, names, im):
+        # Callback runs on val image end
+        if self.wandb:
+            self.wandb.val_one_image(pred, predn, path, names, im)
+
+    def on_val_end(self):
+        # Callback runs on val end
+        if self.wandb:
+            files = sorted(self.save_dir.glob("val*.jpg"))
+            self.wandb.log({"Validation": [wandb.Image(str(f), caption=f.name) for f in files]})
+
+    def on_fit_epoch_end(self, vals, epoch, best_fitness, fi):
+        # Callback runs at the end of each fit (train+val) epoch
+        x = {k: v for k, v in zip(self.keys, vals)}  # dict
+        if self.csv:
+            file = self.save_dir / "results.csv"
+            n = len(x) + 1  # number of cols
+            s = (
+                "" if file.exists() else (("%20s," * n % tuple(["epoch"] + self.keys)).rstrip(",") + "\n")
+            )  # add header
+            with open(file, "a") as f:
+                f.write(s + ("%20.5g," * n % tuple([epoch] + vals)).rstrip(",") + "\n")
+
+        if self.tb:
+            for k, v in x.items():
+                self.tb.add_scalar(k, v, epoch)
+
+        if self.wandb:
+            if best_fitness == fi:
+                best_results = [epoch] + vals[3:7]
+                for i, name in enumerate(self.best_keys):
+                    # log best results in the summary
+                    self.wandb.wandb_run.summary[name] = best_results[i]
+            self.wandb.log(x)
+            self.wandb.end_epoch(best_result=best_fitness == fi)
+
+    def on_model_save(self, last, epoch, final_epoch, best_fitness, fi):
+        # Callback runs on model save event
+        if self.wandb:
+            if ((epoch + 1) % self.opt.save_period == 0 and not final_epoch) and self.opt.save_period != -1:
+                self.wandb.log_model(last.parent, self.opt, epoch, fi, best_model=best_fitness == fi)
+
+    def on_train_end(self, last, best, plots, epoch, results):
+        # Callback runs on training end
+        if plots:
+            plot_results(file=self.save_dir / "results.csv")  # save results.png
+        files = ["results.png", "confusion_matrix.png", *(f"{x}_curve.png" for x in ("F1", "PR", "P", "R"))]
+        files = [(self.save_dir / f) for f in files if (self.save_dir / f).exists()]  # filter
+
+        if self.tb:
+            import cv2
+
+            for f in files:
+                # [..., ::-1] reverses the channel order of the image returned by cv2.imread
+                self.tb.add_image(f.stem, cv2.imread(str(f))[..., ::-1], epoch, dataformats="HWC")
+
+        if self.wandb:
+            self.wandb.log({k: v for k, v in zip(self.keys[3:10], results)})  # log best.pt val results
+            self.wandb.log({"Results": [wandb.Image(str(f), caption=f.name) for f in files]})
+            # Calling wandb.log. TODO: Refactor this into WandbLogger.log_model
+            if not self.opt.evolve:
+                wandb.log_artifact(
+                    str(best if best.exists() else last),
+                    type="model",
+                    name="run_" + self.wandb.wandb_run.id + "_model",
+                    aliases=["latest", "best", "stripped"],
+                )
+            self.wandb.finish_run()
+
+    def on_params_update(self, params):
+        # Update hyperparams or configs of the experiment
+        # params: A dict containing {param: value} pairs
+        if self.wandb:
+            self.wandb.wandb_run.config.update(params, allow_val_change=True)
+
+
+class DetectMultiBackend(nn.Module):
+    """YOLOv5 inference wrapper around a model loaded with ``attempt_load``.
+
+    Args:
+        weights: checkpoint path, or a list of paths (passed as a list to attempt_load)
+        device: forwarded to attempt_load as ``map_location`` and used by ``warmup``
+        dnn: accepted for interface compatibility; not used by the loading code here
+        data: optional data.yaml path; its ``names`` are read but then replaced by the
+            names stored on the loaded model
+    """
+
+    def __init__(self, weights="yolov5s.pt", device=None, dnn=False, data=None):
+        # Usage:
+        #   PyTorch:              weights = *.pt
+        # NOTE(review): the loading code below only goes through attempt_load; no other
+        # backend format is handled in this class.
+        import yaml
+        from yolort.v5.models.experimental import (  # scoped to avoid circular import
+            attempt_download,
+            attempt_load,
+        )
+
+        super().__init__()
+        w = str(weights[0] if isinstance(weights, list) else weights)
+        stride, names = 64, [f"class{i}" for i in range(1000)]  # assign defaults
+        # forward() branches on self.jit, which was never defined before. Assumed False
+        # because the model always comes from attempt_load -- TODO confirm no TorchScript path.
+        jit = False
+        w = attempt_download(w)  # download if not local
+        if data:  # data.yaml path (optional)
+            with open(data, errors="ignore") as f:
+                names = yaml.safe_load(f)["names"]  # class names
+        model = attempt_load(weights if isinstance(weights, list) else w, map_location=device)
+        stride = max(int(model.stride.max()), 32)  # model stride
+        names = model.module.names if hasattr(model, "module") else model.names  # get class names
+        self.model = model  # explicitly assign for to(), cpu(), cuda(), half()
+
+        # Expose every local above (device, stride, names, jit, ...) as an attribute
+        self.__dict__.update(locals())  # assign all variables to self
+
+    def forward(self, im, augment=False, visualize=False, val=False):
+        """Run inference on ``im``; return the full model output when ``val`` else its first item."""
+        b, ch, h, w = im.shape  # batch, channel, height, width
+        y = self.model(im) if self.jit else self.model(im, augment=augment, visualize=visualize)
+        return y if val else y[0]
+
+    def warmup(self, imgsz=(1, 3, 640, 640), half=False):
+        """Run one forward pass on a zero tensor of shape ``imgsz``; skipped for CPU devices."""
+
+        if isinstance(self.device, torch.device) and self.device.type != "cpu":  # only warmup GPU models
+            im = torch.zeros(*imgsz).to(self.device).type(torch.half if half else torch.float)  # input image
+            self.forward(im)  # warmup
+
+
+class Callbacks:
+    """
+    Registry of named hooks and the actions attached to each of them.
+    """
+
+    def __init__(self):
+        # Every hook that may be registered against, in firing-order groups
+        hook_names = (
+            "on_pretrain_routine_start",
+            "on_pretrain_routine_end",
+            "on_train_start",
+            "on_train_epoch_start",
+            "on_train_batch_start",
+            "optimizer_step",
+            "on_before_zero_grad",
+            "on_train_batch_end",
+            "on_train_epoch_end",
+            "on_val_start",
+            "on_val_batch_start",
+            "on_val_image_end",
+            "on_val_batch_end",
+            "on_val_end",
+            "on_fit_epoch_end",  # fit = train + val
+            "on_model_save",
+            "on_train_end",
+            "on_params_update",
+            "teardown",
+        )
+        self._callbacks = {hook: [] for hook in hook_names}
+        self.stop_training = False  # set True to interrupt training
+
+    def register_action(self, hook, name="", callback=None):
+        """
+        Attach an action to one of the known hooks
+
+        Args:
+            hook        Name of the hook the action is attached to
+            name        Label stored with the action for later reference
+            callback    Callable fired when the hook runs
+        """
+        assert hook in self._callbacks, f"hook '{hook}' not found in callbacks {self._callbacks}"
+        assert callable(callback), f"callback '{callback}' is not callable"
+        entry = {"name": name, "callback": callback}
+        self._callbacks[hook].append(entry)
+
+    def get_registered_actions(self, hook=None):
+        """
+        Return the actions registered for one hook, or the whole registry
+
+        Args:
+            hook Name of the hook to look up; a falsy value returns all hooks
+        """
+        return self._callbacks[hook] if hook else self._callbacks
+
+    def run(self, hook, *args, **kwargs):
+        """
+        Fire every action registered for a hook, in registration order
+
+        Args:
+            hook Name of the hook to fire
+            args Positional arguments passed on to each callback
+            kwargs Keyword arguments passed on to each callback
+        """
+
+        assert hook in self._callbacks, f"hook '{hook}' not found in callbacks {self._callbacks}"
+
+        for action in self._callbacks[hook]:
+            fire = action["callback"]
+            fire(*args, **kwargs)
+
+
+def ap_per_class(tp, conf, pred_cls, target_cls, plot=False, save_dir=".", names=(), eps=1e-16):
+    """Compute the average precision, given the recall and precision curves.
+    Source: https://github.com/rafaelpadilla/Object-Detection-Metrics.
+    # Arguments
+        tp:  True positives (nparray, nx1 or nx10).
+        conf:  Objectness value from 0-1 (nparray).
+        pred_cls:  Predicted object classes (nparray).
+        target_cls:  True object classes (nparray).
+        plot:  Plot precision-recall curve at mAP@0.5
+        save_dir:  Plot save directory
+        names:  Class names, either a dict {class_index: name} or a sequence indexed by class.
+        eps:  Small constant added to denominators to avoid division by zero.
+    # Returns
+        (tp, fp, p, r, f1, ap, unique_classes): per-class values taken at the confidence
+        that maximizes the mean F1, the AP array, and the class indices found in target_cls.
+    """
+
+    # Sort by objectness
+    i = np.argsort(-conf)
+    tp, conf, pred_cls = tp[i], conf[i], pred_cls[i]
+
+    # Find unique classes
+    unique_classes, nt = np.unique(target_cls, return_counts=True)
+    nc = unique_classes.shape[0]  # number of classes, number of detections
+
+    # Create Precision-Recall curve and compute AP for each class
+    px, py = np.linspace(0, 1, 1000), []  # for plotting
+    ap, p, r = np.zeros((nc, tp.shape[1])), np.zeros((nc, 1000)), np.zeros((nc, 1000))
+    for ci, c in enumerate(unique_classes):
+        i = pred_cls == c
+        n_l = nt[ci]  # number of labels
+        n_p = i.sum()  # number of predictions
+
+        if n_p == 0 or n_l == 0:
+            continue  # nothing to score for this class; its rows stay zero
+
+        # Accumulate FPs and TPs
+        fpc = (1 - tp[i]).cumsum(0)
+        tpc = tp[i].cumsum(0)
+
+        # Recall
+        recall = tpc / (n_l + eps)  # recall curve
+        r[ci] = np.interp(-px, -conf[i], recall[:, 0], left=0)  # negative x, xp because xp decreases
+
+        # Precision
+        precision = tpc / (tpc + fpc)  # precision curve
+        p[ci] = np.interp(-px, -conf[i], precision[:, 0], left=1)  # p at pr_score
+
+        # AP from recall-precision curve
+        for j in range(tp.shape[1]):
+            ap[ci, j], mpre, mrec = compute_ap(recall[:, j], precision[:, j])
+            if plot and j == 0:
+                py.append(np.interp(px, mrec, mpre))  # precision at mAP@0.5
+
+    # Compute F1 (harmonic mean of precision and recall)
+    f1 = 2 * p * r / (p + r + eps)
+    if not isinstance(names, dict):
+        # The default names=() (or any list/tuple) has no .items(); index it by position
+        names = dict(enumerate(names))
+    names = [v for k, v in names.items() if k in unique_classes]  # list: only classes that have data
+    names = dict(enumerate(names))  # to dict
+    if plot:
+        plot_pr_curve(px, py, ap, Path(save_dir) / "PR_curve.png", names)
+        plot_mc_curve(px, f1, Path(save_dir) / "F1_curve.png", names, ylabel="F1")
+        plot_mc_curve(px, p, Path(save_dir) / "P_curve.png", names, ylabel="Precision")
+        plot_mc_curve(px, r, Path(save_dir) / "R_curve.png", names, ylabel="Recall")
+
+    i = f1.mean(0).argmax()  # max F1 index
+    p, r, f1 = p[:, i], r[:, i], f1[:, i]
+    tp = (r * nt).round()  # true positives
+    fp = (tp / (p + eps) - tp).round()  # false positives
+    return tp, fp, p, r, f1, ap, unique_classes.astype("int32")
+
+
+def compute_ap(recall, precision):
+    """Integrate a precision-recall curve into an average-precision value
+    # Arguments
+        recall:    The recall curve (list)
+        precision: The precision curve (list)
+    # Returns
+        Average precision, precision envelope, padded recall curve
+    """
+
+    # Pad both curves with sentinel end points
+    mrec = np.concatenate(([0.0], recall, [1.0]))
+    mpre = np.concatenate(([1.0], precision, [0.0]))
+
+    # Precision envelope: running maximum taken from the right-hand side
+    mpre = np.maximum.accumulate(mpre[::-1])[::-1]
+
+    method = "interp"  # methods: 'continuous', 'interp'
+    if method != "interp":  # 'continuous'
+        steps = np.where(mrec[1:] != mrec[:-1])[0]  # points where x axis (recall) changes
+        area = np.sum((mrec[steps + 1] - mrec[steps]) * mpre[steps + 1])  # area under curve
+        return area, mpre, mrec
+
+    grid = np.linspace(0, 1, 101)  # 101-point interp (COCO)
+    area = np.trapz(np.interp(grid, mrec, mpre), grid)  # integrate
+    return area, mpre, mrec
+
+
+def plot_pr_curve(px, py, ap, save_dir="pr_curve.png", names=()):
+    """Draw the precision-recall curves and save the figure to the path ``save_dir``.
+
+    Per-class curves get their own legend entry when there are between 1 and 20
+    names; otherwise they are drawn in grey. The mean curve is always drawn in blue.
+    """
+    fig, ax = plt.subplots(1, 1, figsize=(9, 6), tight_layout=True)
+    py = np.stack(py, axis=1)
+
+    show_class_legend = 0 < len(names) < 21  # display per-class legend if < 21 classes
+    if not show_class_legend:
+        ax.plot(px, py, linewidth=1, color="grey")  # plot(recall, precision)
+    else:
+        for cls_idx, curve in enumerate(py.T):
+            ax.plot(px, curve, linewidth=1, label=f"{names[cls_idx]} {ap[cls_idx, 0]:.3f}")
+
+    mean_label = "all classes %.3f mAP@0.5" % ap[:, 0].mean()
+    ax.plot(px, py.mean(1), linewidth=3, color="blue", label=mean_label)
+    ax.set_xlabel("Recall")
+    ax.set_ylabel("Precision")
+    ax.set_xlim(0, 1)
+    ax.set_ylim(0, 1)
+    plt.legend(bbox_to_anchor=(1.04, 1), loc="upper left")
+    out_file = Path(save_dir)
+    fig.savefig(out_file, dpi=250)
+    plt.close()
+
+
+def plot_mc_curve(px, py, save_dir="mc_curve.png", names=(), xlabel="Confidence", ylabel="Metric"):
+    """Draw metric-versus-confidence curves and save the figure to the path ``save_dir``.
+
+    Per-class curves get their own legend entry when there are between 1 and 20
+    names; otherwise they are drawn in grey. The class-mean curve is always drawn in
+    blue and labelled with its maximum and the x position of that maximum.
+    """
+    fig, ax = plt.subplots(1, 1, figsize=(9, 6), tight_layout=True)
+
+    show_class_legend = 0 < len(names) < 21  # display per-class legend if < 21 classes
+    if not show_class_legend:
+        ax.plot(px, py.T, linewidth=1, color="grey")  # plot(confidence, metric)
+    else:
+        for cls_idx, curve in enumerate(py):
+            ax.plot(px, curve, linewidth=1, label=f"{names[cls_idx]}")
+
+    y = py.mean(0)
+    mean_label = f"all classes {y.max():.2f} at {px[y.argmax()]:.3f}"
+    ax.plot(px, y, linewidth=3, color="blue", label=mean_label)
+    ax.set_xlabel(xlabel)
+    ax.set_ylabel(ylabel)
+    ax.set_xlim(0, 1)
+    ax.set_ylim(0, 1)
+    plt.legend(bbox_to_anchor=(1.04, 1), loc="upper left")
+    out_file = Path(save_dir)
+    fig.savefig(out_file, dpi=250)
+    plt.close()
+
+
+def linear_lr(lrf, epochs):
+    """Build a linear learning-rate factor schedule.
+
+    Args:
+        lrf: final factor, reached at the last epoch
+        epochs: total number of epochs
+
+    Returns:
+        A function mapping epoch index ``x`` to a factor that falls linearly from 1.0
+        at ``x = 0`` to ``lrf`` at ``x = epochs - 1``.
+    """
+    span = max(epochs - 1, 1)  # a single-epoch run would otherwise divide by zero
+    return lambda x: (1 - x / span) * (1.0 - lrf) + lrf
diff --git a/yolort/trainer/val.py b/yolort/trainer/val.py
new file mode 100644
index 00000000..5bbc1269
--- /dev/null
+++ b/yolort/trainer/val.py
@@ -0,0 +1,455 @@
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
+"""
+Validate a trained YOLOv5 model accuracy on a custom dataset
+
+Usage:
+    $ python -m yolort.trainer.val --weights yolov5s.pt --data coco128.yaml --img 640
+
+Usage - formats:
+    $ python -m yolort.trainer.val --weights yolov5s.pt                 # PyTorch
+                                             yolov5s.torchscript        # TorchScript
+                                             yolov5s.onnx               # ONNX Runtime or OpenCV DNN with --dnn
+                                             yolov5s.xml                # OpenVINO
+                                             yolov5s.engine             # TensorRT
+                                             yolov5s.mlmodel            # CoreML (macOS-only)
+                                             yolov5s_saved_model        # TensorFlow SavedModel
+                                             yolov5s.pb                 # TensorFlow GraphDef
+                                             yolov5s.tflite             # TensorFlow Lite
+                                             yolov5s_edgetpu.tflite     # TensorFlow Edge TPU
+"""
+
+import argparse
+import json
+import os
+from pathlib import Path
+from threading import Thread
+
+import numpy as np
+import torch
+from tqdm import tqdm
+
+# The upstream YOLOv5 preamble (FILE = Path(__file__).resolve(), ROOT = FILE.parents[0], append ROOT to
+# sys.path, then make ROOT relative to the cwd) is disabled in this copy, so FILE is not defined here.
+# ROOT is pinned to the relative path "." instead, which makes every default built from it below
+# ("runs/val", "data/coco128.yaml", "yolov5s.pt", "requirements.txt") resolve against the current
+# working directory of the process rather than against the location of this file.
+ROOT = Path(".")
+
+from yolort.v5.utils.general import (
+    LOGGER,
+    box_iou,
+    check_img_size,
+    check_yaml,
+    colorstr,
+    increment_path,
+    non_max_suppression,
+    print_args,
+    scale_coords,
+    xywh2xyxy,
+    xyxy2xywh,
+)
+from yolort.v5.utils.metrics import ConfusionMatrix
+from yolort.v5.utils.plots import output_to_target, plot_images, plot_val_study
+from yolort.v5.utils.torch_utils import select_device, time_sync
+
+from .utils import (
+    DetectMultiBackend,
+    check_dataset,
+    check_requirements,
+    create_dataloader,
+    Callbacks,
+    ap_per_class,
+)
+
+
+def save_one_txt(predn, save_conf, shape, file):
+    """Append one "cls x y w h [conf]" line per row of ``predn`` to ``file``; xywh is divided by ``gn``."""
+    gn = torch.tensor(shape)[[1, 0, 1, 0]]  # normalization gain whwh
+    for *corners, score, label in predn.tolist():
+        xywh = (xyxy2xywh(torch.tensor(corners).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
+        fields = (label, *xywh) + ((score,) if save_conf else ())  # label format
+        with open(file, "a") as txt:  # NOTE: the file is reopened in append mode for every detection
+            txt.write(("%g " * len(fields)).rstrip() % fields + "\n")
+
+
+def save_one_json(predn, jdict, path, class_map):
+    """
+    Append one COCO-style result dict per row of ``predn`` to ``jdict`` (the list is modified in place).
+
+    Example entry:
+    {"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}
+    """
+    stem = path.stem
+    image_id = int(stem) if stem.isnumeric() else stem  # numeric file stems become integer ids
+    boxes = xyxy2xywh(predn[:, :4])  # xywh
+    boxes[:, :2] -= boxes[:, 2:] / 2  # xy center to top-left corner
+    for det, xywh in zip(predn.tolist(), boxes.tolist()):
+        category, score = class_map[int(det[5])], round(det[4], 5)
+        bbox = [round(v, 3) for v in xywh]
+        jdict.append({"image_id": image_id, "category_id": category, "bbox": bbox, "score": score})
+
+
+def process_batch(detections, labels, iouv):
+    """
+    Build the per-detection correctness matrix. Both sets of boxes are in (x1, y1, x2, y2) format.
+    Arguments:
+        detections (Array[N, 6]), x1, y1, x2, y2, conf, class
+        labels (Array[M, 5]), class, x1, y1, x2, y2
+        iouv (Array[K]), IoU thresholds; iouv[0] is the gate a candidate pair has to reach
+    Returns:
+        correct (Array[N, K]) bool, True where the IoU of the detection's kept match reaches that threshold
+    """
+    correct = torch.zeros(detections.shape[0], iouv.shape[0], dtype=torch.bool, device=iouv.device)
+    iou = box_iou(labels[:, 1:], detections[:, :4])
+    same_class = labels[:, 0:1] == detections[:, 5]
+    label_idx, det_idx = torch.where((iou >= iouv[0]) & same_class)  # IoU above threshold and classes match
+    n_candidates = label_idx.shape[0]
+    if not n_candidates:
+        return correct
+    pairs = torch.stack((label_idx, det_idx), 1)
+    matches = torch.cat((pairs, iou[label_idx, det_idx][:, None]), 1).cpu().numpy()  # [label, detection, iou]
+    if n_candidates > 1:
+        matches = matches[matches[:, 2].argsort()[::-1]]  # highest IoU first
+        matches = matches[np.unique(matches[:, 1], return_index=True)[1]]  # first (best) row per detection
+        matches = matches[np.unique(matches[:, 0], return_index=True)[1]]  # first remaining row per label
+    matches = torch.Tensor(matches).to(iouv.device)
+    correct[matches[:, 1].long()] = matches[:, 2:3] >= iouv
+    return correct
+
+
+@torch.no_grad()
+def run(
+    data,  # dataset dict when `model` is given; otherwise a dataset spec that is passed through check_dataset()
+    weights=None,  # model.pt path(s)
+    batch_size=32,  # batch size
+    imgsz=640,  # inference size (pixels)
+    conf_thres=0.001,  # confidence threshold
+    iou_thres=0.6,  # NMS IoU threshold
+    task="val",  # train, val, test, speed or study
+    device="",  # cuda device, i.e. 0 or 0,1,2,3 or cpu
+    workers=8,  # max dataloader workers (per RANK in DDP mode)
+    single_cls=False,  # treat as single-class dataset
+    augment=False,  # augmented inference
+    verbose=False,  # verbose output
+    save_txt=False,  # save results to *.txt
+    save_hybrid=False,  # save label+prediction hybrid results to *.txt
+    save_conf=False,  # save confidences in --save-txt labels
+    save_json=False,  # save a COCO-JSON results file
+    project=ROOT / "runs/val",  # save to project/name
+    name="exp",  # save to project/name
+    exist_ok=False,  # existing project/name ok, do not increment
+    half=True,  # use FP16 half-precision inference
+    dnn=False,  # use OpenCV DNN for ONNX inference
+    model=None,  # already-built model; when given, no weights are loaded and `dataloader`/`save_dir` are used
+    dataloader=None,  # yields (im, targets, paths, shapes); only read when `model` is given
+    save_dir=Path(""),  # output directory; replaced by an incremented project/name when `model` is None
+    plots=True,  # save val batch images and the confusion matrix, and pass plot=True to ap_per_class
+    callbacks=Callbacks(),  # NOTE(review): evaluated once at definition time, so this instance is shared
+    compute_loss=None,  # optional callable(train_out, targets); item [1] of its result is summed into `loss`
+):
+    """Validate a model on one dataset split; returns ((mp, mr, map50, map, *mean losses), per-class maps, t)."""
+    training = model is not None  # a supplied model selects the in-training path; otherwise load one below
+    if training:  # called by train.py
+        device, pt, jit, engine = (
+            next(model.parameters()).device,
+            True,
+            False,
+            False,
+        )  # get model device, PyTorch model
+
+        half &= device.type != "cpu"  # half precision only supported on CUDA
+        model.half() if half else model.float()
+    else:  # called directly
+        device = select_device(device, batch_size=batch_size)
+
+        # Directories: create save_dir, or save_dir/labels when text results are requested
+        save_dir = increment_path(Path(project) / name, exist_ok=exist_ok)  # increment run
+        (save_dir / "labels" if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir
+
+        # Load model and read the backend flags that decide precision, batch size and device
+        model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data)
+        stride, pt, jit, onnx, engine = model.stride, model.pt, model.jit, model.onnx, model.engine
+        imgsz = check_img_size(imgsz, s=stride)  # check image size
+        half &= (
+            pt or jit or onnx or engine
+        ) and device.type != "cpu"  # FP16 supported on limited backends with CUDA
+        if pt or jit:
+            model.model.half() if half else model.model.float()
+        elif engine:
+            batch_size = model.batch_size
+        else:
+            half = False
+            batch_size = 1  # export.py models default to batch-size 1
+            device = torch.device("cpu")
+            LOGGER.info(
+                f"Forcing --batch-size 1 square inference shape(1,3,{imgsz},{imgsz}) for non-PyTorch backends"
+            )
+
+        # Data: from here on `data` is whatever check_dataset() returned
+        data = check_dataset(data)  # check
+
+    # Configure
+    model.eval()
+    is_coco = isinstance(data.get("val"), str) and data["val"].endswith("coco/val2017.txt")  # COCO dataset
+    nc = 1 if single_cls else int(data["nc"])  # number of classes
+    iouv = torch.linspace(0.5, 0.95, 10).to(device)  # iou vector for mAP@0.5:0.95
+    niou = iouv.numel()
+
+    # Dataloader (only built here when no model/dataloader pair was supplied)
+    if not training:
+        model.warmup(imgsz=(1 if pt else batch_size, 3, imgsz, imgsz), half=half)  # warmup
+        pad = 0.0 if task in ("speed", "benchmark") else 0.5
+        rect = False if task == "benchmark" else pt  # square inference for benchmarks
+        task = task if task in ("train", "val", "test") else "val"  # path to train/val/test images
+        dataloader = create_dataloader(
+            data[task],
+            imgsz,
+            batch_size,
+            stride,
+            single_cls,
+            pad=pad,
+            rect=rect,
+            workers=workers,
+            prefix=colorstr(f"{task}: "),
+        )[0]
+
+    seen = 0  # number of images processed so far
+    confusion_matrix = ConfusionMatrix(nc=nc)
+    names = {k: v for k, v in enumerate(model.names if hasattr(model, "names") else model.module.names)}
+    class_map = coco80_to_coco91_class() if is_coco else list(range(1000))  # NOTE(review): coco80_to_coco91_class is never imported in this module -> NameError when is_coco
+    s = ("%20s" + "%11s" * 6) % ("Class", "Images", "Labels", "P", "R", "mAP@.5", "mAP@.5:.95")
+    dt, p, r, f1, mp, mr, map50, map = [0.0, 0.0, 0.0], 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
+    loss = torch.zeros(3, device=device)  # running sum of compute_loss(...)[1] over batches
+    jdict, stats, ap, ap_class = [], [], [], []
+    pbar = tqdm(dataloader, desc=s, bar_format="{l_bar}{bar:10}{r_bar}{bar:-10b}")  # progress bar
+    for batch_i, (im, targets, paths, shapes) in enumerate(pbar):
+        t1 = time_sync()
+        if pt or jit or engine:
+            im = im.to(device, non_blocking=True)
+            targets = targets.to(device)
+        im = im.half() if half else im.float()  # uint8 to fp16/32
+        im /= 255  # 0 - 255 to 0.0 - 1.0
+        nb, _, height, width = im.shape  # batch size, channels, height, width
+        t2 = time_sync()
+        dt[0] += t2 - t1  # pre-process time
+
+        # Inference
+        out, train_out = (
+            model(im) if training else model(im, augment=augment, val=True)
+        )  # inference, loss outputs
+        dt[1] += time_sync() - t2  # inference time
+
+        # Loss
+        if compute_loss:
+            loss += compute_loss([x.float() for x in train_out], targets)[1]  # box, obj, cls
+
+        # NMS
+        targets[:, 2:] *= torch.Tensor([width, height, width, height]).to(device)  # to pixels
+        lb = [targets[targets[:, 0] == i, 1:] for i in range(nb)] if save_hybrid else []  # for autolabelling
+        t3 = time_sync()
+        out = non_max_suppression(
+            out, conf_thres, iou_thres, labels=lb, multi_label=True, agnostic=single_cls
+        )
+        dt[2] += time_sync() - t3  # NMS time
+
+        # Metrics: one stats entry per image, targets selected by the image index in column 0
+        for si, pred in enumerate(out):
+            labels = targets[targets[:, 0] == si, 1:]
+            nl = len(labels)
+            tcls = labels[:, 0].tolist() if nl else []  # target class
+            path, shape = Path(paths[si]), shapes[si][0]
+            seen += 1
+
+            if len(pred) == 0:  # no detections: only the target classes are recorded for this image
+                if nl:
+                    stats.append(
+                        (torch.zeros(0, niou, dtype=torch.bool), torch.Tensor(), torch.Tensor(), tcls)
+                    )
+                continue
+
+            # Predictions
+            if single_cls:
+                pred[:, 5] = 0
+            predn = pred.clone()
+            scale_coords(im[si].shape[1:], predn[:, :4], shape, shapes[si][1])  # native-space pred
+
+            # Evaluate
+            if nl:
+                tbox = xywh2xyxy(labels[:, 1:5])  # target boxes
+                scale_coords(im[si].shape[1:], tbox, shape, shapes[si][1])  # native-space labels
+                labelsn = torch.cat((labels[:, 0:1], tbox), 1)  # native-space labels
+                correct = process_batch(predn, labelsn, iouv)
+                if plots:
+                    confusion_matrix.process_batch(predn, labelsn)
+            else:
+                correct = torch.zeros(pred.shape[0], niou, dtype=torch.bool)  # no labels: nothing can be correct
+            stats.append(
+                (correct.cpu(), pred[:, 4].cpu(), pred[:, 5].cpu(), tcls)
+            )  # (correct, conf, pcls, tcls)
+
+            # Save/log
+            if save_txt:
+                save_one_txt(predn, save_conf, shape, file=save_dir / "labels" / (path.stem + ".txt"))
+            if save_json:
+                save_one_json(predn, jdict, path, class_map)  # append to COCO-JSON dictionary
+            callbacks.run("on_val_image_end", pred, predn, path, names, im[si])
+
+        # Plot images (first three batches only, each plot on its own daemon thread)
+        if plots and batch_i < 3:
+            f = save_dir / f"val_batch{batch_i}_labels.jpg"  # labels
+            Thread(target=plot_images, args=(im, targets, paths, f, names), daemon=True).start()
+            f = save_dir / f"val_batch{batch_i}_pred.jpg"  # predictions
+            Thread(target=plot_images, args=(im, output_to_target(out), paths, f, names), daemon=True).start()
+
+    # Compute metrics
+    stats = [np.concatenate(x, 0) for x in zip(*stats)]  # to numpy
+    if len(stats) and stats[0].any():  # at least one detection was marked correct
+        tp, fp, p, r, f1, ap, ap_class = ap_per_class(*stats, plot=plots, save_dir=save_dir, names=names)
+        ap50, ap = ap[:, 0], ap.mean(1)  # AP@0.5, AP@0.5:0.95
+        mp, mr, map50, map = p.mean(), r.mean(), ap50.mean(), ap.mean()
+        nt = np.bincount(stats[3].astype(np.int64), minlength=nc)  # number of targets per class
+    else:
+        nt = torch.zeros(1)  # placeholder so the summary line below reports zero labels
+
+    # Print results
+    pf = "%20s" + "%11i" * 2 + "%11.3g" * 4  # print format
+    LOGGER.info(pf % ("all", seen, nt.sum(), mp, mr, map50, map))
+
+    # Print results per class
+    if (verbose or (nc < 50 and not training)) and nc > 1 and len(stats):
+        for i, c in enumerate(ap_class):
+            LOGGER.info(pf % (names[c], seen, nt[c], p[i], r[i], ap50[i], ap[i]))
+
+    # Print speeds
+    t = tuple(x / seen * 1e3 for x in dt)  # speeds per image (ms); NOTE(review): divides by zero if seen == 0
+    if not training:
+        shape = (batch_size, 3, imgsz, imgsz)
+        LOGGER.info(f"Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {shape}" % t)
+
+    # Plots
+    if plots:
+        confusion_matrix.plot(save_dir=save_dir, names=list(names.values()))
+        callbacks.run("on_val_end")
+
+    # Save JSON
+    if save_json and len(jdict):
+        w = (
+            Path(weights[0] if isinstance(weights, list) else weights).stem if weights is not None else ""
+        )  # weights
+        anno_json = str(
+            Path(data.get("path", "../coco")) / "annotations/instances_val2017.json"
+        )  # annotations json
+        pred_json = str(save_dir / f"{w}_predictions.json")  # predictions json
+        LOGGER.info(f"\nEvaluating pycocotools mAP... saving {pred_json}...")
+        with open(pred_json, "w") as f:
+            json.dump(jdict, f)
+
+        try:  # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb
+            check_requirements(["pycocotools"])
+            from pycocotools.coco import COCO
+            from pycocotools.cocoeval import COCOeval
+
+            anno = COCO(anno_json)  # init annotations api
+            pred = anno.loadRes(pred_json)  # init predictions api
+            eval = COCOeval(anno, pred, "bbox")
+            if is_coco:
+                eval.params.imgIds = [
+                    int(Path(x).stem) for x in dataloader.dataset.img_files
+                ]  # image IDs to evaluate
+            eval.evaluate()
+            eval.accumulate()
+            eval.summarize()
+            map, map50 = eval.stats[:2]  # update results (mAP@0.5:0.95, mAP@0.5)
+        except Exception as e:  # best effort: any failure is logged and the values computed above are kept
+            LOGGER.info(f"pycocotools unable to run: {e}")
+
+    # Return results
+    model.float()  # for training
+    if not training:
+        s = (
+            f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}"
+            if save_txt
+            else ""
+        )
+        LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}{s}")
+    maps = np.zeros(nc) + map  # per-class values default to the overall map, then are overwritten per class
+    for i, c in enumerate(ap_class):
+        maps[c] = ap[i]
+    return (mp, mr, map50, map, *(loss.cpu() / len(dataloader)).tolist()), maps, t  # loss averaged per batch
+
+
+def parse_opt():
+    """
+    Parse the validation command-line options into a namespace whose fields match ``run``'s keyword arguments.
+
+    ``data`` is passed through ``check_yaml``; ``save_json`` is switched on when that result ends with
+    "coco.yaml" and ``save_txt`` is switched on whenever ``save_hybrid`` is set.
+    """
+    parser = argparse.ArgumentParser()
+    add = parser.add_argument  # short alias so that every option fits on a single line
+    add("--data", type=str, default=ROOT / "data/coco128.yaml", help="dataset.yaml path")
+    add("--weights", nargs="+", type=str, default=ROOT / "yolov5s.pt", help="model.pt path(s)")
+    add("--batch-size", type=int, default=32, help="batch size")
+    add("--imgsz", "--img", "--img-size", type=int, default=640, help="inference size (pixels)")
+    add("--conf-thres", type=float, default=0.001, help="confidence threshold")
+    add("--iou-thres", type=float, default=0.6, help="NMS IoU threshold")
+    add("--task", default="val", help="train, val, test, speed or study")
+    add("--device", default="", help="cuda device, i.e. 0 or 0,1,2,3 or cpu")
+    add("--workers", type=int, default=8, help="max dataloader workers (per RANK in DDP mode)")
+    add("--single-cls", action="store_true", help="treat as single-class dataset")
+    add("--augment", action="store_true", help="augmented inference")
+    add("--verbose", action="store_true", help="report mAP by class")
+    add("--save-txt", action="store_true", help="save results to *.txt")
+    add("--save-hybrid", action="store_true", help="save label+prediction hybrid results to *.txt")
+    add("--save-conf", action="store_true", help="save confidences in --save-txt labels")
+    add("--save-json", action="store_true", help="save a COCO-JSON results file")
+    add("--project", default=ROOT / "runs/val", help="save to project/name")
+    add("--name", default="exp", help="save to project/name")
+    add("--exist-ok", action="store_true", help="existing project/name ok, do not increment")
+    add("--half", action="store_true", help="use FP16 half-precision inference")
+    add("--dnn", action="store_true", help="use OpenCV DNN for ONNX inference")
+    opt = parser.parse_args()
+    opt.data = check_yaml(opt.data)  # check YAML
+    opt.save_json |= opt.data.endswith("coco.yaml")
+    opt.save_txt |= opt.save_hybrid
+    # FILE is not defined in this module, so the script name is derived from __file__ directly
+    print_args(Path(__file__).resolve().stem, opt)
+    return opt
+
+
+def main(opt):
+    """Run the task selected by ``opt.task``: plain validation, speed benchmarks or a speed-vs-mAP study."""
+    check_requirements(requirements=ROOT / "requirements.txt", exclude=("tensorboard", "thop"))
+    if opt.task in ("train", "val", "test"):  # run normally
+        if opt.conf_thres > 0.001:  # https://github.com/ultralytics/yolov5/issues/1466
+            msg = f"WARNING: confidence threshold {opt.conf_thres} >> 0.001 will produce invalid mAP values."
+            LOGGER.info(msg)
+        run(**vars(opt))
+        return
+    weights_list = opt.weights if isinstance(opt.weights, list) else [opt.weights]
+    opt.half = True  # FP16 for fastest results
+    if opt.task == "speed":  # speed benchmarks
+        # python val.py --task speed --data coco.yaml --batch 1 --weights yolov5n.pt yolov5s.pt...
+        opt.conf_thres, opt.iou_thres, opt.save_json = 0.25, 0.45, False
+        for w in weights_list:
+            opt.weights = w
+            run(**vars(opt), plots=False)
+    elif opt.task == "study":  # speed vs mAP benchmarks
+        # python val.py --task study --data coco.yaml --iou 0.7 --weights yolov5n.pt yolov5s.pt...
+        for w in weights_list:
+            opt.weights = w
+            out_name = f"study_{Path(opt.data).stem}_{Path(opt.weights).stem}.txt"  # filename to save to
+            sizes, rows = list(range(256, 1536 + 128, 128)), []  # x axis (image sizes), y axis
+            for size in sizes:  # img-size
+                opt.imgsz = size
+                LOGGER.info(f"\nRunning {out_name} --imgsz {opt.imgsz}...")
+                metrics, _, times = run(**vars(opt), plots=False)
+                rows.append(metrics + times)  # results and times
+            np.savetxt(out_name, rows, fmt="%10.4g")  # save
+        os.system("zip -r study.zip study_*.txt")
+        plot_val_study(x=sizes)  # plot
+
+
+if __name__ == "__main__":  # executed as a script: parse the CLI options, then dispatch on opt.task
+    opt = parse_opt()
+    main(opt)