| No. | File:Line | Description |
| --- | --- | --- |
| 1 | hubconf.py:6 | add `import torch_mlu` |
| 2 | setup.py:10 | add `import torch_mlu` |
| 3 | yolox/exp/yolox_base.py:7 | add `import torch_mlu` |
| 4 | yolox/exp/yolox_base.py:225 | change `tensor = torch.LongTensor(2).cuda()` to `tensor = torch.LongTensor(2).mlu()` |
| 5 | yolox/exp/base_exp.py:10 | add `import torch_mlu` |
| 6 | yolox/evaluators/coco_evaluator.py:18 | add `import torch_mlu` |
| 7 | yolox/evaluators/coco_evaluator.py:135 | change `tensor_type = torch.cuda.HalfTensor if half else torch.cuda.FloatTensor` to `tensor_type = torch.mlu.HalfTensor if half else torch.mlu.FloatTensor` |
| 8 | yolox/evaluators/coco_evaluator.py:154 | change `x = torch.ones(1, 3, test_size[0], test_size[1]).cuda()` to `x = torch.ones(1, 3, test_size[0], test_size[1]).mlu()` |
| 9 | yolox/evaluators/coco_evaluator.py:189 | change `statistics = torch.cuda.FloatTensor([inference_time, nms_time, n_samples])` to `statistics = torch.mlu.FloatTensor([inference_time, nms_time, n_samples])` |
| 10 | yolox/evaluators/voc_evaluator.py:14 | add `import torch_mlu` |
| 11 | yolox/evaluators/voc_evaluator.py:60 | change `tensor_type = torch.cuda.HalfTensor if half else torch.cuda.FloatTensor` to `tensor_type = torch.mlu.HalfTensor if half else torch.mlu.FloatTensor` |
| 12 | yolox/evaluators/voc_evaluator.py:78 | change `x = torch.ones(1, 3, test_size[0], test_size[1]).cuda()` to `x = torch.ones(1, 3, test_size[0], test_size[1]).mlu()` |
| 13 | yolox/evaluators/voc_evaluator.py:108 | change `statistics = torch.cuda.FloatTensor([inference_time, nms_time, n_samples])` to `statistics = torch.mlu.FloatTensor([inference_time, nms_time, n_samples])` |
| 14 | yolox/models/yolo_pafpn.py:5 | add `import torch_mlu` |
| 15 | yolox/models/build.py:4 | add `import torch_mlu` |
| 16 | yolox/models/build.py:53 | change `device = "cuda:0" if torch.cuda.is_available() else "cpu"` to `device = "mlu:0" if torch.mlu.is_available() else "cpu"` |
| 17 | yolox/models/losses.py:5 | add `import torch_mlu` |
| 18 | yolox/models/yolo_fpn.py:5 | add `import torch_mlu` |
| 19 | yolox/models/network_blocks.py:5 | add `import torch_mlu` |
| 20 | yolox/models/yolo_head.py:8 | add `import torch_mlu` |
| 21 | yolox/models/yolo_head.py:324 | change `if "CUDA out of memory. " not in str(e):` to `if "MLU out of memory. " not in str(e):` |
| 22 | yolox/models/yolo_head.py:325 | change `raise # RuntimeError might not caused by CUDA OOM` to `raise # RuntimeError might not caused by MLU OOM` |
| 23 | yolox/models/yolo_head.py:332 | change `torch.cuda.empty_cache()` to `torch.mlu.empty_cache()` |
| 24 | yolox/models/yolo_head.py:353 | change `torch.cuda.empty_cache()` to `torch.mlu.empty_cache()` |
| 25 | yolox/models/yolo_head.py:435 | change `mode="gpu",` to `mode="mlu",` |
| 26 | yolox/models/yolo_head.py:474 | change `with torch.cuda.amp.autocast(enabled=False):` to `with torch.mlu.amp.autocast(enabled=False):` |
| 27 | yolox/models/yolo_head.py:500 | change `gt_matched_classes = gt_matched_classes.cuda()` to `gt_matched_classes = gt_matched_classes.mlu()` |
| 28 | yolox/models/yolo_head.py:501 | change `fg_mask = fg_mask.cuda()` to `fg_mask = fg_mask.mlu()` |
| 29 | yolox/models/yolo_head.py:502 | change `pred_ious_this_matching = pred_ious_this_matching.cuda()` to `pred_ious_this_matching = pred_ious_this_matching.mlu()` |
| 30 | yolox/models/yolo_head.py:503 | change `matched_gt_inds = matched_gt_inds.cuda()` to `matched_gt_inds = matched_gt_inds.mlu()` |
| 31 | yolox/models/yolo_head.py:519 | change `the number of candidate anchors so that the GPU memory is saved.` to `the number of candidate anchors so that the MLU memory is saved.` |
| 32 | yolox/core/launch.py:12 | add `import torch_mlu` |
| 33 | yolox/core/launch.py:41 | change `num_gpus_per_machine,` to `num_mlus_per_machine,` |
| 34 | yolox/core/launch.py:44 | change `backend="nccl",` to `backend="cncl",` |
| 35 | yolox/core/launch.py:59 | change `world_size = num_machines * num_gpus_per_machine` to `world_size = num_machines * num_mlus_per_machine` |
| 36 | yolox/core/launch.py:84 | change `nprocs=num_gpus_per_machine,` to `nprocs=num_mlus_per_machine,` |
| 37 | yolox/core/launch.py:88 | change `num_gpus_per_machine,` to `num_mlus_per_machine,` |
| 38 | yolox/core/launch.py:105 | change `num_gpus_per_machine,` to `num_mlus_per_machine,` |
| 39 | yolox/core/launch.py:113 | change `torch.cuda.is_available()` to `torch.mlu.is_available()` |
| 40 | yolox/core/launch.py:114 | change `), "cuda is not available. Please check your installation."` to `), "mlu is not available. Please check your installation."` |
| 41 | yolox/core/launch.py:115 | change `global_rank = machine_rank * num_gpus_per_machine + local_rank` to `global_rank = machine_rank * num_mlus_per_machine + local_rank` |
| 42 | yolox/core/launch.py:131 | change `num_machines = world_size // num_gpus_per_machine` to `num_machines = world_size // num_mlus_per_machine` |
| 43 | yolox/core/launch.py:134 | change `range(i * num_gpus_per_machine, (i + 1) * num_gpus_per_machine)` to `range(i * num_mlus_per_machine, (i + 1) * num_mlus_per_machine)` |
| 44 | yolox/core/launch.py:144 | change `assert num_gpus_per_machine <= torch.cuda.device_count()` to `assert num_mlus_per_machine <= torch.mlu.device_count()` |
| 45 | yolox/core/launch.py:145 | change `torch.cuda.set_device(local_rank)` to `torch.mlu.set_device(local_rank)` |
| 46 | yolox/core/trainer.py:9 | add `import torch_mlu` |
| 47 | yolox/core/trainer.py:25 | change `gpu_mem_usage,` to `mlu_mem_usage,` |
| 48 | yolox/core/trainer.py:46 | change `self.scaler = torch.cuda.amp.GradScaler(enabled=args.fp16)` to `self.scaler = torch.mlu.amp.GradScaler(enabled=args.fp16)` |
| 49 | yolox/core/trainer.py:50 | change `self.device = "cuda:{}".format(self.local_rank)` to `self.device = "mlu:{}".format(self.local_rank)` |
| 50 | yolox/core/trainer.py:104 | change `with torch.cuda.amp.autocast(enabled=self.amp_training):` to `with torch.mlu.amp.autocast(enabled=self.amp_training):` |
| 51 | yolox/core/trainer.py:134 | change `torch.cuda.set_device(self.local_rank)` to `torch.mlu.set_device(self.local_rank)` |
| 52 | yolox/core/trainer.py:253 | change `mem_str = "gpu mem: {:.0f}Mb, mem: {:.1f}Gb".format(gpu_mem_usage(), mem_usage())` to `mem_str = "mlu mem: {:.0f}Mb, mem: {:.1f}Gb".format(mlu_mem_usage(), mem_usage())` |
| 53 | yolox/data/dataloading.py:11 | add `import torch_mlu` |
| 54 | yolox/data/samplers.py:8 | add `import torch_mlu` |
| 55 | yolox/data/data_prefetcher.py:5 | add `import torch_mlu` |
| 56 | yolox/data/data_prefetcher.py:18 | change `self.stream = torch.cuda.Stream()` to `self.stream = torch.mlu.Stream()` |
| 57 | yolox/data/data_prefetcher.py:19 | change `self.input_cuda = self.input_cuda_for_image` to `self.input_mlu = self.input_mlu_for_image` |
| 58 | yolox/data/data_prefetcher.py:31 | change `with torch.cuda.stream(self.stream):` to `with torch.mlu.stream(self.stream):` |
| 59 | yolox/data/data_prefetcher.py:32 | change `self.input_cuda()` to `self.input_mlu()` |
| 60 | yolox/data/data_prefetcher.py:33 | change `self.next_target = self.next_target.cuda(non_blocking=True)` to `self.next_target = self.next_target.mlu(non_blocking=True)` |
| 61 | yolox/data/data_prefetcher.py:36 | change `torch.cuda.current_stream().wait_stream(self.stream)` to `torch.mlu.current_stream().wait_stream(self.stream)` |
| 62 | yolox/data/data_prefetcher.py:42 | change `target.record_stream(torch.cuda.current_stream())` to `target.record_stream(torch.mlu.current_stream())` |
| 63 | yolox/data/data_prefetcher.py:46 | change `def input_cuda_for_image(self):` to `def input_mlu_for_image(self):` |
| 64 | yolox/data/data_prefetcher.py:47 | change `self.next_input = self.next_input.cuda(non_blocking=True)` to `self.next_input = self.next_input.mlu(non_blocking=True)` |
| 65 | yolox/data/data_prefetcher.py:51 | change `input.record_stream(torch.cuda.current_stream())` to `input.record_stream(torch.mlu.current_stream())` |
| 66 | yolox/utils/compat.py:4 | add `import torch_mlu` |
| 67 | yolox/utils/checkpoint.py:8 | add `import torch_mlu` |
| 68 | yolox/utils/logger.py:13 | add `import torch_mlu` |
| 69 | yolox/utils/logger.py:86 | change `distributed_rank(int): device rank when multi-gpu environment` to `distributed_rank(int): device rank when multi-mlu environment` |
| 70 | yolox/utils/setup_env.py:13 | change `__all__ = ["configure_nccl", "configure_module", "configure_omp"]` to `__all__ = ["configure_cncl", "configure_module", "configure_omp"]` |
| 71 | yolox/utils/setup_env.py:16 | change `def configure_nccl():` to `def configure_cncl():` |
| 72 | yolox/utils/boxes.py:6 | add `import torch_mlu` |
| 73 | yolox/utils/model_utils.py:9 | add `import torch_mlu` |
| 74 | yolox/utils/ema.py:7 | add `import torch_mlu` |
| 75 | yolox/utils/ema.py:30 | change `GPU assignment and distributed training wrappers.` to `MLU assignment and distributed training wrappers.` |
| 76 | yolox/utils/metric.py:12 | add `import torch_mlu` |
| 77 | yolox/utils/metric.py:19 | change `"gpu_mem_usage",` to `"mlu_mem_usage",` |
| 78 | yolox/utils/metric.py:24 | change `def get_total_and_free_memory_in_Mb(cuda_device):` to `def get_total_and_free_memory_in_Mb(mlu_device):` |
| 79 | yolox/utils/metric.py:26 | change `"nvidia-smi --query-gpu=memory.total,memory.used --format=csv,nounits,noheader"` to `"nvidia-smi --query-mlu=memory.total,memory.used --format=csv,nounits,noheader"` |
| 80 | yolox/utils/metric.py:29 | change `if "CUDA_VISIBLE_DEVICES" in os.environ:` to `if "MLU_VISIBLE_DEVICES" in os.environ:` |
| 81 | yolox/utils/metric.py:30 | change `visible_devices = os.environ["CUDA_VISIBLE_DEVICES"].split(',')` to `visible_devices = os.environ["MLU_VISIBLE_DEVICES"].split(',')` |
| 82 | yolox/utils/metric.py:31 | change `cuda_device = int(visible_devices[cuda_device])` to `mlu_device = int(visible_devices[mlu_device])` |
| 83 | yolox/utils/metric.py:32 | change `total, used = devices_info[int(cuda_device)].split(",")` to `total, used = devices_info[int(mlu_device)].split(",")` |
| 84 | yolox/utils/metric.py:36 | change `def occupy_mem(cuda_device, mem_ratio=0.9):` to `def occupy_mem(mlu_device, mem_ratio=0.9):` |
| 85 | yolox/utils/metric.py:38 | change `pre-allocate gpu memory for training to avoid memory Fragmentation.` to `pre-allocate mlu memory for training to avoid memory Fragmentation.` |
| 86 | yolox/utils/metric.py:40 | change `total, used = get_total_and_free_memory_in_Mb(cuda_device)` to `total, used = get_total_and_free_memory_in_Mb(mlu_device)` |
| 87 | yolox/utils/metric.py:43 | change `x = torch.cuda.FloatTensor(256, 1024, block_mem)` to `x = torch.mlu.FloatTensor(256, 1024, block_mem)` |
| 88 | yolox/utils/metric.py:48 | change `def gpu_mem_usage():` to `def mlu_mem_usage():` |
| 89 | yolox/utils/metric.py:50 | change `Compute the GPU memory usage for the current device (MB).` to `Compute the MLU memory usage for the current device (MB).` |
| 90 | yolox/utils/metric.py:52 | change `mem_usage_bytes = torch.cuda.max_memory_allocated()` to `mem_usage_bytes = torch.mlu.max_memory_allocated()` |
| 91 | yolox/utils/dist.py:8 | change `This file contains primitives for multi-gpu communication.` to `This file contains primitives for multi-mlu communication.` |
| 92 | yolox/utils/dist.py:21 | add `import torch_mlu` |
| 93 | yolox/utils/dist.py:42 | change `gpu_list = os.getenv('CUDA_VISIBLE_DEVICES', None)` to `mlu_list = os.getenv('MLU_VISIBLE_DEVICES', None)` |
| 94 | yolox/utils/dist.py:43 | change `if gpu_list is not None:` to `if mlu_list is not None:` |
| 95 | yolox/utils/dist.py:44 | change `return len(gpu_list.split(','))` to `return len(mlu_list.split(','))` |
| 96 | yolox/utils/dist.py:142 | change `if dist.get_backend() == "nccl":` to `if dist.get_backend() == "cncl":` |
| 97 | yolox/utils/dist.py:150 | change `assert backend in ["gloo", "nccl"]` to `assert backend in ["gloo", "cncl"]` |
| 98 | yolox/utils/dist.py:151 | change `device = torch.device("cpu" if backend == "gloo" else "cuda")` to `device = torch.device("cpu" if backend == "gloo" else "mlu")` |
| 99 | yolox/utils/dist.py:292 | change `if torch.cuda.is_available():` to `if torch.mlu.is_available():` |
| 100 | yolox/utils/dist.py:293 | change `torch.cuda.synchronize()` to `torch.mlu.synchronize()` |
| 101 | yolox/utils/allreduce_norm.py:8 | add `import torch_mlu` |
| 102 | yolox/utils/allreduce_norm.py:41 | change `def pyobj2tensor(pyobj, device="cuda"):` to `def pyobj2tensor(pyobj, device="mlu"):` |
| 103 | yolox/layers/jit_ops.py:58 | change `"""Get optional list of compiler flags to forward to nvcc when building CUDA sources"""` to `"""Get optional list of compiler flags to forward to nvcc when building MLU sources"""` |
| 104 | yolox/layers/jit_ops.py:62 | change `"-U__CUDA_NO_HALF_OPERATORS__",` to `"-U__MLU_NO_HALF_OPERATORS__",` |
| 105 | yolox/layers/jit_ops.py:63 | change `"-U__CUDA_NO_HALF_CONVERSIONS__",` to `"-U__MLU_NO_HALF_CONVERSIONS__",` |
| 106 | yolox/layers/jit_ops.py:64 | change `"-U__CUDA_NO_HALF2_OPERATORS__",` to `"-U__MLU_NO_HALF2_OPERATORS__",` |
| 107 | yolox/layers/jit_ops.py:106 | change `extra_cuda_cflags=self.nvcc_args(),` to `extra_mlu_cflags=self.nvcc_args(),` |
| 108 | tools/export_onnx.py:9 | add `import torch_mlu` |
| 109 | tools/eval.py:11 | add `import torch_mlu` |
| 110 | tools/eval.py:19 | change `configure_nccl,` to `configure_cncl,` |
| 111 | tools/eval.py:34 | change `"--dist-backend", default="nccl", type=str, help="distributed backend"` to `"--dist-backend", default="cncl", type=str, help="distributed backend"` |
| 112 | tools/eval.py:116 | change `def main(exp, args, num_gpu):` to `def main(exp, args, num_mlu):` |
| 113 | tools/eval.py:125 | change `is_distributed = num_gpu > 1` to `is_distributed = num_mlu > 1` |
| 114 | tools/eval.py:128 | change `configure_nccl()` to `configure_cncl()` |
| 115 | tools/eval.py:156 | change `torch.cuda.set_device(rank)` to `torch.mlu.set_device(rank)` |
| 116 | tools/eval.py:157 | change `model.cuda(rank)` to `model.mlu(rank)` |
| 117 | tools/eval.py:166 | change `loc = "cuda:{}".format(rank)` to `loc = "mlu:{}".format(rank)` |
| 118 | tools/eval.py:208 | change `num_gpu = torch.cuda.device_count() if args.devices is None else args.devices` to `num_mlu = torch.mlu.device_count() if args.devices is None else args.devices` |
| 119 | tools/eval.py:209 | change `assert num_gpu <= torch.cuda.device_count()` to `assert num_mlu <= torch.mlu.device_count()` |
| 120 | tools/eval.py:214 | change `num_gpu,` to `num_mlu,` |
| 121 | tools/eval.py:219 | change `args=(exp, args, num_gpu),` to `args=(exp, args, num_mlu),` |
| 122 | tools/trt.py:11 | add `import torch_mlu` |
| 123 | tools/trt.py:59 | change `model.cuda()` to `model.mlu()` |
| 124 | tools/trt.py:61 | change `x = torch.ones(1, 3, exp.test_size[0], exp.test_size[1]).cuda()` to `x = torch.ones(1, 3, exp.test_size[0], exp.test_size[1]).mlu()` |
| 125 | tools/demo.py:12 | add `import torch_mlu` |
| 126 | tools/demo.py:53 | change `help="device to run our model, can either be cpu or gpu",` to `help="device to run our model, can either be cpu or mlu",` |
| 127 | tools/demo.py:128 | change `x = torch.ones(1, 3, exp.test_size[0], exp.test_size[1]).cuda()` to `x = torch.ones(1, 3, exp.test_size[0], exp.test_size[1]).mlu()` |
| 128 | tools/demo.py:151 | change `if self.device == "gpu":` to `if self.device == "mlu":` |
| 129 | tools/demo.py:152 | change `img = img.cuda()` to `img = img.mlu()` |
| 130 | tools/demo.py:257 | change `args.device = "gpu"` to `args.device = "mlu"` |
| 131 | tools/demo.py:271 | change `if args.device == "gpu":` to `if args.device == "mlu":` |
| 132 | tools/demo.py:272 | change `model.cuda()` to `model.mlu()` |
| 133 | tools/train.py:10 | add `import torch_mlu` |
| 134 | tools/train.py:15 | change `from yolox.utils import configure_module, configure_nccl, configure_omp, get_num_devices` to `from yolox.utils import configure_module, configure_cncl, configure_omp, get_num_devices` |
| 135 | tools/train.py:25 | change `"--dist-backend", default="nccl", type=str, help="distributed backend"` to `"--dist-backend", default="cncl", type=str, help="distributed backend"` |
| 136 | tools/train.py:81 | change `help="occupy MLU memory first for training.",` from `help="occupy GPU memory first for training.",` |
| 137 | tools/train.py:113 | change `configure_nccl()` to `configure_cncl()` |
| 138 | tools/train.py:131 | change `num_gpu = get_num_devices() if args.devices is None else args.devices` to `num_mlu = get_num_devices() if args.devices is None else args.devices` |
| 139 | tools/train.py:132 | change `assert num_gpu <= get_num_devices()` to `assert num_mlu <= get_num_devices()` |
| 140 | tools/train.py:140 | change `num_gpu,` to `num_mlu,` |
| 141 | tools/visualize_assign.py:11 | add `import torch_mlu` |
| 142 | tools/visualize_assign.py:38 | change `with torch.cuda.amp.autocast(enabled=self.amp_training):` to `with torch.mlu.amp.autocast(enabled=self.amp_training):` |
| 143 | tools/export_torchscript.py:9 | add `import torch_mlu` |
| 144 | tests/utils/test_model_utils.py:7 | add `import torch_mlu` |
| 145 | demo/nebullvm/nebullvm_optimization.py:1 | add `import torch_mlu` |
| 146 | demo/nebullvm/nebullvm_optimization.py:9 | change `model.cuda()` to `model.mlu()` |
| 147 | demo/nebullvm/nebullvm_optimization.py:12 | change `device = torch.device("cuda" if torch.cuda.is_available() else "cpu")` to `device = torch.device("mlu" if torch.mlu.is_available() else "cpu")` |
| 148 | demo/MegEngine/python/convert_weights.py:7 | add `import torch_mlu` |
| 149 | demo/OpenVINO/python/openvino_inference.py:60 | change `help='Optional. Specify the target device to infer on; CPU, GPU, \` to `help='Optional. Specify the target device to infer on; CPU, MLU, \` |
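
Most rows in the table apply one mechanical pattern: import `torch_mlu` for its side effect of registering the `mlu` device with PyTorch, then replace every `cuda` spelling with `mlu`. A minimal sketch of that pattern follows; the `Linear` model and tensor sizes are illustrative, not taken from YOLOX:

```python
import torch
import torch_mlu  # noqa: F401 -- importing registers the "mlu" device with PyTorch

# Fall back to CPU when no MLU card is visible (mirrors build.py:53 above).
device = "mlu:0" if torch.mlu.is_available() else "cpu"

model = torch.nn.Linear(8, 2).to(device)
x = torch.ones(1, 8).to(device)  # .mlu() is the drop-in analogue of .cuda()
print(model(x).device)
```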
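Rows 26, 48, 50, and 142 swap the mixed-precision entry points. Below is a hedged sketch of the resulting training step, assuming `torch.mlu.amp` mirrors the `torch.cuda.amp` interface as those rows imply; the model, data, and loss are placeholders:

```python
import torch
import torch_mlu  # noqa: F401

model = torch.nn.Linear(8, 2).mlu()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
scaler = torch.mlu.amp.GradScaler(enabled=True)  # replaces torch.cuda.amp.GradScaler

inputs, targets = torch.randn(4, 8).mlu(), torch.randn(4, 2).mlu()

with torch.mlu.amp.autocast(enabled=True):  # replaces torch.cuda.amp.autocast
    loss = torch.nn.functional.mse_loss(model(inputs), targets)

# Scale the loss, step the optimizer, and update the scale factor as usual.
scaler.scale(loss).backward()
scaler.step(optimizer)
scaler.update()
```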
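Rows 33-45 and 96-98 rename the distributed-launch plumbing: the NCCL backend becomes CNCL, and each worker process binds one MLU card. A sketch of the per-process setup under those assumptions; the rendezvous values and the `init_worker` helper are placeholders, since a real launcher (such as the spawn logic in `launch.py`) supplies them:

```python
import os
import torch
import torch.distributed as dist
import torch_mlu  # noqa: F401


def init_worker(local_rank: int, world_size: int) -> None:
    # Placeholder rendezvous settings; a real launcher provides these.
    os.environ.setdefault("MASTER_ADDR", "127.0.0.1")
    os.environ.setdefault("MASTER_PORT", "29500")
    # "cncl" replaces "nccl" as the collective backend (launch.py:44).
    dist.init_process_group(backend="cncl", rank=local_rank, world_size=world_size)
    torch.mlu.set_device(local_rank)  # one MLU per process (launch.py:145)
```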
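Rows 56-65 port the side-stream prefetcher. The sketch below condenses the pattern from `data_prefetcher.py`, assuming `torch.mlu` streams behave like their CUDA counterparts; the class and method names here are illustrative rather than the project's own:

```python
import torch
import torch_mlu  # noqa: F401


class MLUPrefetcher:
    """Overlap host-to-MLU copies with compute via a dedicated copy stream."""

    def __init__(self, loader):
        self.loader = iter(loader)
        self.stream = torch.mlu.Stream()  # replaces torch.cuda.Stream()
        self._preload()

    def _preload(self):
        try:
            self.next_input, self.next_target = next(self.loader)
        except StopIteration:
            self.next_input = self.next_target = None
            return
        with torch.mlu.stream(self.stream):  # async copies on the side stream
            self.next_input = self.next_input.mlu(non_blocking=True)
            self.next_target = self.next_target.mlu(non_blocking=True)

    def next(self):
        # Block the compute stream until the pending copies finish.
        torch.mlu.current_stream().wait_stream(self.stream)
        inp, tgt = self.next_input, self.next_target
        if inp is not None:
            # Tie the tensors' lifetime to the compute stream's bookkeeping.
            inp.record_stream(torch.mlu.current_stream())
            tgt.record_stream(torch.mlu.current_stream())
        self._preload()
        return inp, tgt
```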