diff --git a/tutorials/README.md b/tutorials/README.md index 0796e8169..8998b4df2 100644 --- a/tutorials/README.md +++ b/tutorials/README.md @@ -9,18 +9,121 @@ Learn how to quickly quantize pre-trained models using MCT's post-training quant - [Post training quantization with Keras](notebooks/imx500_notebooks/keras/example_keras_mobilenetv2_for_imx500.ipynb) - [Post training quantization with PyTorch](notebooks/mct_features_notebooks/pytorch/example_pytorch_post_training_quantization.ipynb) -## MCT Features -This set of tutorials covers all the quantization tools provided by MCT. -The notebooks in this section demonstrate how to configure and run simple and advanced post-training quantization methods. -This includes fine-tuning PTQ (Post-Training Quantization) configurations, exporting models, -and exploring advanced compression techniques. +## MCT features +This tutorial set introduces the various quantization tools offered by MCT. +The notebooks included here illustrate the setup and usage of both basic and advanced post-training quantization methods. +You'll learn how to refine PTQ (Post-Training Quantization) settings, export models, and explore advanced compression +techniques such as GPTQ (Gradient-Based Post-Training Quantization), mixed-precision quantization, and more. These techniques are essential for further optimizing models and achieving superior performance in deployment scenarios. -- [MCT Features notebooks](notebooks/mct_features_notebooks/README.md) -## Quantization for Sony-IMX500 deployment +### Keras Tutorials -This section provides several guides on quantizing pre-trained models to meet specific constraints for deployment on the -[Sony-IMX500](https://developer.sony.com/imx500/) processing platform. -We will cover various tasks and demonstrate the necessary steps to achieve efficient quantization for optimal -deployment performance. -- [MCT IMX500 notebooks](notebooks/imx500_notebooks/README.md) +
+ Post-Training Quantization (PTQ) + + | Tutorial | Included Features | + |--------------------------------------------------------------------------------------------|-----------------------------------------------------------------------------------------------------| + | [Basic Post-Training Quantization (PTQ)](keras/example_keras_post-training_quantization.ipynb) | ✅ PTQ | + | [MobileNetV2](../imx500_notebooks/keras/example_keras_mobilenetv2_for_imx500.ipynb) | ✅ PTQ | + | [Mixed-Precision MobileNetV2](keras/example_keras_mobilenet_mixed_precision.ipynb) | ✅ PTQ
✅ Mixed-Precision | + | [Nanodet-Plus](../imx500_notebooks/keras/example_keras_nanodet_plus_for_imx500.ipynb) | ✅ PTQ | + | [EfficientDetLite0](../imx500_notebooks/keras/example_keras_effdet_lite0_for_imx500.ipynb) | ✅ PTQ
✅ [sony-custom-layers](https://github.com/sony/custom_layers) integration | + +
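For readers skimming this diff, the Keras PTQ notebooks listed above all follow the same basic flow, sketched below. This is an editor's illustration rather than part of the PR: the MobileNetV2 model and the random representative dataset are placeholders, and the `mct.ptq.keras_post_training_quantization` entry point is assumed from recent MCT releases; the linked notebooks contain the exact, tested code.

```python
# Minimal Keras PTQ sketch (illustrative; see the notebooks above for tested code).
import numpy as np
import model_compression_toolkit as mct
from tensorflow.keras.applications.mobilenet_v2 import MobileNetV2

float_model = MobileNetV2()  # placeholder pre-trained model

def representative_data_gen():
    # Placeholder calibration batches; the notebooks feed preprocessed real images.
    for _ in range(10):
        yield [np.random.rand(1, 224, 224, 3).astype(np.float32)]

# Assumed API: returns the quantized Keras model plus quantization metadata.
quantized_model, quantization_info = mct.ptq.keras_post_training_quantization(
    float_model, representative_data_gen)
```

The mixed-precision rows in the table add a resource budget and a mixed-precision configuration on top of this call; that variant is sketched with the PyTorch examples further down.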
+ +
+ Gradient-Based Post-Training Quantization (GPTQ) + + | Tutorial | Included Features | + |------------------------------|---------------| + | [MobileNetV2](keras/example_keras_mobilenet_gptq.ipynb) | ✅ GPTQ | + +
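The GPTQ notebook extends the same flow with a short gradient-based fine-tuning stage that optimizes the quantized weights against the float model. A hedged sketch, assuming the `mct.gptq.get_keras_gptq_config` helper and the `keras_gradient_post_training_quantization` entry point of recent MCT releases:

```python
# Keras GPTQ sketch (illustrative): reuses float_model and representative_data_gen
# from the PTQ sketch above.
import model_compression_toolkit as mct

# Assumed helper: configures the number of fine-tuning epochs over the calibration data.
gptq_config = mct.gptq.get_keras_gptq_config(n_epochs=5)

quantized_model, quantization_info = mct.gptq.keras_gradient_post_training_quantization(
    float_model, representative_data_gen, gptq_config=gptq_config)
```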
+ +
+ Quantization-Aware Training (QAT) + + | Tutorial | Included Features | + |---------------------------------------------------|--------------| + | [QAT on MNIST](keras/example_keras_qat.ipynb) | ✅ QAT | + +
+ + +
+ Structured Pruning + + | Tutorial | Included Features | + |---------------------------------------------------------------------|------------------| + | [Fully-Connected Model Pruning](keras/example_keras_pruning_mnist.ipynb) | ✅ Pruning | + +
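Structured pruning removes whole channels or neurons so the model meets a memory budget before (or instead of) quantization. The sketch below is an assumption-laden illustration: `mct.pruning.keras_pruning_experimental` and `mct.core.ResourceUtilization` are taken from recent MCT releases, the 50% weights-memory target is arbitrary, and the model is a placeholder (the notebook prunes a small fully-connected MNIST model).

```python
# Structured pruning sketch (illustrative; reuses representative_data_gen from above).
import model_compression_toolkit as mct
from tensorflow.keras.applications.mobilenet_v2 import MobileNetV2

dense_model = MobileNetV2()  # placeholder model to prune

# Target ~50% of the float32 weights memory (bytes = #params * 4).
n_params = sum(w.size for w in dense_model.get_weights())
target_ru = mct.core.ResourceUtilization(weights_memory=n_params * 4 * 0.5)

pruned_model, pruning_info = mct.pruning.keras_pruning_experimental(
    dense_model, target_ru, representative_data_gen)
```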
+ +
+ Export Quantized Models + + | Tutorial | Included Features | + |---------------------------------------------------------------------------------------|-----------------| + | [Exporter Usage](keras/example_keras_export.ipynb) | ✅ Export | + +
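Once quantized (or pruned), the model is written to disk with MCT's exporter. A one-call sketch, assuming the `mct.exporter.keras_export_model` API covered by the exporter notebook and a `quantized_model` produced by one of the calls above:

```python
# Keras export sketch (illustrative): serializes the quantized model for deployment.
import model_compression_toolkit as mct

mct.exporter.keras_export_model(model=quantized_model,
                                save_model_path='quantized_model.keras')
```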
+ +
+ Debug Tools + + | Tutorial | Included Features | + |-------------------------------------------------------------------------------------|-------------------------| + | [Network Editor Usage](keras/example_keras_network_editor.ipynb) | ✅ Network Editor | + +
+ +### PyTorch Tutorials + + +
+ Post-Training Quantization (PTQ) + + | Tutorial | Included Features | + |-----------------------------------------------------------------------------------------------------------|---------------------------------------------| + | [Basic Post-Training Quantization (PTQ)](pytorch/example_pytorch_post_training_quantization.ipynb) | ✅ PTQ | + | [Mixed-Precision Post-Training Quantization](pytorch/example_pytorch_mixed_precision_ptq.ipynb) | ✅ PTQ
✅ Mixed-Precision | + | [Advanced Gradient-Based Post-Training Quantization (GPTQ)](pytorch/example_pytorch_mobilenet_gptq.ipynb) | ✅ GPTQ | + +
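The PyTorch notebooks mirror the Keras flow; the mixed-precision variant additionally hands MCT a resource budget so it can search per-layer bit widths. A hedged sketch, assuming the `mct.ptq.pytorch_post_training_quantization`, `CoreConfig`, `MixedPrecisionQuantizationConfig`, and `ResourceUtilization` names of recent MCT releases; the model, data, and memory budget are placeholders.

```python
# PyTorch PTQ with a mixed-precision bit-width search (illustrative sketch).
import torch
import model_compression_toolkit as mct
from torchvision.models import mobilenet_v2

float_model = mobilenet_v2(weights='DEFAULT')  # placeholder pre-trained model

def representative_data_gen():
    # Placeholder calibration batches; the notebooks use real preprocessed images.
    for _ in range(10):
        yield [torch.rand(1, 3, 224, 224)]

# Enable mixed precision and cap weights memory (in bytes) at ~75% of a uniform
# 8-bit model -- an arbitrary example budget that forces the bit-width search.
core_config = mct.core.CoreConfig(
    mixed_precision_config=mct.core.MixedPrecisionQuantizationConfig())
n_params = sum(p.numel() for p in float_model.parameters())
target_ru = mct.core.ResourceUtilization(weights_memory=n_params * 0.75)

quantized_model, quantization_info = mct.ptq.pytorch_post_training_quantization(
    float_model, representative_data_gen,
    target_resource_utilization=target_ru,
    core_config=core_config)
```

For the GPTQ notebook in the same table, the PTQ call is replaced by `mct.gptq.pytorch_gradient_post_training_quantization` with a config from `mct.gptq.get_pytorch_gptq_config` (again, names assumed from recent releases).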
+ +
+ Structured Pruning + + | Tutorial | Included Features | + |--------------------------------------------------------------------------------------|------------------| + | [Fully-Connected Model Pruning](pytorch/example_pytorch_pruning_mnist.ipynb) | ✅ Pruning | + + +
+ +
+ Data Generation + + | Tutorial | Included Features | + |-----------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------| + | [Zero-Shot Quantization (ZSQ) using Data Generation](pytorch/example_pytorch_data_generation.ipynb) | ✅ PTQ
✅ ZSQ
✅ Data-Free Quantization
✅ Data Generation | + +
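The data-generation notebook covers the zero-shot case where no real calibration images are available: MCT synthesizes images from the model's own batch-normalization statistics and uses them as the representative dataset. The API names in the sketch below (`get_pytorch_data_generation_config`, `pytorch_data_generation_experimental`) are assumptions based on the notebook's topic and may differ between MCT versions, so treat it as a rough outline only.

```python
# Zero-shot / data-free quantization outline (illustrative; API names assumed).
import model_compression_toolkit as mct
from torchvision.models import mobilenet_v2

float_model = mobilenet_v2(weights='DEFAULT')  # placeholder

# Synthesize a small calibration set from the model itself (no real data needed).
data_gen_config = mct.data_generation.get_pytorch_data_generation_config()
generated_images = mct.data_generation.pytorch_data_generation_experimental(
    model=float_model,
    n_images=32,
    output_image_size=224,
    data_generation_config=data_gen_config)

# The generated images then serve as the representative dataset for the regular
# mct.ptq.pytorch_post_training_quantization call shown earlier.
```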
+ + +
+ Export Quantized Models + + | Tutorial | Included Features | + |---------------------------------------------------------------------------------------|-----------------| + | [Exporter Usage](pytorch/example_pytorch_export.ipynb) | ✅ Export | + +
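Exporting a quantized PyTorch model typically targets ONNX and requires a representative dataset for tracing. A minimal sketch, assuming the `mct.exporter.pytorch_export_model` signature covered by the exporter notebook, with `quantized_model` and `representative_data_gen` taken from an earlier quantization call:

```python
# PyTorch export sketch (illustrative): writes the quantized model to ONNX.
import model_compression_toolkit as mct

mct.exporter.pytorch_export_model(model=quantized_model,
                                  save_model_path='quantized_model.onnx',
                                  repr_dataset=representative_data_gen)
```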
+
+ Quantization Troubleshooting + + | Tutorial | Included Features | + |------------------------------------------------------------------------------------------------|-------------------| + | [Quantization Troubleshooting using the Xquant Feature](pytorch/example_pytorch_xquant.ipynb) | ✅ Debug | + +
diff --git a/tutorials/mct_model_garden/models_keras/__init__.py b/tutorials/keras/__init__.py similarity index 100% rename from tutorials/mct_model_garden/models_keras/__init__.py rename to tutorials/keras/__init__.py diff --git a/tutorials/notebooks/mct_features_notebooks/keras/example_keras_activation_threshold_search.ipynb b/tutorials/keras/example_keras_activation_threshold_search.ipynb similarity index 100% rename from tutorials/notebooks/mct_features_notebooks/keras/example_keras_activation_threshold_search.ipynb rename to tutorials/keras/example_keras_activation_threshold_search.ipynb diff --git a/tutorials/notebooks/mct_features_notebooks/keras/example_keras_activation_z_score_threshold.ipynb b/tutorials/keras/example_keras_activation_z_score_threshold.ipynb similarity index 100% rename from tutorials/notebooks/mct_features_notebooks/keras/example_keras_activation_z_score_threshold.ipynb rename to tutorials/keras/example_keras_activation_z_score_threshold.ipynb diff --git a/tutorials/notebooks/mct_features_notebooks/keras/example_keras_export.ipynb b/tutorials/keras/example_keras_export.ipynb similarity index 100% rename from tutorials/notebooks/mct_features_notebooks/keras/example_keras_export.ipynb rename to tutorials/keras/example_keras_export.ipynb diff --git a/tutorials/notebooks/mct_features_notebooks/keras/example_keras_mobilenet_gptq.ipynb b/tutorials/keras/example_keras_mobilenet_gptq.ipynb similarity index 100% rename from tutorials/notebooks/mct_features_notebooks/keras/example_keras_mobilenet_gptq.ipynb rename to tutorials/keras/example_keras_mobilenet_gptq.ipynb diff --git a/tutorials/notebooks/mct_features_notebooks/keras/example_keras_mobilenet_mixed_precision.ipynb b/tutorials/keras/example_keras_mobilenet_mixed_precision.ipynb similarity index 100% rename from tutorials/notebooks/mct_features_notebooks/keras/example_keras_mobilenet_mixed_precision.ipynb rename to tutorials/keras/example_keras_mobilenet_mixed_precision.ipynb diff --git a/tutorials/notebooks/mct_features_notebooks/keras/example_keras_network_editor.ipynb b/tutorials/keras/example_keras_network_editor.ipynb similarity index 100% rename from tutorials/notebooks/mct_features_notebooks/keras/example_keras_network_editor.ipynb rename to tutorials/keras/example_keras_network_editor.ipynb diff --git a/tutorials/notebooks/mct_features_notebooks/keras/example_keras_post-training_quantization.ipynb b/tutorials/keras/example_keras_post-training_quantization.ipynb similarity index 100% rename from tutorials/notebooks/mct_features_notebooks/keras/example_keras_post-training_quantization.ipynb rename to tutorials/keras/example_keras_post-training_quantization.ipynb diff --git a/tutorials/notebooks/mct_features_notebooks/keras/example_keras_pruning_mnist.ipynb b/tutorials/keras/example_keras_pruning_mnist.ipynb similarity index 100% rename from tutorials/notebooks/mct_features_notebooks/keras/example_keras_pruning_mnist.ipynb rename to tutorials/keras/example_keras_pruning_mnist.ipynb diff --git a/tutorials/notebooks/mct_features_notebooks/keras/example_keras_qat.ipynb b/tutorials/keras/example_keras_qat.ipynb similarity index 100% rename from tutorials/notebooks/mct_features_notebooks/keras/example_keras_qat.ipynb rename to tutorials/keras/example_keras_qat.ipynb diff --git a/tutorials/notebooks/mct_features_notebooks/keras/example_keras_xquant.ipynb b/tutorials/keras/example_keras_xquant.ipynb similarity index 100% rename from tutorials/notebooks/mct_features_notebooks/keras/example_keras_xquant.ipynb rename to 
tutorials/keras/example_keras_xquant.ipynb diff --git a/tutorials/mct_model_garden/README.md b/tutorials/mct_model_garden/README.md deleted file mode 100644 index 54eaba7ff..000000000 --- a/tutorials/mct_model_garden/README.md +++ /dev/null @@ -1,25 +0,0 @@ -# MCT Model Garden - -MCT Model Garden is a collection of models sourced from various repositories and adjusted for quantization using Model Compression Toolkit (MCT). - -Adjustments are sometimes necessary before applying MCT due to various reasons, such as: -- Enabling the conversion of the model to a static graph (the initial stage in MCT). -- Enhancing the quantization outcome. -- Converting unsupported operators. - -Note that in many cases, adjustments are unnecessary before applying MCT. - -## Models - -| Model | Source Repository | MCT Model Garden | -|--------------|-----------------------------|-------------------------------------------------------------------------------------------------------------------------| -| EfficientDet | [efficientdet-pytorch](https://github.com/rwightman/efficientdet-pytorch) | [model](https://github.com/sony/model_optimization/tree/main/tutorials/mct_model_garden/models_keras/efficientdet) | -| Nanodet-Plus | [Nanodet-Plus](https://github.com/RangiLyu/nanodet) | [model](https://github.com/sony/model_optimization/tree/main/tutorials/mct_model_garden/models_keras/nanodet) | -| Yolov8n | [Ultralytics](https://github.com/ultralytics/ultralytics) | [model](https://github.com/sony/model_optimization/tree/main/tutorials/mct_model_garden/models_keras/yolov8) | - - - -## License -This project is licensed under [Apache License 2.0](../../LICENSE.md). -However, please note that different repositories have their own licenses. Therefore, when using a model from -this library, it's essential to also comply with the licensing terms of the source repositories. diff --git a/tutorials/mct_model_garden/__init__.py b/tutorials/mct_model_garden/__init__.py deleted file mode 100644 index 2147ec284..000000000 --- a/tutorials/mct_model_garden/__init__.py +++ /dev/null @@ -1,14 +0,0 @@ -# Copyright 2023 Sony Semiconductor Israel, Inc. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== diff --git a/tutorials/mct_model_garden/evaluation_metrics/__init__.py b/tutorials/mct_model_garden/evaluation_metrics/__init__.py deleted file mode 100644 index 2147ec284..000000000 --- a/tutorials/mct_model_garden/evaluation_metrics/__init__.py +++ /dev/null @@ -1,14 +0,0 @@ -# Copyright 2023 Sony Semiconductor Israel, Inc. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== diff --git a/tutorials/mct_model_garden/evaluation_metrics/coco_evaluation.py b/tutorials/mct_model_garden/evaluation_metrics/coco_evaluation.py deleted file mode 100644 index 4311bac4a..000000000 --- a/tutorials/mct_model_garden/evaluation_metrics/coco_evaluation.py +++ /dev/null @@ -1,559 +0,0 @@ -# Copyright 2023 Sony Semiconductor Israel, Inc. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -import json -import cv2 -import os -import numpy as np -from pycocotools.coco import COCO -from pycocotools.cocoeval import COCOeval -from typing import List, Dict, Tuple, Callable, Any -import random -from pycocotools import mask as mask_utils -from tqdm import tqdm - -from tutorials.mct_model_garden.evaluation_metrics.coco_evaluation_utils import scale_boxes, scale_coords -from ..models_pytorch.yolov8.yolov8_preprocess import yolov8_preprocess_chw_transpose -from ..models_pytorch.yolov8.postprocess_yolov8_seg import process_masks, postprocess_yolov8_inst_seg - - -def coco80_to_coco91(x: np.ndarray) -> np.ndarray: - """ - Converts COCO 80-class indices to COCO 91-class indices. - - Args: - x (numpy.ndarray): An array of COCO 80-class indices. - - Returns: - numpy.ndarray: An array of corresponding COCO 91-class indices. - """ - coco91Indexs = np.array( - [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34, - 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, - 63, 64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90]) - return coco91Indexs[x.astype(np.int32)] - - -# COCO evaluation class -class CocoEval: - def __init__(self, path2json: str, output_resize: Dict = None, task: str = 'Detection'): - """ - Initialize the CocoEval class. - - Args: - path2json (str): Path to the COCO JSON file containing ground truth annotations. - output_resize (Dict): Contains the resize information to map between the model's output and the original - image dimensions. 
The dict consists of: - {"shape": (height, weight), - "aspect_ratio_preservation": bool} - """ - # Load ground truth annotations - self.coco_gt = COCO(path2json) - - # A list of reformatted model outputs - self.all_detections = [] - - # Resizing information to map between the model's output and the original image dimensions - self.output_resize = output_resize if output_resize else {'shape': (1, 1), 'aspect_ratio_preservation': False} - - # Set the task type (Detection/Segmentation/Keypoints) - self.task = task - - def add_batch_detections(self, outputs: Tuple[List, List, List, List], targets: List[Dict]): - """ - Add batch detections to the evaluation. - - Args: - outputs (list): List of model outputs, typically containing bounding boxes, scores, and labels. - targets (list): List of ground truth annotations for the batch. - """ - img_ids, _outs = [], [] - orig_img_dims = [] - for idx, t in enumerate(targets): - if len(t) > 0: - img_ids.append(t[0]['image_id']) - orig_img_dims.append(t[0]['orig_img_dims']) - _outs.append([o[idx] for o in outputs]) - - batch_detections = self.format_results(_outs, img_ids, orig_img_dims, self.output_resize) - - self.all_detections.extend(batch_detections) - - def result(self) -> List[float]: - """ - Calculate and print evaluation results. - - Returns: - list: COCO evaluation statistics. - """ - # Initialize COCO evaluation object - self.coco_dt = self.coco_gt.loadRes(self.all_detections) - if self.task == 'Detection': - coco_eval = COCOeval(self.coco_gt, self.coco_dt, 'bbox') - elif self.task == 'Keypoints': - coco_eval = COCOeval(self.coco_gt, self.coco_dt, 'keypoints') - else: - raise Exception("Unsupported task type of CocoEval") - - # Run evaluation - coco_eval.evaluate() - coco_eval.accumulate() - coco_eval.summarize() - - # Print mAP results - print("mAP: {:.4f}".format(coco_eval.stats[0])) - - return coco_eval.stats - - def reset(self): - """ - Reset the list of detections to prepare for a new evaluation. - """ - self.all_detections = [] - - def format_results(self, outputs: List, img_ids: List, orig_img_dims: List, output_resize: Dict) -> List[Dict]: - """ - Format model outputs into a list of detection dictionaries. - - Args: - outputs (list): List of model outputs, typically containing bounding boxes, scores, and labels. - img_ids (list): List of image IDs corresponding to each output. - orig_img_dims (list): List of tuples representing the original image dimensions (h, w) for each output. - output_resize (Dict): Contains the resize information to map between the model's - output and the original image dimensions. - - Returns: - list: A list of detection dictionaries, each containing information about the detected object. 
- """ - detections = [] - h_model, w_model = output_resize['shape'] - preserve_aspect_ratio = output_resize['aspect_ratio_preservation'] - normalized_coords = output_resize.get('normalized_coords', True) - align_center = output_resize.get('align_center', True) - - if self.task == 'Detection': - # Process model outputs and convert to detection format - for idx, output in enumerate(outputs): - image_id = img_ids[idx] - scores = output[1].numpy().squeeze() # Extract scores - labels = (coco80_to_coco91( - output[2].numpy())).squeeze() # Convert COCO 80-class indices to COCO 91-class indices - boxes = output[0].numpy().squeeze() # Extract bounding boxes - boxes = scale_boxes(boxes, orig_img_dims[idx][0], orig_img_dims[idx][1], h_model, w_model, - preserve_aspect_ratio, align_center, normalized_coords) - - for score, label, box in zip(scores, labels, boxes): - detection = { - "image_id": image_id, - "category_id": label, - "bbox": [box[1], box[0], box[3] - box[1], box[2] - box[0]], - "score": score - } - detections.append(detection) - - elif self.task == 'Keypoints': - for output, image_id, (w_orig, h_orig) in zip(outputs, img_ids, orig_img_dims): - - bbox, scores, kpts = output - - # Add detection results to predicted_keypoints list - if kpts.shape[0]: - kpts = kpts.reshape(-1, 17, 3) - kpts = scale_coords(kpts, h_orig, w_orig, 640, 640, True) - for ind, k in enumerate(kpts): - detections.append({ - 'category_id': 1, - 'image_id': image_id, - 'keypoints': k.reshape(51).tolist(), - 'score': scores.tolist()[ind] if isinstance(scores.tolist(), list) else scores.tolist() - }) - - return detections - -def load_and_preprocess_image(image_path: str, preprocess: Callable) -> np.ndarray: - """ - Load and preprocess an image from a given file path. - - Args: - image_path (str): Path to the image file. - preprocess (function): Preprocessing function to apply to the loaded image. - - Returns: - numpy.ndarray: Preprocessed image. - """ - image = cv2.imread(image_path) - image = preprocess(image) - return image - - -class CocoDataset: - def __init__(self, dataset_folder: str, annotation_file: str, preprocess: Callable): - """ - A dataset class for handling COCO dataset images and annotations. - - Args: - dataset_folder (str): The path to the folder containing COCO dataset images. - annotation_file (str): The path to the COCO annotation file in JSON format. - preprocess (Callable): A function for preprocessing images. - """ - self.dataset_folder = dataset_folder - self.preprocess = preprocess - - # Load COCO annotations from a JSON file (e.g., 'annotations.json') - with open(annotation_file, 'r') as f: - self.coco_annotations = json.load(f) - - # Initialize a dictionary to store annotations grouped by image ID - self.annotations_by_image = {} - - # Iterate through the annotations and group them by image ID - for annotation in self.coco_annotations['annotations']: - image_id = annotation['image_id'] - if image_id not in self.annotations_by_image: - self.annotations_by_image[image_id] = [] - self.annotations_by_image[image_id].append(annotation) - - # Initialize a list to collect images and annotations for the current batch - self.total_images = len(self.coco_annotations['images']) - - def __len__(self): - return self.total_images - - def __getitem__(self, item_index): - """ - Returns the preprocessed image and its corresponding annotations. - - Args: - item_index: Index of the item to retrieve. - - Returns: - Tuple containing the preprocessed image and its annotations. 
- """ - image_info = self.coco_annotations['images'][item_index] - image_id = image_info['id'] - image = load_and_preprocess_image(os.path.join(self.dataset_folder, image_info['file_name']), self.preprocess) - annotations = self.annotations_by_image.get(image_id, []) - if len(annotations) > 0: - annotations[0]['orig_img_dims'] = (image_info['height'], image_info['width']) - return image, annotations - - def sample(self, batch_size): - """ - Samples a batch of images and their corresponding annotations from the dataset. - - Returns: - Tuple containing a batch of preprocessed images and their annotations. - """ - batch_images = [] - batch_annotations = [] - - # Sample random image indexes - random_idx = random.sample(range(self.total_images), batch_size) - - # Get the corresponding items from dataset - for idx in random_idx: - batch_images.append(self[idx][0]) - batch_annotations.append(self[idx][1]) - - return np.array(batch_images), batch_annotations - - -class DataLoader: - def __init__(self, dataset: List[Tuple], batch_size: int, shuffle: bool = False): - self.dataset = dataset - self.batch_size = batch_size - self.shuffle = shuffle - self.count = 0 - self.inds = list(range(len(dataset))) - - def __iter__(self): - self.count = 0 - if self.shuffle: - random.shuffle(self.inds) - - return self - - def __next__(self): - if self.count >= len(self.dataset): - raise StopIteration - - batch_images = [] - batch_annotations = [] - - while len(batch_images) < self.batch_size and self.count < len(self.dataset): - index = self.inds[self.count] - image, annotations = self.dataset[index] - batch_images.append(image) - batch_annotations.append(annotations) - self.count += 1 - - return np.array(batch_images), batch_annotations - - -def coco_dataset_generator(dataset_folder: str, annotation_file: str, preprocess: Callable, - batch_size: int = 1) -> Tuple: - - """ - Generator function for loading and preprocessing images and their annotations from a COCO-style dataset. - - Args: - dataset_folder (str): Path to the dataset folder containing image files. - annotation_file (str): Path to the COCO-style annotation JSON file. - preprocess (function): Preprocessing function to apply to each loaded image. - batch_size (int): The desired batch size. - - Yields: - Tuple[numpy.ndarray, list]: A tuple containing a batch of images (as a NumPy array) and a list of annotations - for each image in the batch. 
- """ - # Load COCO annotations from a JSON file (e.g., 'annotations.json') - with open(annotation_file, 'r') as f: - coco_annotations = json.load(f) - - # Initialize a dictionary to store annotations grouped by image ID - annotations_by_image = {} - - # Iterate through the annotations and group them by image ID - for annotation in coco_annotations['annotations']: - image_id = annotation['image_id'] - if image_id not in annotations_by_image: - annotations_by_image[image_id] = [] - annotations_by_image[image_id].append(annotation) - - # Initialize a list to collect images and annotations for the current batch - batch_images = [] - batch_annotations = [] - total_images = len(coco_annotations['images']) - - # Iterate through the images and create a list of tuples (image, annotations) - for image_count, image_info in enumerate(coco_annotations['images']): - image_id = image_info['id'] - # Load and preprocess the image (you can use your own image loading logic) - image = load_and_preprocess_image(os.path.join(dataset_folder, image_info['file_name']), preprocess) - annotations = annotations_by_image.get(image_id, []) - if len(annotations) > 0: - annotations[0]['orig_img_dims'] = (image_info['height'], image_info['width']) - - # Add the image and annotations to the current batch - batch_images.append(image) - batch_annotations.append(annotations) - - # Check if the current batch is of the desired batch size - if len(batch_images) == batch_size: - # Yield the current batch - yield np.array(batch_images), batch_annotations - - # Reset the batch lists for the next batch - batch_images = [] - batch_annotations = [] - - # After processing all images, yield any remaining images in the last batch - if len(batch_images) > 0 and (total_images == image_count + 1): - yield np.array(batch_images), batch_annotations - - -def model_predict(model: Any, - inputs: np.ndarray) -> Tuple[List, List, List, List]: - """ - Perform inference using the provided model on the given inputs. - - This function serves as the default method for inference if no specific model inference function is provided. - - Args: - model (Any): The model used for inference. - inputs (np.ndarray): Input data to perform inference on. - - Returns: - Tuple[List, List, List, List]: Tuple containing lists of predictions. - """ - return model(inputs) - - -def coco_evaluate(model: Any, preprocess: Callable, dataset_folder: str, annotation_file: str, batch_size: int, - output_resize: tuple, model_inference: Callable = model_predict, task: str = 'Detection') -> dict: - """ - Evaluate a model on the COCO dataset. - - Args: - - model (Any): The model to evaluate. - - preprocess (Callable): Preprocessing function to be applied to images. - - dataset_folder (str): Path to the folder containing COCO dataset images. - - annotation_file (str): Path to the COCO annotation file. - - batch_size (int): Batch size for evaluation. - - output_resize (tuple): Tuple representing the output size after resizing. - - model_inference (Callable): Model inference function. model_predict will be used by default. - - Returns: - - dict: Evaluation results. 
- - """ - # Load COCO evaluation set - coco_dataset = CocoDataset(dataset_folder=dataset_folder, - annotation_file=annotation_file, - preprocess=preprocess) - coco_loader = DataLoader(coco_dataset, batch_size) - - # Initialize the evaluation metric object - coco_metric = CocoEval(annotation_file, output_resize, task) - - # Iterate and the evaluation set - for batch_idx, (images, targets) in enumerate(coco_loader): - - # Run inference on the batch - outputs = model_inference(model, images) - - # Add the model outputs to metric object (a dictionary of outputs after postprocess: boxes, scores & classes) - coco_metric.add_batch_detections(outputs, targets) - if (batch_idx + 1) % 100 == 0: - print(f'processed {(batch_idx + 1) * batch_size} images') - - return coco_metric.result() - -def masks_to_coco_rle(masks, boxes, image_id, height, width, scores, classes, mask_threshold): - """ - Converts masks to COCO RLE format and compiles results including bounding boxes and scores. - - Args: - masks (list of np.ndarray): List of segmentation masks. - boxes (list of np.ndarray): List of bounding boxes corresponding to the masks. - image_id (int): Identifier for the image being processed. - height (int): Height of the image. - width (int): Width of the image. - scores (list of float): Confidence scores for each detection. - classes (list of int): Class IDs for each detection. - - Returns: - list of dict: Each dictionary contains the image ID, category ID, bounding box, - score, and segmentation in RLE format. - """ - results = [] - for i, (mask, box) in enumerate(zip(masks, boxes)): - - binary_mask = np.asfortranarray((mask > mask_threshold).astype(np.uint8)) - rle = mask_utils.encode(binary_mask) - rle['counts'] = rle['counts'].decode('ascii') - - x_min, y_min, x_max, y_max = box[1], box[0], box[3], box[2] - box_width = x_max - x_min - box_height = y_max - y_min - - adjusted_category_id = coco80_to_coco91(np.array([classes[i]]))[0] - - result = { - "image_id": int(image_id), # Convert to int if not already - "category_id": int(adjusted_category_id), # Ensure type is int - "bbox": [float(x_min), float(y_min), float(box_width), float(box_height)], - "score": float(scores[i]), # Ensure type is float - "segmentation": rle - } - results.append(result) - return results - -def save_results_to_json(results, file_path): - """ - Saves the results data to a JSON file. - - Args: - results (list of dict): The results data to be saved. - file_path (str): The path to the file where the results will be saved. - """ - with open(file_path, 'w') as f: - json.dump(results, f) - -def evaluate_seg_model(annotation_file, results_file): - """ - Evaluate the model's segmentation performance using the COCO evaluation metrics. - - This function loads the ground truth annotations and the detection results from specified files, - filters the annotations to include only those images present in the detection results, and then - performs the COCO evaluation. - - Args: - annotation_file (str): The file path for the COCO format ground truth annotations. - results_file (str): The file path for the detection results in COCO format. - - The function prints out the evaluation summary which includes average precision and recall - across various IoU thresholds and object categories. 
- """ - - coco_gt = COCO(annotation_file) - coco_dt = coco_gt.loadRes(results_file) - - # Extract image IDs from the results file - with open(results_file, 'r') as file: - results_data = json.load(file) - result_img_ids = {result['image_id'] for result in results_data} - - # Filter annotations to include only those images present in the results file - coco_gt.imgs = {img_id: coco_gt.imgs[img_id] for img_id in result_img_ids if img_id in coco_gt.imgs} - coco_gt.anns = {ann_id: coco_gt.anns[ann_id] for ann_id in list(coco_gt.anns.keys()) if coco_gt.anns[ann_id]['image_id'] in result_img_ids} - - # Evaluate only for the filtered images - coco_eval = COCOeval(coco_gt, coco_dt, 'segm') - coco_eval.params.imgIds = list(result_img_ids) # Ensure evaluation is only on the filtered image IDs - coco_eval.evaluate() - coco_eval.accumulate() - coco_eval.summarize() - - -def evaluate_yolov8_segmentation(model, model_predict_func, data_dir, data_type='val2017', img_ids_limit=800, output_file='results.json',iou_thresh=0.7, conf=0.001, max_dets=300,mask_thresh=0.55): - """ - Evaluate YOLOv8 model for instance segmentation on COCO dataset. - - Parameters: - - model: The YOLOv8 model to be evaluated. - - model_predict_func: A function to execute the model preidction - - data_dir: The directory containing the COCO dataset. - - data_type: The type of dataset to evaluate against (default is 'val2017'). - - img_ids_limit: The maximum number of images to evaluate (default is 800). - - output_file: The name of the file to save the results (default is 'results.json'). - - Returns: - - None - """ - model_input_size = (640, 640) - model.eval() - - ann_file = os.path.join(data_dir, 'annotations', f'instances_{data_type}.json') - coco = COCO(ann_file) - - img_ids = coco.getImgIds() - img_ids = img_ids[:img_ids_limit] # Adjust number of images to evaluate against - results = [] - for img_id in tqdm(img_ids, desc="Processing Images"): - img = coco.loadImgs(img_id)[0] - image_path = os.path.join(data_dir, data_type, img["file_name"]) - - # Preprocess the image - input_img = load_and_preprocess_image(image_path, yolov8_preprocess_chw_transpose).astype('float32') - - # Run the model - output = model_predict_func(model, input_img) - - #run post processing (nms) - boxes, scores, classes, masks = postprocess_yolov8_inst_seg(outputs=output, conf=conf, iou_thres=iou_thresh, max_out_dets=max_dets) - - if boxes.size == 0: - continue - - orig_img = load_and_preprocess_image(image_path, lambda x: x) - boxes = scale_boxes(boxes, orig_img.shape[0], orig_img.shape[1], 640, 640, True, False) - pp_masks = process_masks(masks, boxes, orig_img.shape, model_input_size) - - #convert output to coco readable - image_results = masks_to_coco_rle(pp_masks, boxes, img_id, orig_img.shape[0], orig_img.shape[1], scores, classes, mask_thresh) - results.extend(image_results) - - save_results_to_json(results, output_file) - evaluate_seg_model(ann_file, output_file) diff --git a/tutorials/mct_model_garden/evaluation_metrics/coco_evaluation_utils.py b/tutorials/mct_model_garden/evaluation_metrics/coco_evaluation_utils.py deleted file mode 100644 index d79ef1e0e..000000000 --- a/tutorials/mct_model_garden/evaluation_metrics/coco_evaluation_utils.py +++ /dev/null @@ -1,254 +0,0 @@ -# Copyright 2024 Sony Semiconductor Israel, Inc. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -from enum import Enum -import numpy as np -from typing import List - -class BoxFormat(Enum): - YMIM_XMIN_YMAX_XMAX = 'ymin_xmin_ymax_xmax' - XMIM_YMIN_XMAX_YMAX = 'xmin_ymin_xmax_ymax' - XMIN_YMIN_W_H = 'xmin_ymin_width_height' - XC_YC_W_H = 'xc_yc_width_height' - - -def convert_to_ymin_xmin_ymax_xmax_format(boxes, orig_format: BoxFormat): - """ - changes the box from one format to another (XMIN_YMIN_W_H --> YMIM_XMIN_YMAX_XMAX ) - also support in same format mode (returns the same format) - - :param boxes: - :param orig_format: - :return: box in format YMIM_XMIN_YMAX_XMAX - """ - if len(boxes) == 0: - return boxes - elif orig_format == BoxFormat.YMIM_XMIN_YMAX_XMAX: - return boxes - elif orig_format == BoxFormat.XMIN_YMIN_W_H: - boxes[:, 2] += boxes[:, 0] # convert width to xmax - boxes[:, 3] += boxes[:, 1] # convert height to ymax - boxes[:, 0], boxes[:, 1] = boxes[:, 1], boxes[:, 0].copy() # swap xmin, ymin columns - boxes[:, 2], boxes[:, 3] = boxes[:, 3], boxes[:, 2].copy() # swap xmax, ymax columns - return boxes - elif orig_format == BoxFormat.XMIM_YMIN_XMAX_YMAX: - boxes[:, 0], boxes[:, 1] = boxes[:, 1], boxes[:, 0].copy() # swap xmin, ymin columns - boxes[:, 2], boxes[:, 3] = boxes[:, 3], boxes[:, 2].copy() # swap xmax, ymax columns - return boxes - elif orig_format == BoxFormat.XC_YC_W_H: - new_boxes = np.copy(boxes) - new_boxes[:, 0] = boxes[:, 1] - boxes[:, 3] / 2 # top left y - new_boxes[:, 1] = boxes[:, 0] - boxes[:, 2] / 2 # top left x - new_boxes[:, 2] = boxes[:, 1] + boxes[:, 3] / 2 # bottom right y - new_boxes[:, 3] = boxes[:, 0] + boxes[:, 2] / 2 # bottom right x - return new_boxes - else: - raise Exception("Unsupported boxes format") - -def clip_boxes(boxes: np.ndarray, h: int, w: int) -> np.ndarray: - """ - Clip bounding boxes to stay within the image boundaries. - - Args: - boxes (numpy.ndarray): Array of bounding boxes in format [y_min, x_min, y_max, x_max]. - h (int): Height of the image. - w (int): Width of the image. - - Returns: - numpy.ndarray: Clipped bounding boxes. - """ - boxes[..., 0] = np.clip(boxes[..., 0], a_min=0, a_max=h) - boxes[..., 1] = np.clip(boxes[..., 1], a_min=0, a_max=w) - boxes[..., 2] = np.clip(boxes[..., 2], a_min=0, a_max=h) - boxes[..., 3] = np.clip(boxes[..., 3], a_min=0, a_max=w) - return boxes - - -def scale_boxes(boxes: np.ndarray, h_image: int, w_image: int, h_model: int, w_model: int, preserve_aspect_ratio: bool, - align_center: bool = True, normalized: bool = True) -> np.ndarray: - """ - Scale and offset bounding boxes based on model output size and original image size. - - Args: - boxes (numpy.ndarray): Array of bounding boxes in format [y_min, x_min, y_max, x_max]. - h_image (int): Original image height. - w_image (int): Original image width. - h_model (int): Model output height. - w_model (int): Model output width. 
- preserve_aspect_ratio (bool): Whether to preserve image aspect ratio during scaling - align_center (bool): Whether to center the bounding boxes after scaling - normalized (bool): Whether treats bounding box coordinates as normalized (i.e., in the range [0, 1]) - - Returns: - numpy.ndarray: Scaled and offset bounding boxes. - """ - deltaH, deltaW = 0, 0 - H, W = h_model, w_model - scale_H, scale_W = h_image / H, w_image / W - - if preserve_aspect_ratio: - scale_H = scale_W = max(h_image / H, w_image / W) - H_tag = int(np.round(h_image / scale_H)) - W_tag = int(np.round(w_image / scale_W)) - if align_center: - deltaH, deltaW = int((H - H_tag) / 2), int((W - W_tag) / 2) - - nh, nw = (H, W) if normalized else (1, 1) - - # Scale and offset boxes - boxes[..., 0] = (boxes[..., 0] * nh - deltaH) * scale_H - boxes[..., 1] = (boxes[..., 1] * nw - deltaW) * scale_W - boxes[..., 2] = (boxes[..., 2] * nh - deltaH) * scale_H - boxes[..., 3] = (boxes[..., 3] * nw - deltaW) * scale_W - - # Clip boxes - boxes = clip_boxes(boxes, h_image, w_image) - - return boxes - - -def scale_coords(kpts: np.ndarray, h_image: int, w_image: int, h_model: int, w_model: int, preserve_aspect_ratio: bool) -> np.ndarray: - """ - Scale and offset keypoints based on model output size and original image size. - - Args: - kpts (numpy.ndarray): Array of bounding keypoints in format [..., 17, 3] where the last dim is (x, y, visible). - h_image (int): Original image height. - w_image (int): Original image width. - h_model (int): Model output height. - w_model (int): Model output width. - preserve_aspect_ratio (bool): Whether to preserve image aspect ratio during scaling - - Returns: - numpy.ndarray: Scaled and offset bounding boxes. - """ - deltaH, deltaW = 0, 0 - H, W = h_model, w_model - scale_H, scale_W = h_image / H, w_image / W - - if preserve_aspect_ratio: - scale_H = scale_W = max(h_image / H, w_image / W) - H_tag = int(np.round(h_image / scale_H)) - W_tag = int(np.round(w_image / scale_W)) - deltaH, deltaW = int((H - H_tag) / 2), int((W - W_tag) / 2) - - # Scale and offset boxes - kpts[..., 0] = (kpts[..., 0] - deltaH) * scale_H - kpts[..., 1] = (kpts[..., 1] - deltaW) * scale_W - - # Clip boxes - kpts = clip_coords(kpts, h_image, w_image) - - return kpts - -def clip_coords(kpts: np.ndarray, h: int, w: int) -> np.ndarray: - """ - Clip keypoints to stay within the image boundaries. - - Args: - kpts (numpy.ndarray): Array of bounding keypoints in format [..., 17, 3] where the last dim is (x, y, visible). - h (int): Height of the image. - w (int): Width of the image. - - Returns: - numpy.ndarray: Clipped bounding boxes. - """ - kpts[..., 0] = np.clip(kpts[..., 0], a_min=0, a_max=h) - kpts[..., 1] = np.clip(kpts[..., 1], a_min=0, a_max=w) - return kpts - - -def nms(dets: np.ndarray, scores: np.ndarray, iou_thres: float = 0.5, max_out_dets: int = 300) -> List[int]: - """ - Perform Non-Maximum Suppression (NMS) on detected bounding boxes. - - Args: - dets (np.ndarray): Array of bounding box coordinates of shape (N, 4) representing [y1, x1, y2, x2]. - scores (np.ndarray): Array of confidence scores associated with each bounding box. - iou_thres (float, optional): IoU threshold for NMS. Default is 0.5. - max_out_dets (int, optional): Maximum number of output detections to keep. Default is 300. - - Returns: - List[int]: List of indices representing the indices of the bounding boxes to keep after NMS. 
- - """ - y1, x1 = dets[:, 0], dets[:, 1] - y2, x2 = dets[:, 2], dets[:, 3] - areas = (x2 - x1 + 1) * (y2 - y1 + 1) - order = scores.argsort()[::-1] - - keep = [] - while order.size > 0: - i = order[0] - keep.append(i) - xx1 = np.maximum(x1[i], x1[order[1:]]) - yy1 = np.maximum(y1[i], y1[order[1:]]) - xx2 = np.minimum(x2[i], x2[order[1:]]) - yy2 = np.minimum(y2[i], y2[order[1:]]) - - w = np.maximum(0.0, xx2 - xx1 + 1) - h = np.maximum(0.0, yy2 - yy1 + 1) - inter = w * h - ovr = inter / (areas[i] + areas[order[1:]] - inter) - - inds = np.where(ovr <= iou_thres)[0] - order = order[inds + 1] - - return keep[:max_out_dets] - -def combined_nms(batch_boxes, batch_scores, iou_thres: float = 0.5, conf: float = 0.001, max_out_dets: int = 300): - - """ - Performs combined Non-Maximum Suppression (NMS) on batches of bounding boxes and scores. - - Parameters: - batch_boxes (List[np.ndarray]): A list of arrays, where each array contains bounding boxes for a batch. - batch_scores (List[np.ndarray]): A list of arrays, where each array contains scores for the corresponding bounding boxes. - iou_thres (float): Intersection over Union (IoU) threshold for NMS. Defaults to 0.5. - conf (float): Confidence threshold for filtering boxes. Defaults to 0.001. - max_out_dets (int): Maximum number of output detections per image. Defaults to 300. - - Returns: - List[Tuple[np.ndarray, np.ndarray, np.ndarray]]: A list of tuples for each batch, where each tuple contains: - - nms_bbox: Array of bounding boxes after NMS. - - nms_scores: Array of scores after NMS. - - nms_classes: Array of class IDs after NMS. - """ - nms_results = [] - for boxes, scores in zip(batch_boxes, batch_scores): - - xc = np.argmax(scores, 1) - xs = np.amax(scores, 1) - x = np.concatenate([boxes, np.expand_dims(xs, 1), np.expand_dims(xc, 1)], 1) - - xi = xs > conf - x = x[xi] - - x = x[np.argsort(-x[:, 4])[:8400]] - scores = x[:, 4] - x[..., :4] = convert_to_ymin_xmin_ymax_xmax_format(x[..., :4], BoxFormat.XC_YC_W_H) - offset = x[:, 5] * 640 - boxes = x[..., :4] + np.expand_dims(offset, 1) - - # Original post-processing part - valid_indexs = nms(boxes, scores, iou_thres=iou_thres, max_out_dets=max_out_dets) - x = x[valid_indexs] - nms_classes = x[:, 5] - nms_bbox = x[:, :4] - nms_scores = x[:, 4] - - nms_results.append((nms_bbox, nms_scores, nms_classes)) - - return nms_results - diff --git a/tutorials/mct_model_garden/models_keras/efficientdet/LICENSE b/tutorials/mct_model_garden/models_keras/efficientdet/LICENSE deleted file mode 100644 index b4e9438bd..000000000 --- a/tutorials/mct_model_garden/models_keras/efficientdet/LICENSE +++ /dev/null @@ -1,201 +0,0 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. 
- - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. 
If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. 
Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "{}" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright 2019 Ross Wightman - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. \ No newline at end of file diff --git a/tutorials/mct_model_garden/models_keras/efficientdet/__init__.py b/tutorials/mct_model_garden/models_keras/efficientdet/__init__.py deleted file mode 100644 index 52f463aa7..000000000 --- a/tutorials/mct_model_garden/models_keras/efficientdet/__init__.py +++ /dev/null @@ -1,16 +0,0 @@ -# Copyright 2023 Sony Semiconductor Israel, Inc. All rights reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -from tutorials.mct_model_garden.models_keras.efficientdet.effdet_keras import EfficientDetKeras \ No newline at end of file diff --git a/tutorials/mct_model_garden/models_keras/efficientdet/effdet_keras.py b/tutorials/mct_model_garden/models_keras/efficientdet/effdet_keras.py deleted file mode 100644 index 18778b9ed..000000000 --- a/tutorials/mct_model_garden/models_keras/efficientdet/effdet_keras.py +++ /dev/null @@ -1,662 +0,0 @@ -# --------------------------------------------------------------- -# Copyright 2019 Ross Wightman -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# --------------------------------------------------------------- - -""" -The following code was mostly duplicated from https://github.com/rwightman/efficientdet-pytorch -and changed to generate an equivalent Keras model. -Main changes: - * Torch layers replaced with Keras layers - * removed class inheritance from torch.nn.Module - * changed "forward" class methods with "__call__" -""" - -import logging -from collections import OrderedDict -from functools import partial -from typing import List, Optional, Union, Tuple - -import tensorflow as tf - - -gpus = tf.config.list_physical_devices('GPU') -if gpus: - try: - # Currently, memory growth needs to be the same across GPUs - for gpu in gpus: - tf.config.experimental.set_memory_growth(gpu, True) - logical_gpus = tf.config.list_logical_devices('GPU') - print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs") - except RuntimeError as e: - # Memory growth must be set before GPUs have been initialized - print(e) - - -from effdet.anchors import Anchors, get_feat_sizes -from effdet.config import get_fpn_config, set_config_readonly -from effdet.efficientdet import get_feature_info -from tutorials.mct_model_garden.models_keras.efficientdet.effnet_keras import create_model, handle_name -from tutorials.mct_model_garden.models_keras.efficientdet.effnet_blocks_keras import create_conv2d, create_pool2d -from tutorials.mct_model_garden.models_keras.utils.torch2keras_weights_translation import load_state_dict - -from sony_custom_layers.keras.object_detection.ssd_post_process import SSDPostProcess -from sony_custom_layers.keras.object_detection import ScoreConverter - -_DEBUG = False -_USE_SCALE = False -_ACT_LAYER = tf.nn.swish - -# ####################################################################################### -# This file generates the Keras model. 
It's based on the EfficientDet repository in -# https://github.com/rwightman/efficientdet-pytorch, and switched the Torch Modules -# with Keras layers -# ####################################################################################### - -def get_act_layer(act_type): - if act_type == 'relu6': - return partial(tf.keras.layers.ReLU, max_value=6.0) - else: - raise NotImplemented - - -class ConvBnAct2d: - def __init__( - self, - in_channels, - out_channels, - kernel_size, - stride=1, - dilation=1, - padding='', - bias=False, - norm_layer=tf.keras.layers.BatchNormalization, - act_layer=_ACT_LAYER, - name=None - ): - name = handle_name(name) - self.conv = create_conv2d( - in_channels, - out_channels, - kernel_size, - stride=stride, - dilation=dilation, - padding=padding, - bias=bias, - name=name + '/conv' - ) - self.bn = None if norm_layer is None else norm_layer(name=name + '/bn') - self.act = None if act_layer is None else act_layer() - - def __call__(self, x): - x = self.conv(x) - if self.bn is not None: - x = self.bn(x) - if self.act is not None: - x = self.act(x) - return x - - -class SeparableConv2d: - """ Separable Conv - """ - def __init__( - self, - in_channels, - out_channels, - kernel_size=3, - stride=1, - dilation=1, - padding='', - bias=False, - channel_multiplier=1.0, - pw_kernel_size=1, - norm_layer=tf.keras.layers.BatchNormalization, - act_layer=_ACT_LAYER, - name=None - ): - name = handle_name(name) - self.conv_dw = create_conv2d( - in_channels, - int(in_channels * channel_multiplier), - kernel_size, - stride=stride, - dilation=dilation, - padding=padding, - depthwise=True, - name=name + '/conv_dw' - ) - self.conv_pw = create_conv2d( - int(in_channels * channel_multiplier), - out_channels, - pw_kernel_size, - padding=padding, - bias=bias, - name=name + '/conv_pw' - ) - self.bn = None if norm_layer is None else norm_layer(name=name + '/bn') - self.act = None if act_layer is None else act_layer() - - def __call__(self, x): - x = self.conv_dw(x) - x = self.conv_pw(x) - if self.bn is not None: - x = self.bn(x) - if self.act is not None: - x = self.act(x) - return x - - -class Interpolate2d: - r"""Resamples a 2d Image - - The input data is assumed to be of the form - `minibatch x channels x [optional depth] x [optional height] x width`. - Hence, for spatial inputs, we expect a 4D Tensor and for volumetric inputs, we expect a 5D Tensor. - - The algorithms available for upsampling are nearest neighbor and linear, - bilinear, bicubic and trilinear for 3D, 4D and 5D input Tensor, - respectively. - - One can either give a :attr:`scale_factor` or the target output :attr:`size` to - calculate the output size. (You cannot give both, as it is ambiguous) - - Args: - size (int or Tuple[int] or Tuple[int, int] or Tuple[int, int, int], optional): - output spatial sizes - scale_factor (float or Tuple[float] or Tuple[float, float] or Tuple[float, float, float], optional): - multiplier for spatial size. Has to match input size if it is a tuple. - mode (str, optional): the upsampling algorithm: one of ``'nearest'``, - ``'linear'``, ``'bilinear'``, ``'bicubic'`` and ``'trilinear'``. - Default: ``'nearest'`` - align_corners (bool, optional): if ``True``, the corner pixels of the input - and output tensors are aligned, and thus preserving the values at - those pixels. This only has effect when :attr:`mode` is - ``'linear'``, ``'bilinear'``, or ``'trilinear'``. 
Default: ``False`` - """ - __constants__ = ['size', 'scale_factor', 'mode', 'align_corners', 'name'] - name: str - size: Optional[Union[int, Tuple[int, int]]] - scale_factor: Optional[Union[float, Tuple[float, float]]] - mode: str - align_corners: Optional[bool] - - def __init__( - self, - size: Optional[Union[int, Tuple[int, int]]] = None, - scale_factor: Optional[Union[float, Tuple[float, float]]] = None, - mode: str = 'nearest', - align_corners: bool = False, - ) -> None: - self.name = type(self).__name__ - self.size = size - if isinstance(scale_factor, tuple): - self.scale_factor = tuple(float(factor) for factor in scale_factor) - else: - self.scale_factor = float(scale_factor) if scale_factor else None - self.mode = mode - self.align_corners = None if mode == 'nearest' else align_corners - - # tested in keras - assert self.align_corners in [None, False] - assert self.scale_factor is None - if self.mode == 'nearest': - self.mode = tf.image.ResizeMethod.NEAREST_NEIGHBOR - else: - raise NotImplemented - - def __call__(self, input: tf.Tensor) -> tf.Tensor: - return tf.image.resize(input, self.size, method=self.mode) - - -class ResampleFeatureMap: - - def __init__( - self, - in_channels, - out_channels, - input_size, - output_size, - pad_type='', - downsample=None, - upsample=None, - norm_layer=tf.keras.layers.BatchNormalization, - apply_bn=False, - redundant_bias=False, - name=None - ): - name = handle_name(name) - downsample = downsample or 'max' - upsample = upsample or 'nearest' - self.in_channels = in_channels - self.out_channels = out_channels - self.input_size = input_size - self.output_size = output_size - - self.layers = [] - if in_channels != out_channels: - self.layers.append(ConvBnAct2d( - in_channels, - out_channels, - kernel_size=1, - padding=pad_type, - norm_layer=norm_layer if apply_bn else None, - bias=not apply_bn or redundant_bias, - act_layer=None, - name=f'{name}/conv'#/{len(self.layers)}' - )) - - if input_size[0] > output_size[0] and input_size[1] > output_size[1]: - if downsample in ('max', 'avg'): - stride_size_h = int((input_size[0] - 1) // output_size[0] + 1) - stride_size_w = int((input_size[1] - 1) // output_size[1] + 1) - if stride_size_h == stride_size_w: - kernel_size = stride_size_h + 1 - stride = stride_size_h - else: - # FIXME need to support tuple kernel / stride input to padding fns - kernel_size = (stride_size_h + 1, stride_size_w + 1) - stride = (stride_size_h, stride_size_w) - down_inst = create_pool2d(downsample, kernel_size=kernel_size, stride=stride, padding=pad_type, - name=name + '/downsample') - else: - if _USE_SCALE: # FIXME not sure if scale vs size is better, leaving both in to test for now - scale = (output_size[0] / input_size[0], output_size[1] / input_size[1]) - down_inst = Interpolate2d(scale_factor=scale, mode=downsample) - else: - down_inst = Interpolate2d(size=output_size, mode=downsample, name=name) - self.layers.append(down_inst) - else: - if input_size[0] < output_size[0] or input_size[1] < output_size[1]: - if _USE_SCALE: - scale = (output_size[0] / input_size[0], output_size[1] / input_size[1]) - self.add_module('upsample', Interpolate2d(scale_factor=scale, mode=upsample)) - else: - self.layers.append(Interpolate2d(size=output_size, mode=upsample)) # 'upsample' - - def __call__(self, x: tf.Tensor) -> List[tf.Tensor]: - for module in self.layers: - x = module(x) - return x - - -class FpnCombine: - def __init__( - self, - feature_info, - fpn_channels, - inputs_offsets, - output_size, - pad_type='', - downsample=None, - 
upsample=None, - norm_layer=tf.keras.layers.BatchNormalization, - apply_resample_bn=False, - redundant_bias=False, - weight_method='attn', - name=None - ): - name = handle_name(name) - self.inputs_offsets = inputs_offsets - self.weight_method = weight_method - - self.resample = [] # nn.ModuleDict() - for idx, offset in enumerate(inputs_offsets): - self.resample.append(ResampleFeatureMap( - feature_info[offset]['num_chs'], - fpn_channels, - input_size=feature_info[offset]['size'], - output_size=output_size, - pad_type=pad_type, - downsample=downsample, - upsample=upsample, - norm_layer=norm_layer, - apply_bn=apply_resample_bn, - redundant_bias=redundant_bias, - name = name + f'/resample/{offset}' - )) - - if weight_method == 'attn' or weight_method == 'fastattn': - self.edge_weights = nn.Parameter(torch.ones(len(inputs_offsets)), requires_grad=True) # WSM - else: - self.edge_weights = None - - def __call__(self, x: List[tf.Tensor]): - dtype = x[0].dtype - nodes = [] - for offset, resample in zip(self.inputs_offsets, self.resample): - input_node = x[offset] - input_node = resample(input_node) - nodes.append(input_node) - - if self.weight_method == 'attn': - normalized_weights = torch.softmax(self.edge_weights.to(dtype=dtype), dim=0) - out = torch.stack(nodes, dim=-1) * normalized_weights - out = torch.sum(out, dim=-1) - elif self.weight_method == 'fastattn': - edge_weights = nn.functional.relu(self.edge_weights.to(dtype=dtype)) - weights_sum = torch.sum(edge_weights) - out = torch.stack( - [(nodes[i] * edge_weights[i]) / (weights_sum + 0.0001) for i in range(len(nodes))], dim=-1) - out = torch.sum(out, dim=-1) - elif self.weight_method == 'sum': - out = tf.keras.layers.Add()(nodes[:2]) - for i in range(2, len(nodes)): - out = tf.keras.layers.Add()([out, nodes[i]]) - else: - raise ValueError('unknown weight_method {}'.format(self.weight_method)) - return out - - -class Fnode: - """ A simple wrapper used in place of nn.Sequential for torchscript typing - Handles input type List[Tensor] -> output type Tensor - """ - def __init__(self, combine, after_combine): - self.combine = combine - self.after_combine = after_combine - - def __call__(self, x: List[tf.Tensor]) -> tf.Tensor: - x = self.combine(x) - for fn in self.after_combine: - x = fn(x) - return x - - -class BiFpnLayer: - def __init__( - self, - feature_info, - feat_sizes, - fpn_config, - fpn_channels, - num_levels=5, - pad_type='', - downsample=None, - upsample=None, - norm_layer=tf.keras.layers.BatchNormalization, - act_layer=_ACT_LAYER, - apply_resample_bn=False, - pre_act=True, - separable_conv=True, - redundant_bias=False, - name=None - ): - name = handle_name(name) - self.num_levels = num_levels - # fill feature info for all FPN nodes (chs and feat size) before creating FPN nodes - fpn_feature_info = feature_info + [ - dict(num_chs=fpn_channels, size=feat_sizes[fc['feat_level']]) for fc in fpn_config.nodes] - - self.fnode = [] # nn.ModuleList() - for i, fnode_cfg in enumerate(fpn_config.nodes): - logging.debug('fnode {} : {}'.format(i, fnode_cfg)) - combine = FpnCombine( - fpn_feature_info, - fpn_channels, - tuple(fnode_cfg['inputs_offsets']), - output_size=feat_sizes[fnode_cfg['feat_level']], - pad_type=pad_type, - downsample=downsample, - upsample=upsample, - norm_layer=norm_layer, - apply_resample_bn=apply_resample_bn, - redundant_bias=redundant_bias, - weight_method=fnode_cfg['weight_method'], - name=f'{name}/fnode/{i}/combine' - ) - - after_combine = [] # nn.Sequential() - conv_kwargs = dict( - in_channels=fpn_channels, - 
out_channels=fpn_channels, - kernel_size=3, - padding=pad_type, - bias=False, - norm_layer=norm_layer, - act_layer=act_layer, - ) - if pre_act: - conv_kwargs['bias'] = redundant_bias - conv_kwargs['act_layer'] = None - after_combine.append(act_layer()) # 'act' - after_combine.append( - SeparableConv2d(name=f'{name}/fnode/{i}/after_combine/conv', **conv_kwargs) if separable_conv - else ConvBnAct2d(name=f'{name}/fnode/{i}/after_combine/conv', **conv_kwargs)) - - self.fnode.append(Fnode(combine=combine, after_combine=after_combine)) - - self.feature_info = fpn_feature_info[-num_levels::] - - def __call__(self, x: List[tf.Tensor]): - for fn in self.fnode: - x.append(fn(x)) - return x[-self.num_levels::] - - -class BiFpn: - - def __init__(self, config, feature_info, name): - self.num_levels = config.num_levels - norm_layer = config.norm_layer or tf.keras.layers.BatchNormalization - norm_kwargs = {**config.norm_kwargs} - norm_kwargs['epsilon'] = norm_kwargs.pop('eps', 0.001) - if config.norm_kwargs: - norm_layer = partial(norm_layer, **norm_kwargs) - act_layer = get_act_layer(config.act_type) or _ACT_LAYER - fpn_config = config.fpn_config or get_fpn_config( - config.fpn_name, min_level=config.min_level, max_level=config.max_level) - - feat_sizes = get_feat_sizes(config.image_size, max_level=config.max_level) - prev_feat_size = feat_sizes[config.min_level] - self.resample = [] # nn.ModuleDict() - for level in range(config.num_levels): - feat_size = feat_sizes[level + config.min_level] - if level < len(feature_info): - in_chs = feature_info[level]['num_chs'] - feature_info[level]['size'] = feat_size - else: - # Adds a coarser level by downsampling the last feature map - self.resample.append(ResampleFeatureMap( - in_channels=in_chs, - out_channels=config.fpn_channels, - input_size=prev_feat_size, - output_size=feat_size, - pad_type=config.pad_type, - downsample=config.downsample_type, - upsample=config.upsample_type, - norm_layer=norm_layer, - apply_bn=config.apply_resample_bn, - redundant_bias=config.redundant_bias, - name=name + f'/resample/{level}' - )) - in_chs = config.fpn_channels - feature_info.append(dict(num_chs=in_chs, size=feat_size)) - prev_feat_size = feat_size - - self.cell = [] # SequentialList() - for rep in range(config.fpn_cell_repeats): - logging.debug('building cell {}'.format(rep)) - fpn_layer = BiFpnLayer( - feature_info=feature_info, - feat_sizes=feat_sizes, - fpn_config=fpn_config, - fpn_channels=config.fpn_channels, - num_levels=config.num_levels, - pad_type=config.pad_type, - downsample=config.downsample_type, - upsample=config.upsample_type, - norm_layer=norm_layer, - act_layer=act_layer, - separable_conv=config.separable_conv, - apply_resample_bn=config.apply_resample_bn, - pre_act=not config.conv_bn_relu_pattern, - redundant_bias=config.redundant_bias, - name=name + f'/cell/{rep}' - ) - self.cell.append(fpn_layer) - feature_info = fpn_layer.feature_info - - def __call__(self, x: List[tf.Tensor]): - for resample in self.resample: - x.append(resample(x[-1])) - for _cell in self.cell: - x = _cell(x) - return x - - -class HeadNet: - - def __init__(self, config, num_outputs, name): - self.num_levels = config.num_levels - self.bn_level_first = getattr(config, 'head_bn_level_first', False) - norm_layer = config.norm_layer or tf.keras.layers.BatchNormalization - if config.norm_kwargs: - norm_kwargs = {**config.norm_kwargs} - if 'eps' in norm_kwargs: - eps = norm_kwargs.pop('eps') - norm_kwargs['epsilon'] = eps - norm_layer = partial(norm_layer, **norm_kwargs) - act_type = 
config.head_act_type if getattr(config, 'head_act_type', None) else config.act_type - act_layer = get_act_layer(act_type) or _ACT_LAYER - - # Build convolution repeats - conv_fn = SeparableConv2d if config.separable_conv else ConvBnAct2d - conv_kwargs = dict( - in_channels=config.fpn_channels, - out_channels=config.fpn_channels, - kernel_size=3, - padding=config.pad_type, - bias=config.redundant_bias, - act_layer=None, - norm_layer=None, - ) - self.conv_rep = [conv_fn(name=f'{name}/conv_rep/{_}', **conv_kwargs) for _ in range(config.box_class_repeats)] - - # Build batchnorm repeats. There is a unique batchnorm per feature level for each repeat. - # This can be organized with repeats first or feature levels first in module lists, the original models - # and weights were setup with repeats first, levels first is required for efficient torchscript usage. - self.bn_rep = [] # nn.ModuleList() - if self.bn_level_first: - for _ in range(self.num_levels): - self.bn_rep.append([ - norm_layer(config.fpn_channels, name=f'{name}/bn_rep/{_}/', ) for _ in range(config.box_class_repeats)]) - else: - for _ in range(config.box_class_repeats): - self.bn_rep.append([norm_layer(name=f'{name}/bn_rep/{_}/{_level}/bn') for _level in range(self.num_levels)]) - - self.act = act_layer - - # Prediction (output) layer. Has bias with special init reqs, see init fn. - num_anchors = len(config.aspect_ratios) * config.num_scales - predict_kwargs = dict( - in_channels=config.fpn_channels, - out_channels=num_outputs * num_anchors, - kernel_size=3, - padding=config.pad_type, - bias=True, - norm_layer=None, - act_layer=None, - name=f'{name}/predict' - ) - self.predict = conv_fn(**predict_kwargs) - - def toggle_bn_level_first(self): - """ Toggle the batchnorm layers between feature level first vs repeat first access pattern - Limitations in torchscript require feature levels to be iterated over first. - - This function can be used to allow loading weights in the original order, and then toggle before - jit scripting the model. - """ - new_bn_rep = [] # nn.ModuleList() - for i in range(len(self.bn_rep[0])): - bn_first = [] # nn.ModuleList() - for r in self.bn_rep.children(): - m = r[i] - # NOTE original rep first model def has extra Sequential container with 'bn', this was - # flattened in the level first definition. 
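# --- Editor's note (not part of the original file) ---------------------------
# The bn_rep reordering performed here is essentially a transpose of a nested
# list: repeats-first bn_rep[r][l] becomes levels-first bn_rep[l][r]. A
# framework-free sketch of that reshuffle, assuming a plain list-of-lists (the
# surrounding code additionally unwraps/wraps Sequential-style containers):
def transpose_nested(bn_rep):
    # bn_rep: list over repeats, each entry a list over feature levels
    # returns: list over feature levels, each entry a list over repeats
    return [[rep[level] for rep in bn_rep] for level in range(len(bn_rep[0]))]
# -----------------------------------------------------------------------------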
- bn_first.append(m[0] if isinstance(m, nn.Sequential) else nn.Sequential(OrderedDict([('bn', m)]))) - new_bn_rep.append(bn_first) - self.bn_level_first = not self.bn_level_first - self.bn_rep = new_bn_rep - - def _forward(self, x: List[tf.Tensor]) -> List[tf.Tensor]: - outputs = [] - for level in range(self.num_levels): - x_level = x[level] - for conv, bn in zip(self.conv_rep, self.bn_rep): - x_level = conv(x_level) - x_level = bn[level](x_level) # this is not allowed in torchscript - x_level = self.act()(x_level) - outputs.append(self.predict(x_level)) - return outputs - - def _forward_level_first(self, x: List[tf.Tensor]) -> List[tf.Tensor]: - outputs = [] - for level, bn_rep in enumerate(self.bn_rep): # iterating over first bn dim first makes TS happy - x_level = x[level] - for conv, bn in zip(self.conv_rep, bn_rep): - x_level = conv(x_level) - x_level = bn(x_level) - x_level = self.act()(x_level) - outputs.append(self.predict(x_level)) - return outputs - - def __call__(self, x: List[tf.Tensor]) -> List[tf.Tensor]: - if self.bn_level_first: - return self._forward_level_first(x) - else: - return self._forward(x) - - -class EfficientDetKeras: - - def __init__(self, config, pretrained_backbone=True, alternate_init=False): - self.config = config - set_config_readonly(self.config) - self.backbone = create_model( - config.backbone_name, - features_only=True, - out_indices=self.config.backbone_indices or (2, 3, 4), - pretrained=pretrained_backbone, - **config.backbone_args, - ) - feature_info = get_feature_info(self.backbone) - self.fpn = BiFpn(self.config, feature_info, 'fpn') - self.class_net = HeadNet(self.config, num_outputs=self.config.num_classes, name='class_net') - self.box_net = HeadNet(self.config, num_outputs=4, name='box_net') - - def toggle_head_bn_level_first(self): - """ Toggle the head batchnorm layers between being access with feature_level first vs repeat - """ - self.class_net.toggle_bn_level_first() - self.box_net.toggle_bn_level_first() - - def get_model(self, input_shape, load_state_dict_to_model=True): - _input = tf.keras.layers.Input(shape=input_shape) - x = self.backbone(_input) - x = self.fpn(x) - x_class = self.class_net(x) - x_box = self.box_net(x) - - x_class = [tf.keras.layers.Reshape((-1, self.config.num_classes))(_x) for _x in x_class] - x_class = tf.keras.layers.Concatenate(axis=1)(x_class) - x_box = [tf.keras.layers.Reshape((-1, 4))(_x) for _x in x_box] - x_box = tf.keras.layers.Concatenate(axis=1)(x_box) - - anchors = tf.constant(Anchors.from_config(self.config).boxes.detach().cpu().numpy()) - - ssd_pp = SSDPostProcess(anchors, [1, 1, 1, 1], [*self.config.image_size], - ScoreConverter.SIGMOID, score_threshold=0.001, iou_threshold=0.5, - max_detections=self.config.max_det_per_image) - outputs = ssd_pp((x_box, x_class)) - - model = tf.keras.Model(inputs=_input, outputs=outputs) - if load_state_dict_to_model: - load_state_dict(model, self.config.url) - return model diff --git a/tutorials/mct_model_garden/models_keras/efficientdet/effnet_blocks_keras.py b/tutorials/mct_model_garden/models_keras/efficientdet/effnet_blocks_keras.py deleted file mode 100644 index 4b221bb46..000000000 --- a/tutorials/mct_model_garden/models_keras/efficientdet/effnet_blocks_keras.py +++ /dev/null @@ -1,542 +0,0 @@ -# --------------------------------------------------------------- -# Copyright 2019 Ross Wightman -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# --------------------------------------------------------------- - -""" -The following code was mostly duplicated from https://github.com/huggingface/pytorch-image-models -and changed to generate an equivalent Keras model. -Main changes: - * Torch layers replaced with Keras layers - * removed class inheritance from torch.nn.Module - * changed "forward" class methods with "__call__" -""" - -import types -from functools import partial -import tensorflow as tf - -from timm.layers import DropPath, make_divisible - -__all__ = [ - 'SqueezeExcite', 'ConvBnAct', 'DepthwiseSeparableConv', 'InvertedResidual', 'CondConvResidual', 'EdgeResidual'] - - -def handle_name(_name): - return '' if _name is None or _name == '' else _name - - -def num_groups(group_size, channels): - if not group_size: # 0 or None - return 1 # normal conv with 1 group - else: - # NOTE group_size == 1 -> depthwise conv - assert channels % group_size == 0 - return channels // group_size - - -def create_act_layer(act_name, **kwargs): - if isinstance(act_name, str): - raise NotImplemented - elif isinstance(act_name, tf.keras.layers.Layer): - return act_name(**kwargs) - else: - return act_name - - -def get_attn(attn_type): - if isinstance(attn_type, tf.keras.layers.Layer): - return attn_type - module_cls = None - if attn_type: - if isinstance(attn_type, str): - raise NotImplemented - attn_type = attn_type.lower() - # Lightweight attention modules (channel and/or coarse spatial). - # Typically added to existing network architecture blocks in addition to existing convolutions. - if attn_type == 'se': - module_cls = SEModule - elif attn_type == 'ese': - module_cls = EffectiveSEModule - elif attn_type == 'eca': - module_cls = EcaModule - elif attn_type == 'ecam': - module_cls = partial(EcaModule, use_mlp=True) - elif attn_type == 'ceca': - module_cls = CecaModule - elif attn_type == 'ge': - module_cls = GatherExcite - elif attn_type == 'gc': - module_cls = GlobalContext - elif attn_type == 'gca': - module_cls = partial(GlobalContext, fuse_add=True, fuse_scale=False) - elif attn_type == 'cbam': - module_cls = CbamModule - elif attn_type == 'lcbam': - module_cls = LightCbamModule - - # Attention / attention-like modules w/ significant params - # Typically replace some of the existing workhorse convs in a network architecture. - # All of these accept a stride argument and can spatially downsample the input. - elif attn_type == 'sk': - module_cls = SelectiveKernel - elif attn_type == 'splat': - module_cls = SplitAttn - - # Self-attention / attention-like modules w/ significant compute and/or params - # Typically replace some of the existing workhorse convs in a network architecture. - # All of these accept a stride argument and can spatially downsample the input. - elif attn_type == 'lambda': - return LambdaLayer - elif attn_type == 'bottleneck': - return BottleneckAttn - elif attn_type == 'halo': - return HaloAttn - elif attn_type == 'nl': - module_cls = NonLocalAttn - elif attn_type == 'bat': - module_cls = BatNonLocalAttn - - # Woops! 
- else: - assert False, "Invalid attn module (%s)" % attn_type - elif isinstance(attn_type, bool): - raise NotImplemented - if attn_type: - module_cls = SEModule - else: - module_cls = attn_type - return module_cls - - -def create_conv2d_pad(in_chs, out_chs, kernel_size, **kwargs): - padding = kwargs.pop('padding', '') - s = kwargs.pop('stride', None) - if s is not None: - kwargs.update({'strides': s}) - d = kwargs.pop('dilation', None) - if d is not None: - kwargs.update({'dilation_rate': d}) - assert padding in ['valid', 'same'], 'Not Implemented' - kwargs.setdefault('use_bias', kwargs.pop('bias', False)) - if kwargs.get('groups', -1) == in_chs: - kwargs.pop('groups', None) - return tf.keras.layers.DepthwiseConv2D(kernel_size, padding=padding, **kwargs) - else: - return tf.keras.layers.Conv2D(out_chs, kernel_size, padding=padding, **kwargs) - - -def create_pool2d(pool_type, kernel_size, stride=None, **kwargs): - stride = stride or kernel_size - padding = kwargs.pop('padding', '') - padding, is_dynamic = padding.lower(), True - if is_dynamic: - if pool_type == 'avg': - raise NotImplemented - return AvgPool2dSame(kernel_size, stride=stride, **kwargs) - elif pool_type == 'max': - # return MaxPool2dSame(kernel_size, stride=stride, **kwargs) - return tf.keras.layers.MaxPooling2D(kernel_size, strides=stride, padding=padding.lower()) - else: - assert False, f'Unsupported pool type {pool_type}' - else: - raise NotImplemented - if pool_type == 'avg': - return nn.AvgPool2d(kernel_size, stride=stride, padding=padding, **kwargs) - elif pool_type == 'max': - return nn.MaxPool2d(kernel_size, stride=stride, padding=padding, **kwargs) - else: - assert False, f'Unsupported pool type {pool_type}' - - -def create_conv2d(in_channels, out_channels, kernel_size, **kwargs): - """ Select a 2d convolution implementation based on arguments - Creates and returns one of torch.nn.Conv2d, Conv2dSame, MixedConv2d, or CondConv2d. - - Used extensively by EfficientNet, MobileNetv3 and related networks. - """ - if isinstance(kernel_size, list): - raise NotImplemented - assert 'num_experts' not in kwargs # MixNet + CondConv combo not supported currently - if 'groups' in kwargs: - groups = kwargs.pop('groups') - if groups == in_channels: - kwargs['depthwise'] = True - else: - assert groups == 1 - # We're going to use only lists for defining the MixedConv2d kernel groups, - # ints, tuples, other iterables will continue to pass to normal conv and specify h, w. 
- m = MixedConv2d(in_channels, out_channels, kernel_size, **kwargs) - else: - depthwise = kwargs.pop('depthwise', False) - # for DW out_channels must be multiple of in_channels as must have out_channels % groups == 0 - groups = in_channels if depthwise else kwargs.pop('groups', 1) - if 'num_experts' in kwargs and kwargs['num_experts'] > 0: - raise NotImplemented - m = CondConv2d(in_channels, out_channels, kernel_size, groups=groups, **kwargs) - else: - m = create_conv2d_pad(in_channels, out_channels, kernel_size, groups=groups, **kwargs) - return m - - -class SqueezeExcite: - """ Squeeze-and-Excitation w/ specific features for EfficientNet/MobileNet family - - Args: - in_chs (int): input channels to layer - rd_ratio (float): ratio of squeeze reduction - act_layer (nn.Module): activation layer of containing block - gate_layer (Callable): attention gate function - force_act_layer (nn.Module): override block's activation fn if this is set/bound - rd_round_fn (Callable): specify a fn to calculate rounding of reduced chs - """ - - def __init__( - self, in_chs, rd_ratio=0.25, rd_channels=None, act_layer=tf.keras.layers.ReLU, - gate_layer=tf.sigmoid, force_act_layer=None, rd_round_fn=None, name=None): - name = handle_name(name) - if rd_channels is None: - rd_round_fn = rd_round_fn or round - rd_channels = rd_round_fn(in_chs * rd_ratio) - act_layer = force_act_layer or act_layer - # self.conv_reduce = nn.Conv2d(in_chs, rd_channels, 1, bias=True) - self.conv_reduce = tf.keras.layers.Conv2D(rd_channels, 1, name=name + 'conv_reduce') - self.act1 = create_act_layer(act_layer, name=name + 'act1') - # self.conv_expand = nn.Conv2d(rd_channels, in_chs, 1, bias=True) - self.conv_expand = tf.keras.layers.Conv2D(in_chs, 1, name=name + 'conv_expand') - self.gate = create_act_layer(gate_layer) - - def __call__(self, x): - x_se = x.mean((2, 3), keepdim=True) - x_se = self.conv_reduce(x_se) - x_se = self.act1(x_se) - x_se = self.conv_expand(x_se) - return x * self.gate(x_se) - - -class ConvBnAct: - """ Conv + Norm Layer + Activation w/ optional skip connection - """ - def __init__( - self, in_chs, out_chs, kernel_size, stride=1, dilation=1, group_size=0, pad_type='', - skip=False, act_layer=tf.keras.layers.ReLU, norm_layer=tf.keras.layers.BatchNormalization, - drop_path_rate=0., name=None): - norm_act_layer = get_norm_act_layer(norm_layer, act_layer) - groups = num_groups(group_size, in_chs) - self.has_skip = skip and stride == 1 and in_chs == out_chs - - self.conv = create_conv2d( - in_chs, out_chs, kernel_size, stride=stride, dilation=dilation, groups=groups, padding=pad_type) - self.bn1 = norm_act_layer(out_chs, inplace=True) - self.drop_path = DropPath(drop_path_rate) if drop_path_rate else nn.Identity() - - def feature_info(self, location): - if location == 'expansion': # output of conv after act, same as block coutput - return dict(module='bn1', hook_type='forward', num_chs=self.conv.filters) - else: # location == 'bottleneck', block output - return dict(module='', num_chs=self.conv.filters) - - def __call__(self, x): - shortcut = x - x = self.conv(x) - x = self.bn1(x) - if self.has_skip: - x = self.drop_path(x) + shortcut - return x - - -class DepthwiseSeparableConv: - """ DepthwiseSeparable block - Used for DS convs in MobileNet-V1 and in the place of IR blocks that have no expansion - (factor of 1.0). This is an alternative to having a IR with an optional first pw conv. 
- """ - def __init__( - self, in_chs, out_chs, dw_kernel_size=3, stride=1, dilation=1, group_size=1, pad_type='', - noskip=False, pw_kernel_size=1, pw_act=False, act_layer=tf.keras.layers.ReLU, - norm_layer=tf.keras.layers.BatchNormalization, se_layer=None, drop_path_rate=0., name=None): - norm_act_layer = get_norm_act_layer(norm_layer, act_layer) - groups = num_groups(group_size, in_chs) - self.has_skip = (stride == 1 and in_chs == out_chs) and not noskip - self.has_pw_act = pw_act # activation after point-wise conv - - self.conv_dw = create_conv2d( - in_chs, in_chs, dw_kernel_size, stride=stride, dilation=dilation, padding=pad_type, - groups=groups, name=name + '/conv_dw') - self.bn1 = norm_act_layer(in_chs, name=name + '/bn1') - - # Squeeze-and-excitation - self.se = se_layer(in_chs, act_layer=act_layer, name=name + '/se') if se_layer else None - - self.conv_pw = create_conv2d(in_chs, out_chs, pw_kernel_size, padding=pad_type, name=name + '/conv_pw') - self.bn2 = norm_act_layer(out_chs, inplace=True, apply_act=self.has_pw_act, name=name + '/bn2') - self.drop_path = DropPath(drop_path_rate) if drop_path_rate else None - - def feature_info(self, location): - if location == 'expansion': # after SE, input to PW - return dict(module='conv_pw', hook_type='forward_pre', num_chs=self.conv_pw.in_channels) - else: # location == 'bottleneck', block output - return dict(module='', num_chs=self.conv_pw.filters) - - def __call__(self, x): - shortcut = x - x = self.conv_dw(x) - x = self.bn1(x) - if self.se is not None: - x = self.se(x) - x = self.conv_pw(x) - x = self.bn2(x) - if self.has_skip: - if self.drop_path is not None: - x = self.drop_path(x) - x = x + shortcut - return x - - -class InvertedResidual: - """ Inverted residual block w/ optional SE - - Originally used in MobileNet-V2 - https://arxiv.org/abs/1801.04381v4, this layer is often - referred to as 'MBConv' for (Mobile inverted bottleneck conv) and is also used in - * MNasNet - https://arxiv.org/abs/1807.11626 - * EfficientNet - https://arxiv.org/abs/1905.11946 - * MobileNet-V3 - https://arxiv.org/abs/1905.02244 - """ - - def __init__( - self, in_chs, out_chs, dw_kernel_size=3, stride=1, dilation=1, group_size=1, pad_type='', - noskip=False, exp_ratio=1.0, exp_kernel_size=1, pw_kernel_size=1, act_layer=tf.keras.layers.ReLU, - norm_layer=tf.keras.layers.BatchNormalization, se_layer=None, conv_kwargs=None, drop_path_rate=0., - name=None): - norm_act_layer = get_norm_act_layer(norm_layer, act_layer) - conv_kwargs = conv_kwargs or {} - mid_chs = make_divisible(in_chs * exp_ratio) - groups = num_groups(group_size, mid_chs) - self.has_skip = (in_chs == out_chs and stride == 1) and not noskip - - # Point-wise expansion - self.conv_pw = create_conv2d(in_chs, mid_chs, exp_kernel_size, padding=pad_type, name=name + '/conv_pw', **conv_kwargs) - self.bn1 = norm_act_layer(mid_chs, name=name + '/bn1') - - # Depth-wise convolution - self.conv_dw = create_conv2d( - mid_chs, mid_chs, dw_kernel_size, stride=stride, dilation=dilation, - groups=groups, padding=pad_type, name=name + '/conv_dw', **conv_kwargs) - self.bn2 = norm_act_layer(mid_chs, name=name + '/bn2') - - # Squeeze-and-excitation - self.se = se_layer(mid_chs, act_layer=act_layer) if se_layer else None - - # Point-wise linear projection - self.conv_pwl = create_conv2d(mid_chs, out_chs, pw_kernel_size, padding=pad_type, - name=name + '/conv_pwl', **conv_kwargs) - self.bn3 = norm_act_layer(out_chs, apply_act=False, name=name + '/bn3') - self.drop_path = DropPath(drop_path_rate) if drop_path_rate 
else None - - def feature_info(self, location): - if location == 'expansion': # after SE, input to PWL - return dict(module='conv_pwl', hook_type='forward_pre', num_chs=self.conv_pwl.in_channels) - else: # location == 'bottleneck', block output - return dict(module='', num_chs=self.conv_pwl.filters) - - def __call__(self, x): - shortcut = x - x = self.conv_pw(x) - x = self.bn1(x) - x = self.conv_dw(x) - x = self.bn2(x) - if self.se is not None: - x = self.se(x) - x = self.conv_pwl(x) - x = self.bn3(x) - if self.has_skip: - if self.drop_path is not None: - x = self.drop_path(x) - x = x + shortcut - return x - - -class CondConvResidual(InvertedResidual): - """ Inverted residual block w/ CondConv routing""" - - def __init__( - self, in_chs, out_chs, dw_kernel_size=3, stride=1, dilation=1, group_size=1, pad_type='', - noskip=False, exp_ratio=1.0, exp_kernel_size=1, pw_kernel_size=1, act_layer=tf.keras.layers.ReLU, - norm_layer=tf.keras.layers.BatchNormalization, se_layer=None, num_experts=0, drop_path_rate=0., - name=None): - - self.num_experts = num_experts - conv_kwargs = dict(num_experts=self.num_experts) - - super(CondConvResidual, self).__init__( - in_chs, out_chs, dw_kernel_size=dw_kernel_size, stride=stride, dilation=dilation, group_size=group_size, - pad_type=pad_type, act_layer=act_layer, noskip=noskip, exp_ratio=exp_ratio, exp_kernel_size=exp_kernel_size, - pw_kernel_size=pw_kernel_size, se_layer=se_layer, norm_layer=norm_layer, conv_kwargs=conv_kwargs, - drop_path_rate=drop_path_rate) - - # self.routing_fn = nn.Linear(in_chs, self.num_experts) - self.routing_fn = tf.keras.layers.Dense(self.num_experts) - - def __call__(self, x): - shortcut = x - pooled_inputs = F.adaptive_avg_pool2d(x, 1).flatten(1) # CondConv routing - routing_weights = torch.sigmoid(self.routing_fn(pooled_inputs)) - x = self.conv_pw(x, routing_weights) - x = self.bn1(x) - x = self.conv_dw(x, routing_weights) - x = self.bn2(x) - x = self.se(x) - x = self.conv_pwl(x, routing_weights) - x = self.bn3(x) - if self.has_skip: - x = self.drop_path(x) + shortcut - return x - - -class EdgeResidual: - """ Residual block with expansion convolution followed by pointwise-linear w/ stride - - Originally introduced in `EfficientNet-EdgeTPU: Creating Accelerator-Optimized Neural Networks with AutoML` - - https://ai.googleblog.com/2019/08/efficientnet-edgetpu-creating.html - - This layer is also called FusedMBConv in the MobileDet, EfficientNet-X, and EfficientNet-V2 papers - * MobileDet - https://arxiv.org/abs/2004.14525 - * EfficientNet-X - https://arxiv.org/abs/2102.05610 - * EfficientNet-V2 - https://arxiv.org/abs/2104.00298 - """ - - def __init__( - self, in_chs, out_chs, exp_kernel_size=3, stride=1, dilation=1, group_size=0, pad_type='', - force_in_chs=0, noskip=False, exp_ratio=1.0, pw_kernel_size=1, act_layer=tf.keras.layers.ReLU, - norm_layer=tf.keras.layers.BatchNormalization, se_layer=None, drop_path_rate=0., - name=None): - norm_act_layer = get_norm_act_layer(norm_layer, act_layer) - if force_in_chs > 0: - mid_chs = make_divisible(force_in_chs * exp_ratio) - else: - mid_chs = make_divisible(in_chs * exp_ratio) - groups = num_groups(group_size, in_chs) - self.has_skip = (in_chs == out_chs and stride == 1) and not noskip - - # Expansion convolution - self.conv_exp = create_conv2d( - in_chs, mid_chs, exp_kernel_size, stride=stride, dilation=dilation, groups=groups, padding=pad_type) - self.bn1 = norm_act_layer(mid_chs, inplace=True) - - # Squeeze-and-excitation - self.se = se_layer(mid_chs, act_layer=act_layer) if se_layer 
else nn.Identity() - - # Point-wise linear projection - self.conv_pwl = create_conv2d(mid_chs, out_chs, pw_kernel_size, padding=pad_type) - self.bn2 = norm_act_layer(out_chs, apply_act=False) - self.drop_path = DropPath(drop_path_rate) if drop_path_rate else nn.Identity() - - def feature_info(self, location): - if location == 'expansion': # after SE, before PWL - return dict(module='conv_pwl', hook_type='forward_pre', num_chs=self.conv_pwl.in_channels) - else: # location == 'bottleneck', block output - return dict(module='', num_chs=self.conv_pwl.filters) - - def __call__(self, x): - shortcut = x - x = self.conv_exp(x) - x = self.bn1(x) - x = self.se(x) - x = self.conv_pwl(x) - x = self.bn2(x) - if self.has_skip: - x = self.drop_path(x) + shortcut - return x - - -class BatchNormAct2d: - """BatchNorm + Activation - - This module performs BatchNorm + Activation in a manner that will remain backwards - compatible with weights trained with separate bn, act. This is why we inherit from BN - instead of composing it as a .bn member. - """ - def __init__( - self, - num_features, - epsilon=1e-5, - momentum=0.1, - affine=True, - track_running_stats=True, - apply_act=True, - act_layer=tf.keras.layers.ReLU, - act_kwargs=None, - inplace=True, - drop_layer=None, - device=None, - dtype=None, - name=None - ): - assert affine, 'Not Implemented' - self.bn = tf.keras.layers.BatchNormalization(momentum=momentum, epsilon=epsilon, name=name) - if act_kwargs is None: - act_kwargs = {} - self.act = act_layer(**act_kwargs) if apply_act else None - - def __call__(self, x): - x = self.bn(x) - if self.act is not None: - x = self.act(x) - return x - - -_NORM_ACT_MAP = dict(batchnorm=BatchNormAct2d) -_NORM_ACT_TYPES = {m for n, m in _NORM_ACT_MAP.items()} -_NORM_ACT_REQUIRES_ARG = {BatchNormAct2d} - - -def get_norm_act_layer(norm_layer, act_layer=None): - assert isinstance(norm_layer, (type, str, types.FunctionType, partial)) - # assert act_layer is None or isinstance(act_layer, (type, str, types.FunctionType, partial)) - norm_act_kwargs = {} - - # unbind partial fn, so args can be rebound later - if isinstance(norm_layer, partial): - norm_act_kwargs.update(norm_layer.keywords) - norm_layer = norm_layer.func - - if isinstance(norm_layer, str): - raise NotImplemented - layer_name = norm_layer.replace('_', '').lower().split('-')[0] - norm_act_layer = _NORM_ACT_MAP.get(layer_name, None) - elif norm_layer in _NORM_ACT_TYPES: - norm_act_layer = norm_layer - elif isinstance(norm_layer, types.FunctionType): - raise NotImplemented - # if function type, must be a lambda/fn that creates a norm_act layer - norm_act_layer = norm_layer - else: - type_name = norm_layer.__name__.lower() - if type_name.startswith('batchnormalization'): - norm_act_layer = BatchNormAct2d - elif type_name.startswith('groupnorm'): - raise NotImplemented - norm_act_layer = GroupNormAct - elif type_name.startswith('groupnorm1'): - raise NotImplemented - norm_act_layer = functools.partial(GroupNormAct, num_groups=1) - elif type_name.startswith('layernorm2d'): - raise NotImplemented - norm_act_layer = LayerNormAct2d - elif type_name.startswith('layernorm'): - raise NotImplemented - norm_act_layer = LayerNormAct - else: - assert False, f"No equivalent norm_act layer for {type_name}" - - if norm_act_layer in _NORM_ACT_REQUIRES_ARG: - # pass `act_layer` through for backwards compat where `act_layer=None` implies no activation. 
- # In the future, may force use of `apply_act` with `act_layer` arg bound to relevant NormAct types - norm_act_kwargs.setdefault('act_layer', act_layer) - if norm_act_kwargs: - norm_act_layer = partial(norm_act_layer, **norm_act_kwargs) # bind/rebind args - return norm_act_layer diff --git a/tutorials/mct_model_garden/models_keras/efficientdet/effnet_keras.py b/tutorials/mct_model_garden/models_keras/efficientdet/effnet_keras.py deleted file mode 100644 index 4bf64116c..000000000 --- a/tutorials/mct_model_garden/models_keras/efficientdet/effnet_keras.py +++ /dev/null @@ -1,489 +0,0 @@ -# --------------------------------------------------------------- -# Copyright 2019 Ross Wightman -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# --------------------------------------------------------------- - -""" -The following code was mostly duplicated from https://github.com/huggingface/pytorch-image-models -and changed to generate an equivalent Keras model. -Main changes: - * Torch layers replaced with Keras layers - * removed class inheritance from torch.nn.Module - * changed "forward" class methods with "__call__" -""" - -from functools import partial -from typing import Any, Dict, Optional, Union, List -import tensorflow as tf -from timm.models import parse_model_name, split_model_name_tag, is_model, build_model_with_cfg, FeatureInfo -from timm.models._efficientnet_builder import BN_EPS_TF_DEFAULT, decode_arch_def, round_channels -from timm.models._builder import pretrained_cfg_for_features - -from tutorials.mct_model_garden.models_keras.efficientdet.effnet_blocks_keras import create_conv2d, SqueezeExcite, get_attn, \ - handle_name, get_norm_act_layer, CondConvResidual, InvertedResidual, DepthwiseSeparableConv, EdgeResidual, \ - ConvBnAct, BatchNormAct2d - - -__all__ = ["EfficientNetBuilder", "decode_arch_def", "efficientnet_init_weights", - 'resolve_bn_args', 'resolve_act_layer', 'round_channels', 'BN_MOMENTUM_TF_DEFAULT', 'BN_EPS_TF_DEFAULT'] - - -# ####################################################################################### -# This file generates the Keras model. 
It's based on the EfficientNet code in the timm -# repository, and switched the Torch Modules with Keras layers -# ####################################################################################### - - -def _log_info_if(_str, _versbose): - if _versbose: - print(_str) - - -class EfficientNetBuilder: - """ Build Trunk Blocks - - This ended up being somewhat of a cross between - https://github.com/tensorflow/tpu/blob/master/models/official/mnasnet/mnasnet_models.py - and - https://github.com/facebookresearch/maskrcnn-benchmark/blob/master/maskrcnn_benchmark/modeling/backbone/fbnet_builder.py - - """ - def __init__(self, output_stride=32, pad_type='', round_chs_fn=round_channels, se_from_exp=False, - act_layer=None, norm_layer=None, se_layer=None, drop_path_rate=0., feature_location=''): - self.output_stride = output_stride - self.pad_type = pad_type - self.round_chs_fn = round_chs_fn - self.se_from_exp = se_from_exp # calculate se channel reduction from expanded (mid) chs - self.act_layer = act_layer - self.norm_layer = norm_layer - self.se_layer = get_attn(se_layer) - try: - self.se_layer(8, rd_ratio=1.0) # test if attn layer accepts rd_ratio arg - self.se_has_ratio = True - except TypeError: - self.se_has_ratio = False - self.drop_path_rate = drop_path_rate - if feature_location == 'depthwise': - # old 'depthwise' mode renamed 'expansion' to match TF impl, old expansion mode didn't make sense - # _logger.warning("feature_location=='depthwise' is deprecated, using 'expansion'") - feature_location = 'expansion' - self.feature_location = feature_location - assert feature_location in ('bottleneck', 'expansion', '') - self.verbose = False - - # state updated during build, consumed by model - self.in_chs = None - self.features = [] - - def _make_block(self, ba, block_idx, block_count, name): - drop_path_rate = self.drop_path_rate * block_idx / block_count - bt = ba.pop('block_type') - ba['name'] = name - ba['in_chs'] = self.in_chs - ba['out_chs'] = self.round_chs_fn(ba['out_chs']) - if 'force_in_chs' in ba and ba['force_in_chs']: - # NOTE this is a hack to work around mismatch in TF EdgeEffNet impl - ba['force_in_chs'] = self.round_chs_fn(ba['force_in_chs']) - ba['pad_type'] = self.pad_type - # block act fn overrides the model default - ba['act_layer'] = ba['act_layer'] if ba['act_layer'] is not None else self.act_layer - assert ba['act_layer'] is not None - ba['norm_layer'] = self.norm_layer - ba['drop_path_rate'] = drop_path_rate - if bt != 'cn': - se_ratio = ba.pop('se_ratio') - if se_ratio and self.se_layer is not None: - if not self.se_from_exp: - # adjust se_ratio by expansion ratio if calculating se channels from block input - se_ratio /= ba.get('exp_ratio', 1.0) - if self.se_has_ratio: - ba['se_layer'] = partial(self.se_layer, rd_ratio=se_ratio) - else: - ba['se_layer'] = self.se_layer - - if bt == 'ir': - if self.verbose: - print(' InvertedResidual {}, Args: {}'.format(block_idx, str(ba))) - block = CondConvResidual(**ba) if ba.get('num_experts', 0) else InvertedResidual(**ba) - elif bt == 'ds' or bt == 'dsa': - if self.verbose: - print(' DepthwiseSeparable {}, Args: {}'.format(block_idx, str(ba))) - block = DepthwiseSeparableConv(**ba) - elif bt == 'er': - _log_info_if(' EdgeResidual {}, Args: {}'.format(block_idx, str(ba)), self.verbose) - block = EdgeResidual(**ba) - elif bt == 'cn': - _log_info_if(' ConvBnAct {}, Args: {}'.format(block_idx, str(ba)), self.verbose) - block = ConvBnAct(**ba) - else: - assert False, 'Uknkown block type (%s) while building model.' 
% bt - - self.in_chs = ba['out_chs'] # update in_chs for arg of next block - return block - - def __call__(self, in_chs, model_block_args, name=None): - """ Build the blocks - Args: - in_chs: Number of input-channels passed to first block - model_block_args: A list of lists, outer list defines stages, inner - list contains strings defining block configuration(s) - Return: - List of block stacks (each stack wrapped in nn.Sequential) - """ - name = handle_name(name) - if self.verbose: - print('Building model trunk with %d stages...' % len(model_block_args)) - self.in_chs = in_chs - total_block_count = sum([len(x) for x in model_block_args]) - total_block_idx = 0 - current_stride = 2 - current_dilation = 1 - stages = [] - if model_block_args[0][0]['stride'] > 1: - # if the first block starts with a stride, we need to extract first level feat from stem - feature_info = dict(module='bn1', num_chs=in_chs, stage=0, reduction=current_stride) - self.features.append(feature_info) - - # outer list of block_args defines the stacks - for stack_idx, stack_args in enumerate(model_block_args): - last_stack = stack_idx + 1 == len(model_block_args) - if self.verbose: - print('Stack: {}'.format(stack_idx)) - assert isinstance(stack_args, list) - - blocks = [] - # each stack (stage of blocks) contains a list of block arguments - for block_idx, block_args in enumerate(stack_args): - last_block = block_idx + 1 == len(stack_args) - if self.verbose: - print(' Block: {}'.format(block_idx)) - - assert block_args['stride'] in (1, 2) - if block_idx >= 1: # only the first block in any stack can have a stride > 1 - block_args['stride'] = 1 - - extract_features = False - if last_block: - next_stack_idx = stack_idx + 1 - extract_features = next_stack_idx >= len(model_block_args) or \ - model_block_args[next_stack_idx][0]['stride'] > 1 - - next_dilation = current_dilation - if block_args['stride'] > 1: - next_output_stride = current_stride * block_args['stride'] - if next_output_stride > self.output_stride: - next_dilation = current_dilation * block_args['stride'] - block_args['stride'] = 1 - if self.verbose: - print(' Converting stride to dilation to maintain output_stride=={}'.format( - self.output_stride)) - else: - current_stride = next_output_stride - block_args['dilation'] = current_dilation - if next_dilation != current_dilation: - current_dilation = next_dilation - - # create the block - block = self._make_block(block_args, total_block_idx, total_block_count, f'{name}/{stack_idx}/{block_idx}') - blocks.append(block) - - # stash feature module name and channel info for model feature extraction - if extract_features: - feature_info = dict( - stage=stack_idx + 1, - reduction=current_stride, - **block.feature_info(self.feature_location), - ) - leaf_name = feature_info.get('module', '') - if leaf_name: - feature_info['module'] = '/'.join([f'blocks.{stack_idx}.{block_idx}', leaf_name]) - else: - assert last_block - feature_info['module'] = f'blocks.{stack_idx}' - self.features.append(feature_info) - - total_block_idx += 1 # incr global block idx (across all stacks) - stages.append(blocks) - return stages - - -class EfficientNetFeatures: - """ EfficientNet Feature Extractor - - A work-in-progress feature extraction module for EfficientNet, to use as a backbone for segmentation - and object detection models. 
- """ - - def __init__( - self, - block_args, - out_indices=(0, 1, 2, 3, 4), - feature_location='bottleneck', - in_chans=3, - stem_size=32, - fix_stem=False, - output_stride=32, - pad_type='', - round_chs_fn=round_channels, - act_layer=None, - norm_layer=None, - se_layer=None, - drop_rate=0., - drop_path_rate=0., - name=None - ): - name = handle_name(name) - act_layer = act_layer or tf.keras.layers.ReLU - norm_layer = norm_layer or tf.keras.layers.BatchNormalization - norm_act_layer = get_norm_act_layer(norm_layer, act_layer) - se_layer = se_layer or SqueezeExcite - self.drop_rate = drop_rate - self.grad_checkpointing = False - - # Stem - if not fix_stem: - stem_size = round_chs_fn(stem_size) - self.conv_stem = create_conv2d(in_chans, stem_size, 3, stride=2, padding=pad_type, name=name + '/conv_stem') - self.bn1 = norm_act_layer(stem_size, name=name + '/bn1') - - # Middle stages (IR/ER/DS Blocks) - builder = EfficientNetBuilder( - output_stride=output_stride, - pad_type=pad_type, - round_chs_fn=round_chs_fn, - act_layer=act_layer, - norm_layer=norm_layer, - se_layer=se_layer, - drop_path_rate=drop_path_rate, - feature_location=feature_location, - ) - self.blocks = builder(stem_size, block_args, name=name + '/blocks') - self.feature_info = FeatureInfo(builder.features, out_indices) - self._stage_out_idx = {f['stage']: f['index'] for f in self.feature_info.get_dicts()} - - # efficientnet_init_weights(self) - - # Register feature extraction hooks with FeatureHooks helper - self.feature_hooks = None - if feature_location != 'bottleneck': - raise NotImplemented - hooks = self.feature_info.get_dicts(keys=('module', 'hook_type')) - # self.feature_hooks = FeatureHooks(hooks, self.named_modules()) - - def set_grad_checkpointing(self, enable=True): - self.grad_checkpointing = enable - - def __call__(self, x) -> List[tf.Tensor]: - x = self.conv_stem(x) - x = self.bn1(x) - if self.feature_hooks is None: - features = [] - if 0 in self._stage_out_idx: - features.append(x) # add stem out - for i, b in enumerate(self.blocks): - for bb in b: - # print(i, type(b), type(bb)) - x = bb(x) - if i + 1 in self._stage_out_idx: - features.append(x) - return features - else: - self.blocks(x) - out = self.feature_hooks.get_output(x.device) - return list(out.values()) - - -def _create_effnet(variant, pretrained=False, **kwargs): - features_mode = '' - model_cls = None # EfficientNet - kwargs_filter = None - if kwargs.pop('features_only', False): - if 'feature_cfg' in kwargs: - features_mode = 'cfg' - else: - kwargs_filter = ('num_classes', 'num_features', 'head_conv', 'global_pool') - model_cls = EfficientNetFeatures - features_mode = 'cls' - else: - raise NotImplemented - - model = build_model_with_cfg( - model_cls, - variant, - pretrained, - features_only=features_mode == 'cfg', - pretrained_strict=features_mode != 'cls', - kwargs_filter=kwargs_filter, - **kwargs, - ) - if features_mode == 'cls': - model.pretrained_cfg = model.default_cfg = pretrained_cfg_for_features(model.pretrained_cfg) - return model - - -def resolve_bn_args(kwargs): - bn_args = {} - bn_momentum = kwargs.pop('bn_momentum', None) - if bn_momentum is not None: - bn_args['momentum'] = bn_momentum - bn_eps = kwargs.pop('bn_eps', None) - if bn_eps is not None: - bn_args['epsilon'] = bn_eps - return bn_args - - -def resolve_act_layer(kwargs, default='relu'): - act_name = kwargs.pop('act_layer', default) - if act_name == 'relu': - return tf.keras.layers.ReLU - elif act_name == 'relu6': - return partial(tf.keras.layers.ReLU, max_value=6.0) - else: - 
raise NotImplemented - - -def _gen_efficientnet_lite(variant, channel_multiplier=1.0, depth_multiplier=1.0, pretrained=False, **kwargs): - """Creates an EfficientNet-Lite model. - - Ref impl: https://github.com/tensorflow/tpu/tree/master/models/official/efficientnet/lite - Paper: https://arxiv.org/abs/1905.11946 - - EfficientNet params - name: (channel_multiplier, depth_multiplier, resolution, dropout_rate) - 'efficientnet-lite0': (1.0, 1.0, 224, 0.2), - 'efficientnet-lite1': (1.0, 1.1, 240, 0.2), - 'efficientnet-lite2': (1.1, 1.2, 260, 0.3), - 'efficientnet-lite3': (1.2, 1.4, 280, 0.3), - 'efficientnet-lite4': (1.4, 1.8, 300, 0.3), - - Args: - channel_multiplier: multiplier to number of channels per layer - depth_multiplier: multiplier to number of repeats per stage - """ - arch_def = [ - ['ds_r1_k3_s1_e1_c16'], - ['ir_r2_k3_s2_e6_c24'], - ['ir_r2_k5_s2_e6_c40'], - ['ir_r3_k3_s2_e6_c80'], - ['ir_r3_k5_s1_e6_c112'], - ['ir_r4_k5_s2_e6_c192'], - ['ir_r1_k3_s1_e6_c320'], - ] - model_kwargs = dict( - block_args=decode_arch_def(arch_def, depth_multiplier, fix_first_last=True), - num_features=1280, - stem_size=32, - fix_stem=True, - round_chs_fn=partial(round_channels, multiplier=channel_multiplier), - act_layer=resolve_act_layer(kwargs, 'relu6'), - norm_layer=kwargs.pop('norm_layer', None) or partial(tf.keras.layers.BatchNormalization, **resolve_bn_args(kwargs)), - **kwargs, - ) - model = _create_effnet(variant, pretrained, **model_kwargs) - return model - - -def tf_efficientnet_lite0(pretrained=False, **kwargs): - """ EfficientNet-Lite0 """ - # NOTE for train, drop_rate should be 0.2, drop_path_rate should be 0.2 - kwargs['bn_eps'] = BN_EPS_TF_DEFAULT - kwargs['pad_type'] = 'same' - kwargs['name'] = 'backbone' - model = _gen_efficientnet_lite( - 'tf_efficientnet_lite0', channel_multiplier=1.0, depth_multiplier=1.0, pretrained=pretrained, **kwargs) - return model - - -model_entrypoints = {'tf_efficientnet_lite0': tf_efficientnet_lite0} - - -def create_model( - model_name: str, - pretrained: bool = False, - pretrained_cfg: Optional[Union[str, Dict[str, Any], Any]] = None, - pretrained_cfg_overlay: Optional[Dict[str, Any]] = None, - checkpoint_path: str = '', - scriptable: Optional[bool] = None, - exportable: Optional[bool] = None, - no_jit: Optional[bool] = None, - **kwargs, -): - """Create a model. - - Lookup model's entrypoint function and pass relevant args to create a new model. - - - **kwargs will be passed through entrypoint fn to ``timm.models.build_model_with_cfg()`` - and then the model class __init__(). kwargs values set to None are pruned before passing. - - - Args: - model_name: Name of model to instantiate. - pretrained: If set to `True`, load pretrained ImageNet-1k weights. - pretrained_cfg: Pass in an external pretrained_cfg for model. - pretrained_cfg_overlay: Replace key-values in base pretrained_cfg with these. - checkpoint_path: Path of checkpoint to load _after_ the model is initialized. - scriptable: Set layer config so that model is jit scriptable (not working for all models yet). - exportable: Set layer config so that model is traceable / ONNX exportable (not fully impl/obeyed yet). - no_jit: Set layer config so that model doesn't utilize jit scripted layers (so far activations only). - - Keyword Args: - drop_rate (float): Classifier dropout rate for training. - drop_path_rate (float): Stochastic depth drop rate for training. - global_pool (str): Classifier global pooling type. 
- - Example: - - ```py - >>> from timm import create_model - - >>> # Create a MobileNetV3-Large model with no pretrained weights. - >>> model = create_model('mobilenetv3_large_100') - - >>> # Create a MobileNetV3-Large model with pretrained weights. - >>> model = create_model('mobilenetv3_large_100', pretrained=True) - >>> model.num_classes - 1000 - - >>> # Create a MobileNetV3-Large model with pretrained weights and a new head with 10 classes. - >>> model = create_model('mobilenetv3_large_100', pretrained=True, num_classes=10) - >>> model.num_classes - 10 - ``` - """ - # Parameters that aren't supported by all models or are intended to only override model defaults if set - # should default to None in command line args/cfg. Remove them if they are present and not set so that - # non-supporting models don't break and default args remain in effect. - kwargs = {k: v for k, v in kwargs.items() if v is not None} - - model_source, model_name = parse_model_name(model_name) - if model_source == 'hf-hub': - assert not pretrained_cfg, 'pretrained_cfg should not be set when sourcing model from Hugging Face Hub.' - # For model names specified in the form `hf-hub:path/architecture_name@revision`, - # load model weights + pretrained_cfg from Hugging Face hub. - raise NotImplemented - # pretrained_cfg, model_name = load_model_config_from_hf(model_name) - else: - model_name, pretrained_tag = split_model_name_tag(model_name) - if pretrained_tag and not pretrained_cfg: - # a valid pretrained_cfg argument takes priority over tag in model name - pretrained_cfg = pretrained_tag - - if not is_model(model_name): - raise RuntimeError('Unknown model (%s)' % model_name) - - create_fn = model_entrypoints[model_name] - # with set_layer_config(scriptable=scriptable, exportable=exportable, no_jit=no_jit): - model = create_fn( - pretrained=pretrained, - pretrained_cfg=pretrained_cfg, - pretrained_cfg_overlay=pretrained_cfg_overlay, - **kwargs, - ) - - return model diff --git a/tutorials/mct_model_garden/models_keras/nanodet/LICENSE b/tutorials/mct_model_garden/models_keras/nanodet/LICENSE deleted file mode 100644 index f5816c9de..000000000 --- a/tutorials/mct_model_garden/models_keras/nanodet/LICENSE +++ /dev/null @@ -1,203 +0,0 @@ - Copyright 2020-2021 RangiLyu. All rights reserved. - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. 
- - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright 2020-2021 RangiLyu - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. \ No newline at end of file diff --git a/tutorials/mct_model_garden/models_keras/nanodet/__init__.py b/tutorials/mct_model_garden/models_keras/nanodet/__init__.py deleted file mode 100644 index 2147ec284..000000000 --- a/tutorials/mct_model_garden/models_keras/nanodet/__init__.py +++ /dev/null @@ -1,14 +0,0 @@ -# Copyright 2023 Sony Semiconductor Israel, Inc. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== diff --git a/tutorials/mct_model_garden/models_keras/nanodet/nanodet_keras_model.py b/tutorials/mct_model_garden/models_keras/nanodet/nanodet_keras_model.py deleted file mode 100644 index d3a3fb388..000000000 --- a/tutorials/mct_model_garden/models_keras/nanodet/nanodet_keras_model.py +++ /dev/null @@ -1,364 +0,0 @@ -# ------------------------------------------------------------------------------ -# Copyright 2020-2021 RangiLyu -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# ------------------------------------------------------------------------------ -""" -Nanodet-Plus Object Detection Model - -This code contains a TensorFlow/Keras implementation of Nanodet-Plus object detection model, following -https://github.com/RangiLyu/nanodet. This implementation also integrates the Nanodet-plus post-processing into the -model. - -The Nanodet-Plus model is optimized for real-time and resource-constrained environments while maintaining competitive -detection performance. It is particularly suitable for edge devices and embedded systems. - -The code is organized as follows: -- Function definitions for building the Nanodet-Plus model. -- Model definition - -For more details on the Nanodet-Plus model, refer to the original repository: -https://github.com/RangiLyu/nanodet - -""" - -import numpy as np -from keras.utils import plot_model -from keras.utils import get_source_inputs -from keras.layers import Input, Conv2D, MaxPool2D, GlobalMaxPooling2D, GlobalAveragePooling2D, \ - BatchNormalization, DepthwiseConv2D, Concatenate, Lambda, UpSampling2D, Add, Reshape, ZeroPadding2D, LeakyReLU, \ - Resizing, ReLU, Softmax -from keras.layers import Activation, Dense -from keras.models import Model -import keras.backend as K -import tensorflow as tf - -# Nanodet-Plus building blocks -def channel_split(x): - in_channles = x.shape.as_list()[-1] - ip = in_channles // 2 - c_hat = x[:, :, :, 0:ip] - c = x[:, :, :, ip:] - return c_hat, c - -def channel_shuffle(x): - height, width, channels = x.shape.as_list()[1:] - channels_per_split = channels // 2 - x = tf.reshape(x, [-1, height, width, 2, channels_per_split]) - x = tf.transpose(x, perm=[0,1,2,4,3]) - x = tf.reshape(x, [-1, height, width, channels]) - return x - -def shuffle_unit(inputs, out_channels, bottleneck_ratio,strides=2,stage=1,block=1): - if K.image_data_format() == 'channels_last': - bn_axis = -1 - else: - raise ValueError('Only channels last supported') - - prefix = 'backbone.stage{}.{}'.format(stage, block-1) - bottleneck_channels = int(out_channels * bottleneck_ratio) - if strides < 2: - c_hat, c = channel_split(inputs) - inputs = c - - x = Conv2D(bottleneck_channels, kernel_size=(1,1), strides=1, padding='same', use_bias=False, name='{}.branch2.0'.format(prefix))(inputs) - x = BatchNormalization(axis=bn_axis, epsilon=1e-05, name='{}.branch2.1'.format(prefix))(x) - x = LeakyReLU(alpha=0.1, name='{}.branch2.2'.format(prefix))(x) - if strides > 1: - x = ZeroPadding2D(padding=((1, 0), (1, 0)))(x) - x = DepthwiseConv2D(kernel_size=3, strides=strides, padding='valid', use_bias=False, name='{}.branch2.3'.format(prefix))(x) - else: - x = DepthwiseConv2D(kernel_size=3, strides=strides, padding='same', use_bias=False, name='{}.branch2.3'.format(prefix))(x) - x = BatchNormalization(axis=bn_axis, 
epsilon=1e-05, name='{}.branch2.4'.format(prefix))(x) - x = Conv2D(bottleneck_channels, kernel_size=1,strides=1,padding='same', use_bias=False, name='{}.branch2.5'.format(prefix))(x) - x = BatchNormalization(axis=bn_axis, epsilon=1e-05, name='{}.branch2.6'.format(prefix))(x) - x = LeakyReLU(alpha=0.1, name='{}.branch2.7'.format(prefix))(x) - - if strides < 2: - ret = Concatenate(axis=bn_axis, name='{}/concat_1'.format(prefix))([c_hat, x]) - else: - inputs = ZeroPadding2D(padding=((1, 0), (1, 0)))(inputs) - s2 = DepthwiseConv2D(kernel_size=3, strides=2, padding='valid', use_bias=False, name='{}.branch1.0'.format(prefix))(inputs) - s2 = BatchNormalization(axis=bn_axis, epsilon=1e-05, name='{}.branch1.1'.format(prefix))(s2) - s2 = Conv2D(bottleneck_channels, kernel_size=1,strides=1,padding='same', use_bias=False, name='{}.branch1.2'.format(prefix))(s2) - s2 = BatchNormalization(axis=bn_axis, epsilon=1e-05, name='{}.branch1.3'.format(prefix))(s2) - s2 = LeakyReLU(alpha=0.1, name='{}.branch1.4'.format(prefix))(s2) - ret = Concatenate(axis=bn_axis, name='{}/concat_2'.format(prefix))([s2, x]) - - ret = channel_shuffle(ret) - return ret - - -def block(x, channel_map, bottleneck_ratio, repeat=1, stage=1): - x = shuffle_unit(x, out_channels=channel_map[stage-1], - strides=2,bottleneck_ratio=bottleneck_ratio,stage=stage,block=1) - - for i in range(1, repeat+1): - x = shuffle_unit(x, out_channels=channel_map[stage-1],strides=1, - bottleneck_ratio=bottleneck_ratio,stage=stage, block=(1+i)) - - return x - -def nanodet_shufflenet_v2(input_tensor=None, - scale_factor=1.5, - input_shape=(416,416,3), - num_shuffle_units=[3,7,3], - bottleneck_ratio=0.5): - if scale_factor == 1.0: - out_channels_in_stage = np.array([24, 116, 232, 464, 1024]) - else: #scale_factor == 1.5: - out_channels_in_stage = np.array([24, 176, 352, 704, 1024]) - - out_channels_in_stage = out_channels_in_stage.astype(int) - - if input_tensor is None: - img_input = Input(shape=input_shape) - else: - if not K.is_keras_tensor(input_tensor): - img_input = Input(tensor=input_tensor, shape=input_shape) - else: - img_input = input_tensor - - # create shufflenet architecture - x = ZeroPadding2D(padding=((1,0),(1,0)))(img_input) - x = Conv2D(filters=out_channels_in_stage[0], kernel_size=(3, 3), padding='valid', use_bias=False, strides=(2, 2), - name='backbone.conv1.0')(x) - x = BatchNormalization(epsilon=1e-05, name='backbone.conv1.1')(x) - x = LeakyReLU(alpha=0.1)(x) - x = ZeroPadding2D(padding=((1,0),(1,0)))(x) - x = MaxPool2D(pool_size=(3, 3), strides=(2, 2), padding='valid', name='maxpool1')(x) - - # create stages containing shufflenet units beginning at stage 2 - out = [] - for stage in range(len(num_shuffle_units)): - repeat = num_shuffle_units[stage] - x = block(x, out_channels_in_stage, - repeat=repeat, - bottleneck_ratio=bottleneck_ratio, - stage=stage + 2) - out.append(x) - - if input_tensor: - inputs = get_source_inputs(input_tensor) - - else: - inputs = img_input - - return inputs, out - - -def conv_module(x, out_channels, kernel, name_prefix='ConvModule'): - x = Conv2D(out_channels, kernel, use_bias=False, name=name_prefix+'.conv')(x) - x = BatchNormalization(epsilon=1e-05, name=name_prefix+'.bn')(x) - x = LeakyReLU(alpha=0.1)(x) - return x - -def ghost_blocks(x, out_channels=128, name_prefix='GhostBlocks'): - residual = x - x1 = Conv2D(out_channels // 2, 1, use_bias=False, name=name_prefix+'.ghost1.primary_conv.0')(x) - x1 = BatchNormalization(epsilon=1e-05, name=name_prefix+'.ghost1.primary_conv.1')(x1) - x1 = 
LeakyReLU(alpha=0.1)(x1) - x2 = DepthwiseConv2D(3, padding="same", use_bias=False, name=name_prefix+'.ghost1.cheap_operation.0')(x1) - x2 = BatchNormalization(epsilon=1e-05, name=name_prefix+'.ghost1.cheap_operation.1')(x2) - x2 = LeakyReLU(alpha=0.1)(x2) - x = Concatenate()([x1, x2]) - - x1 = Conv2D(out_channels // 2, 1, use_bias=False, name=name_prefix+'.ghost2.primary_conv.0')(x) - x1 = BatchNormalization(epsilon=1e-05, name=name_prefix+'.ghost2.primary_conv.1')(x1) - x2 = DepthwiseConv2D(3, padding="same", use_bias=False, name=name_prefix+'.ghost2.cheap_operation.0')(x1) - x2 = BatchNormalization(epsilon=1e-05, name=name_prefix+'.ghost2.cheap_operation.1')(x2) - x = Concatenate()([x1, x2]) - - residual = DepthwiseConv2D(5, padding="same", use_bias=False, name=name_prefix+'.shortcut.0')(residual) - residual = BatchNormalization(epsilon=1e-05, name=name_prefix+'.shortcut.1')(residual) - residual = Conv2D(out_channels, 1, use_bias=False, name=name_prefix+'.shortcut.2')(residual) - residual = BatchNormalization(epsilon=1e-05, name=name_prefix+'.shortcut.3')(residual) - - x = Add()([residual, x]) - return x - -def depthwise_conv_module(x, out_channels=128, stride=2, name_prefix='DepthwiseConvModule'): - if stride > 1: - x = ZeroPadding2D(padding=((2, 2), (2, 2)))(x) - x = DepthwiseConv2D(5, strides=(stride, stride), padding="valid", use_bias=False, name=name_prefix + '.depthwise')(x) - else: - x = DepthwiseConv2D(5, strides=(stride, stride), padding="same", use_bias=False, name=name_prefix+'.depthwise')(x) - x = BatchNormalization(epsilon=1e-05, name=name_prefix+'.dwnorm')(x) - x = LeakyReLU(alpha=0.1)(x) - x = Conv2D(out_channels, 1, use_bias=False, name=name_prefix+'.pointwise')(x) - x = BatchNormalization(epsilon=1e-05, name=name_prefix+'.pwnorm')(x) - x = LeakyReLU(alpha=0.1)(x) - - return x - - -def nanodet_ghostpan(x, - in_channels=[176, 352, 704], - out_channels=128, - res=416): - - for idx in range(len(in_channels)): - x[idx] = conv_module(x[idx], out_channels, 1, name_prefix='fpn.reduce_layers.'+str(idx)) - - # top-down path - p4 = x[2] - x_upsampled = Resizing(int(res/16),int(res/16),interpolation="bilinear")(x[2]) - x_concate = Concatenate(axis=-1, name='p3_input')([x_upsampled, x[1]]) - p3 = ghost_blocks(x_concate, out_channels, name_prefix='fpn.top_down_blocks.0.blocks.0') - - x_upsampled = Resizing(int(res/8),int(res/8),interpolation="bilinear")(p3) - x_concate = Concatenate(axis=-1, name='p2_input')([x_upsampled, x[0]]) - p2 = ghost_blocks(x_concate, out_channels, name_prefix='fpn.top_down_blocks.1.blocks.0') - - # bottom up path - n2 = p2 - - x_downsampled = depthwise_conv_module(n2, out_channels, name_prefix='fpn.downsamples.0') - x_concate = Concatenate(axis=-1, name='n3_input')([x_downsampled, p3]) - n3 = ghost_blocks(x_concate, out_channels, name_prefix='fpn.bottom_up_blocks.0.blocks.0') - - x_downsampled = depthwise_conv_module(n3, out_channels, name_prefix='fpn.downsamples.1') - x_concate = Concatenate(axis=-1, name='n4_input')([x_downsampled, p4]) - n4 = ghost_blocks(x_concate, out_channels, name_prefix='fpn.bottom_up_blocks.1.blocks.0') - - n5_a = depthwise_conv_module(n4, out_channels, name_prefix='fpn.extra_lvl_out_conv.0') - n5_b = depthwise_conv_module(p4, out_channels, name_prefix='fpn.extra_lvl_in_conv.0') - - n5 = Add()([n5_a, n5_b]) - return [n2, n3, n4, n5] - -def distance2bbox(points, distance): - """Decode distance prediction to bounding box. - - Args: - points (Tensor): Shape (n, 2), [x, y]. 
- distance (Tensor): Distance from the given point to 4 - boundaries (left, top, right, bottom). - max_shape (tuple): Shape of the image. - - Returns: - Tensor: Decoded bboxes. - """ - d0, d1, d2, d3 = tf.unstack(distance, 4, -1) - a0, a1, a2, a3 = tf.unstack(points, 4, -1) - x1 = tf.math.subtract(a0, d0) - y1 = tf.math.subtract(a1, d1) - x2 = tf.math.add(a0, d2) - y2 = tf.math.add(a1, d3) - return x1, y1, x2, y2 - - -def dfl(x, c1=8): - """Distributed focal loss calculation. - Args: - c1: - x: - - Returns: - Tensor: bboxes after integral calculation. - """ - x_shape = x.shape - x = tf.reshape(x, [-1, x_shape[1] * x_shape[2], 4*c1]) - x = tf.reshape(x, [-1, x_shape[1] * x_shape[2], 4, c1]) - x = Softmax(-1)(x) - w = np.expand_dims(np.expand_dims(np.expand_dims(np.arange(c1), 0),0),-1) - conv = Conv2D(1, 1, use_bias=False, weights=[w]) - return tf.squeeze(conv(x),-1) - -def nanodet_generate_anchors(batch_size, featmap_sizes, strides): - anchors_list = [] - for i, stride in enumerate(strides): - h, w = featmap_sizes[i] - x_range = np.arange(w) * stride - y_range = np.arange(h) * stride - y, x = np.meshgrid(y_range, x_range) - y = y.flatten() - x = x.flatten() - strides = np.ones_like(x) * stride - anchors = np.stack([y, x, strides, strides], axis=-1) - anchors = np.expand_dims(anchors, axis=0) - anchors = np.repeat(anchors, batch_size, axis=0) - anchors_list.append(anchors) - return np.concatenate(anchors_list, axis=1, dtype=float) - -def nanodet_plus_head(n, feat_channels=128, num_classes=80): - regr_size = 32 # regression target before DFL (4 coordinates X 8 bins) - feat_out = num_classes + regr_size - h = n - for idx in range(4): - h[idx] = depthwise_conv_module(n[idx], out_channels=feat_channels, stride=1, name_prefix='head.cls_convs.' + str(idx) + '.0') - h[idx] = depthwise_conv_module(h[idx], out_channels=feat_channels, stride=1, name_prefix='head.cls_convs.' + str(idx) + '.1') - h[idx] = Conv2D(feat_out, 1, name='head.gfl_cls.' 
+ str(idx))(h[idx]) - return h - -def nanodet_box_decoding(h, res, num_classes=80): - regr_size = 32 # regression target before DFL (4 coordinates X 8 bins) - strides = [8, 16, 32, 64] - batch_size = 1 - featmap_sizes = [(np.ceil(res / stride), np.ceil(res / stride)) for stride in strides] - all_anchors = 1 / res * nanodet_generate_anchors(batch_size, featmap_sizes, strides) - nn = res / 8 * res / 8 - anchors_list = np.split(all_anchors,[int(nn), int(1.25*nn), int(1.3125*nn)],axis=1) - h_cls = [] - h_bbox = [] - for idx in range(4): - # Split to 80 classes and 4 * 8 bounding boxes regression - cls, regr = tf.split(h[idx], [num_classes, regr_size],-1) - ndet = cls.shape[1] * cls.shape[2] - - # Distributed Focal loss integral - d = dfl(regr, 8) - - # Box decoding - anchors = tf.constant(anchors_list[idx],dtype=tf.float32) - d = tf.math.multiply(d, anchors[...,2,None]) - bbox0, bbox1, bbox2, bbox3 = distance2bbox(anchors, d) - bbox0, bbox1, bbox2, bbox3 = ReLU()(bbox0), ReLU()(bbox1), ReLU()(bbox2), ReLU()(bbox3) - bbox = tf.stack([bbox1, bbox0, bbox3, bbox2], -1) - bbox = tf.expand_dims(bbox,2) - - cls = tf.reshape(cls, [-1, ndet, num_classes]) - h_cls.append(cls) - h_bbox.append(bbox) - classes = Concatenate(axis=1, name='bb_dec_class')([h_cls[0], h_cls[1], h_cls[2], h_cls[3]]) - boxes = Concatenate(axis=1, name='bb_dec_bbox')([h_bbox[0], h_bbox[1], h_bbox[2], h_bbox[3]]) - classes = tf.math.sigmoid(classes) - return classes, boxes - -# Nanodet-Plus model definition -def nanodet_plus_m(input_shape, scale_factor, bottleneck_ratio, feat_channels, num_classes=80): - """ - Create the Nanodet-Plus object detection model. - - Args: - input_shape (tuple): The shape of input images (height, width, channels). - scale_factor (float): Scale factor for the ShuffleNetV2 backbone. - bottleneck_ratio (float): Bottleneck ratio for the ShuffleNetV2 backbone. - feat_channels (int): Number of feature channels. - num_classes (int): Number of output classes. - - Returns: - tf.keras.Model: The Nanodet-Plus model. - - Configuration options: - nanodet-plus-m-1.5x-416: input_shape = (416,416,3), scale_factor=1.5, bottleneck_ratio=0.5, feat_channels=128, num_classes=80 - nanodet-plus-m-1.5x-320: input_shape = (320,320,3), scale_factor=1.5, bottleneck_ratio=0.5, feat_channels=128, num_classes=80 - nanodet-plus-m-416: input_shape = (416,416,3), scale_factor=1.0, bottleneck_ratio=0.5, feat_channels=96, num_classes=80 - nanodet-plus-m-320: input_shape = (320,320,3), scale_factor=1.0, bottleneck_ratio=0.5, feat_channels=96, num_classes=80 - - """ - # Nanodet backbone - inputs, x = nanodet_shufflenet_v2(scale_factor=scale_factor, input_shape=input_shape, bottleneck_ratio=bottleneck_ratio) - - # Nanodet neck - x = nanodet_ghostpan(x, out_channels=feat_channels, res=input_shape[0]) - - # Nanodet head - x = nanodet_plus_head(x, feat_channels=feat_channels, num_classes=num_classes) - - # Define Keras model - return Model(inputs, x, name=f'Nanodet_plus_m_{scale_factor}x_{input_shape[0]}') diff --git a/tutorials/mct_model_garden/models_keras/utils/torch2keras_weights_translation.py b/tutorials/mct_model_garden/models_keras/utils/torch2keras_weights_translation.py deleted file mode 100644 index 11ada3485..000000000 --- a/tutorials/mct_model_garden/models_keras/utils/torch2keras_weights_translation.py +++ /dev/null @@ -1,156 +0,0 @@ -# Copyright 2023 Sony Semiconductor Israel, Inc. All rights reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -from typing import Dict -import tensorflow as tf -import torch -import numpy as np - - -ln_patch = False - - -def weight_translation(keras_name: str, pytorch_weights_dict: Dict[str, np.ndarray], - layer: tf.keras.layers.Layer) -> np.ndarray: - """ - Convert a keras weight name format to torch naming format, so the value of the weight can be - retrieved from the Torch model state_dict. - - For example: - * Keras name: model_name/layer_name/kernel:0 - is translated to: - * Torch name: model_name.layer_name.weight - - Args: - keras_name: keras weight name - pytorch_weights_dict: the Torch model state_dict, as {name_str: weight value as numpy array} - layer: the Keras layer of the weight - - Returns: - the weight value as a numpy array - - """ - keras_name = keras_name.replace('/', '.') - if ln_patch and (isinstance(layer, tf.keras.layers.LayerNormalization) or - (isinstance(layer, tf.keras.layers.BatchNormalization) and '_bn_patch' in layer.name)): - if isinstance(layer, tf.keras.layers.LayerNormalization): - if '.beta:0' in keras_name: - value = layer.weights[1].numpy() - elif '.gamma:0' in keras_name: - value = layer.weights[0].numpy() - else: - raise Exception('Unknown LayerNorm weight name') - elif isinstance(layer, tf.keras.layers.BatchNormalization): - if '.beta:0' in keras_name: - value = pytorch_weights_dict.pop(keras_name.replace('_bn_patch', '').replace(".beta:0", ".bias")) - elif '.gamma:0' in keras_name: - value = pytorch_weights_dict.pop(keras_name.replace('_bn_patch', '').replace(".gamma:0", ".weight")) - elif '.moving_mean:0' in keras_name: - value = layer.weights[2].numpy() - elif '.moving_variance:0' in keras_name: - value = layer.weights[3].numpy() - else: - raise Exception('Unknown BatchNorm weight name') - else: - raise NotImplemented - # Handling MHA layers - elif isinstance(layer, tf.keras.layers.MultiHeadAttention): - if '.bias:0' in keras_name: - if '.query.' in keras_name: - value = pytorch_weights_dict[keras_name.replace(".query.bias:0", ".qkv_proj.bias")] - value = value[:int(value.shape[0]/3)].reshape((layer._num_heads, -1)) - elif '.key.' in keras_name: - value = pytorch_weights_dict[keras_name.replace(".key.bias:0", ".qkv_proj.bias")] - value = value[int(value.shape[0] / 3):2*int(value.shape[0] / 3)].reshape((layer._num_heads, -1)) - elif '.value.' in keras_name: - value = pytorch_weights_dict[keras_name.replace(".value.bias:0", ".qkv_proj.bias")] - value = value[2*int(value.shape[0] / 3):].reshape((layer._num_heads, -1)) - elif '.attention_output.' in keras_name: # or '.key.' in keras_name or '.value.' in keras_name: - value = pytorch_weights_dict[keras_name.replace(".attention_output.bias:0", ".out_proj.bias")] - else: - raise Exception('Unknown MHA bias name') - elif '.query.' 
in keras_name: - value = pytorch_weights_dict[keras_name.replace(".query.kernel:0", ".qkv_proj.weight")] - value = value[:int(value.shape[0]/3), :].transpose().reshape((int(value.shape[0]/3), layer._num_heads, -1)) - elif '.key.' in keras_name: - value = pytorch_weights_dict[keras_name.replace(".key.kernel:0", ".qkv_proj.weight")] - value = value[int(value.shape[0] / 3):2 * int(value.shape[0] / 3), :].transpose().reshape((int(value.shape[0]/3), layer._num_heads, -1)) - elif '.value.' in keras_name: - value = pytorch_weights_dict[keras_name.replace(".value.kernel:0", ".qkv_proj.weight")] - value = value[2*int(value.shape[0] / 3):, :].transpose().reshape((int(value.shape[0]/3), layer._num_heads, -1)) - elif '.attention_output.' in keras_name: # or '.key.' in keras_name or '.value.' in keras_name: - value = pytorch_weights_dict[keras_name.replace(".attention_output.kernel:0", ".out_proj.weight")] - value = value.transpose().reshape((layer._num_heads, -1, value.shape[-1])) - else: - raise Exception('Unknown MHA weight name') - - # Handle Convolution layers - elif '.depthwise_kernel:0' in keras_name: - value = pytorch_weights_dict.pop(keras_name.replace(".depthwise_kernel:0", ".weight")).transpose((2, 3, 0, 1)) - elif '.kernel:0' in keras_name: - if isinstance(layer, tf.keras.layers.Dense): - value = pytorch_weights_dict.pop(keras_name.replace(".kernel:0", ".weight")).transpose((1, 0)) - else: - value = pytorch_weights_dict.pop(keras_name.replace(".kernel:0", ".weight")) - if len(value.shape) == 2: - assert layer.kernel_size == (1, 1), "Error: Thie code is for converting Dense kernels to conv1x1" - value = value.transpose().reshape(layer.kernel._shape_tuple()) - else: - value = value.transpose((2, 3, 1, 0)) - elif '.bias:0' in keras_name: - value = pytorch_weights_dict.pop(keras_name.replace(".bias:0", ".bias")) - - # Handle normalization layers - elif '.beta:0' in keras_name: - value = pytorch_weights_dict.pop(keras_name.replace(".beta:0", ".bias")) - elif '.gamma:0' in keras_name: - value = pytorch_weights_dict.pop(keras_name.replace(".gamma:0", ".weight")) - elif '.moving_mean:0' in keras_name: - value = pytorch_weights_dict.pop(keras_name.replace(".moving_mean:0", ".running_mean")) - elif '.moving_variance:0' in keras_name: - value = pytorch_weights_dict.pop(keras_name.replace(".moving_variance:0", ".running_var")) - else: - value = pytorch_weights_dict.pop(keras_name) - return value - - -def load_state_dict(model: tf.keras.Model, state_dict_url: str = None, - state_dict_torch: Dict = None): - """ - Assign a Keras model weights according to a state_dict from the equivalent Torch model. - Args: - model: A Keras model - state_dict_url: the Torch model state_dict location - state_dict_torch: Torch model state_dict. 
If not None, will be used instead of state_dict_url - - Returns: - The same model object after assigning the weights - - """ - if state_dict_torch is None: - assert state_dict_url is not None, "either 'state_dict_url' or 'state_dict_torch' should not be None" - state_dict_torch = torch.hub.load_state_dict_from_url(state_dict_url, progress=False, - map_location='cpu') - state_dict = {k: v.numpy() for k, v in state_dict_torch.items()} - - for layer in model.layers: - for w in layer.weights: - w.assign(weight_translation(w.name, state_dict, layer)) - - # look for variables not assigned in torch state dict - for k in state_dict: - if 'num_batches_tracked' in k: - continue - print(f' WARNING: {k} not assigned to keras model !!!') diff --git a/tutorials/mct_model_garden/models_keras/yolov8/LICENSE b/tutorials/mct_model_garden/models_keras/yolov8/LICENSE deleted file mode 100644 index bae94e189..000000000 --- a/tutorials/mct_model_garden/models_keras/yolov8/LICENSE +++ /dev/null @@ -1,661 +0,0 @@ - GNU AFFERO GENERAL PUBLIC LICENSE - Version 3, 19 November 2007 - - Copyright (C) 2007 Free Software Foundation, Inc. - Everyone is permitted to copy and distribute verbatim copies - of this license document, but changing it is not allowed. - - Preamble - - The GNU Affero General Public License is a free, copyleft license for -software and other kinds of works, specifically designed to ensure -cooperation with the community in the case of network server software. - - The licenses for most software and other practical works are designed -to take away your freedom to share and change the works. By contrast, -our General Public Licenses are intended to guarantee your freedom to -share and change all versions of a program--to make sure it remains free -software for all its users. - - When we speak of free software, we are referring to freedom, not -price. Our General Public Licenses are designed to make sure that you -have the freedom to distribute copies of free software (and charge for -them if you wish), that you receive source code or can get it if you -want it, that you can change the software or use pieces of it in new -free programs, and that you know you can do these things. - - Developers that use our General Public Licenses protect your rights -with two steps: (1) assert copyright on the software, and (2) offer -you this License which gives you legal permission to copy, distribute -and/or modify the software. - - A secondary benefit of defending all users' freedom is that -improvements made in alternate versions of the program, if they -receive widespread use, become available for other developers to -incorporate. Many developers of free software are heartened and -encouraged by the resulting cooperation. However, in the case of -software used on network servers, this result may fail to come about. -The GNU General Public License permits making a modified version and -letting the public access it on a server without ever releasing its -source code to the public. - - The GNU Affero General Public License is designed specifically to -ensure that, in such cases, the modified source code becomes available -to the community. It requires the operator of a network server to -provide the source code of the modified version running there to the -users of that server. Therefore, public use of a modified version, on -a publicly accessible server, gives the public access to the source -code of the modified version. 
- - An older license, called the Affero General Public License and -published by Affero, was designed to accomplish similar goals. This is -a different license, not a version of the Affero GPL, but Affero has -released a new version of the Affero GPL which permits relicensing under -this license. - - The precise terms and conditions for copying, distribution and -modification follow. - - TERMS AND CONDITIONS - - 0. Definitions. - - "This License" refers to version 3 of the GNU Affero General Public License. - - "Copyright" also means copyright-like laws that apply to other kinds of -works, such as semiconductor masks. - - "The Program" refers to any copyrightable work licensed under this -License. Each licensee is addressed as "you". "Licensees" and -"recipients" may be individuals or organizations. - - To "modify" a work means to copy from or adapt all or part of the work -in a fashion requiring copyright permission, other than the making of an -exact copy. The resulting work is called a "modified version" of the -earlier work or a work "based on" the earlier work. - - A "covered work" means either the unmodified Program or a work based -on the Program. - - To "propagate" a work means to do anything with it that, without -permission, would make you directly or secondarily liable for -infringement under applicable copyright law, except executing it on a -computer or modifying a private copy. Propagation includes copying, -distribution (with or without modification), making available to the -public, and in some countries other activities as well. - - To "convey" a work means any kind of propagation that enables other -parties to make or receive copies. Mere interaction with a user through -a computer network, with no transfer of a copy, is not conveying. - - An interactive user interface displays "Appropriate Legal Notices" -to the extent that it includes a convenient and prominently visible -feature that (1) displays an appropriate copyright notice, and (2) -tells the user that there is no warranty for the work (except to the -extent that warranties are provided), that licensees may convey the -work under this License, and how to view a copy of this License. If -the interface presents a list of user commands or options, such as a -menu, a prominent item in the list meets this criterion. - - 1. Source Code. - - The "source code" for a work means the preferred form of the work -for making modifications to it. "Object code" means any non-source -form of a work. - - A "Standard Interface" means an interface that either is an official -standard defined by a recognized standards body, or, in the case of -interfaces specified for a particular programming language, one that -is widely used among developers working in that language. - - The "System Libraries" of an executable work include anything, other -than the work as a whole, that (a) is included in the normal form of -packaging a Major Component, but which is not part of that Major -Component, and (b) serves only to enable use of the work with that -Major Component, or to implement a Standard Interface for which an -implementation is available to the public in source code form. A -"Major Component", in this context, means a major essential component -(kernel, window system, and so on) of the specific operating system -(if any) on which the executable work runs, or a compiler used to -produce the work, or an object code interpreter used to run it. 
- - The "Corresponding Source" for a work in object code form means all -the source code needed to generate, install, and (for an executable -work) run the object code and to modify the work, including scripts to -control those activities. However, it does not include the work's -System Libraries, or general-purpose tools or generally available free -programs which are used unmodified in performing those activities but -which are not part of the work. For example, Corresponding Source -includes interface definition files associated with source files for -the work, and the source code for shared libraries and dynamically -linked subprograms that the work is specifically designed to require, -such as by intimate data communication or control flow between those -subprograms and other parts of the work. - - The Corresponding Source need not include anything that users -can regenerate automatically from other parts of the Corresponding -Source. - - The Corresponding Source for a work in source code form is that -same work. - - 2. Basic Permissions. - - All rights granted under this License are granted for the term of -copyright on the Program, and are irrevocable provided the stated -conditions are met. This License explicitly affirms your unlimited -permission to run the unmodified Program. The output from running a -covered work is covered by this License only if the output, given its -content, constitutes a covered work. This License acknowledges your -rights of fair use or other equivalent, as provided by copyright law. - - You may make, run and propagate covered works that you do not -convey, without conditions so long as your license otherwise remains -in force. You may convey covered works to others for the sole purpose -of having them make modifications exclusively for you, or provide you -with facilities for running those works, provided that you comply with -the terms of this License in conveying all material for which you do -not control copyright. Those thus making or running the covered works -for you must do so exclusively on your behalf, under your direction -and control, on terms that prohibit them from making any copies of -your copyrighted material outside their relationship with you. - - Conveying under any other circumstances is permitted solely under -the conditions stated below. Sublicensing is not allowed; section 10 -makes it unnecessary. - - 3. Protecting Users' Legal Rights From Anti-Circumvention Law. - - No covered work shall be deemed part of an effective technological -measure under any applicable law fulfilling obligations under article -11 of the WIPO copyright treaty adopted on 20 December 1996, or -similar laws prohibiting or restricting circumvention of such -measures. - - When you convey a covered work, you waive any legal power to forbid -circumvention of technological measures to the extent such circumvention -is effected by exercising rights under this License with respect to -the covered work, and you disclaim any intention to limit operation or -modification of the work as a means of enforcing, against the work's -users, your or third parties' legal rights to forbid circumvention of -technological measures. - - 4. Conveying Verbatim Copies. 
- - You may convey verbatim copies of the Program's source code as you -receive it, in any medium, provided that you conspicuously and -appropriately publish on each copy an appropriate copyright notice; -keep intact all notices stating that this License and any -non-permissive terms added in accord with section 7 apply to the code; -keep intact all notices of the absence of any warranty; and give all -recipients a copy of this License along with the Program. - - You may charge any price or no price for each copy that you convey, -and you may offer support or warranty protection for a fee. - - 5. Conveying Modified Source Versions. - - You may convey a work based on the Program, or the modifications to -produce it from the Program, in the form of source code under the -terms of section 4, provided that you also meet all of these conditions: - - a) The work must carry prominent notices stating that you modified - it, and giving a relevant date. - - b) The work must carry prominent notices stating that it is - released under this License and any conditions added under section - 7. This requirement modifies the requirement in section 4 to - "keep intact all notices". - - c) You must license the entire work, as a whole, under this - License to anyone who comes into possession of a copy. This - License will therefore apply, along with any applicable section 7 - additional terms, to the whole of the work, and all its parts, - regardless of how they are packaged. This License gives no - permission to license the work in any other way, but it does not - invalidate such permission if you have separately received it. - - d) If the work has interactive user interfaces, each must display - Appropriate Legal Notices; however, if the Program has interactive - interfaces that do not display Appropriate Legal Notices, your - work need not make them do so. - - A compilation of a covered work with other separate and independent -works, which are not by their nature extensions of the covered work, -and which are not combined with it such as to form a larger program, -in or on a volume of a storage or distribution medium, is called an -"aggregate" if the compilation and its resulting copyright are not -used to limit the access or legal rights of the compilation's users -beyond what the individual works permit. Inclusion of a covered work -in an aggregate does not cause this License to apply to the other -parts of the aggregate. - - 6. Conveying Non-Source Forms. - - You may convey a covered work in object code form under the terms -of sections 4 and 5, provided that you also convey the -machine-readable Corresponding Source under the terms of this License, -in one of these ways: - - a) Convey the object code in, or embodied in, a physical product - (including a physical distribution medium), accompanied by the - Corresponding Source fixed on a durable physical medium - customarily used for software interchange. 
- - b) Convey the object code in, or embodied in, a physical product - (including a physical distribution medium), accompanied by a - written offer, valid for at least three years and valid for as - long as you offer spare parts or customer support for that product - model, to give anyone who possesses the object code either (1) a - copy of the Corresponding Source for all the software in the - product that is covered by this License, on a durable physical - medium customarily used for software interchange, for a price no - more than your reasonable cost of physically performing this - conveying of source, or (2) access to copy the - Corresponding Source from a network server at no charge. - - c) Convey individual copies of the object code with a copy of the - written offer to provide the Corresponding Source. This - alternative is allowed only occasionally and noncommercially, and - only if you received the object code with such an offer, in accord - with subsection 6b. - - d) Convey the object code by offering access from a designated - place (gratis or for a charge), and offer equivalent access to the - Corresponding Source in the same way through the same place at no - further charge. You need not require recipients to copy the - Corresponding Source along with the object code. If the place to - copy the object code is a network server, the Corresponding Source - may be on a different server (operated by you or a third party) - that supports equivalent copying facilities, provided you maintain - clear directions next to the object code saying where to find the - Corresponding Source. Regardless of what server hosts the - Corresponding Source, you remain obligated to ensure that it is - available for as long as needed to satisfy these requirements. - - e) Convey the object code using peer-to-peer transmission, provided - you inform other peers where the object code and Corresponding - Source of the work are being offered to the general public at no - charge under subsection 6d. - - A separable portion of the object code, whose source code is excluded -from the Corresponding Source as a System Library, need not be -included in conveying the object code work. - - A "User Product" is either (1) a "consumer product", which means any -tangible personal property which is normally used for personal, family, -or household purposes, or (2) anything designed or sold for incorporation -into a dwelling. In determining whether a product is a consumer product, -doubtful cases shall be resolved in favor of coverage. For a particular -product received by a particular user, "normally used" refers to a -typical or common use of that class of product, regardless of the status -of the particular user or of the way in which the particular user -actually uses, or expects or is expected to use, the product. A product -is a consumer product regardless of whether the product has substantial -commercial, industrial or non-consumer uses, unless such uses represent -the only significant mode of use of the product. - - "Installation Information" for a User Product means any methods, -procedures, authorization keys, or other information required to install -and execute modified versions of a covered work in that User Product from -a modified version of its Corresponding Source. The information must -suffice to ensure that the continued functioning of the modified object -code is in no case prevented or interfered with solely because -modification has been made. 
- - If you convey an object code work under this section in, or with, or -specifically for use in, a User Product, and the conveying occurs as -part of a transaction in which the right of possession and use of the -User Product is transferred to the recipient in perpetuity or for a -fixed term (regardless of how the transaction is characterized), the -Corresponding Source conveyed under this section must be accompanied -by the Installation Information. But this requirement does not apply -if neither you nor any third party retains the ability to install -modified object code on the User Product (for example, the work has -been installed in ROM). - - The requirement to provide Installation Information does not include a -requirement to continue to provide support service, warranty, or updates -for a work that has been modified or installed by the recipient, or for -the User Product in which it has been modified or installed. Access to a -network may be denied when the modification itself materially and -adversely affects the operation of the network or violates the rules and -protocols for communication across the network. - - Corresponding Source conveyed, and Installation Information provided, -in accord with this section must be in a format that is publicly -documented (and with an implementation available to the public in -source code form), and must require no special password or key for -unpacking, reading or copying. - - 7. Additional Terms. - - "Additional permissions" are terms that supplement the terms of this -License by making exceptions from one or more of its conditions. -Additional permissions that are applicable to the entire Program shall -be treated as though they were included in this License, to the extent -that they are valid under applicable law. If additional permissions -apply only to part of the Program, that part may be used separately -under those permissions, but the entire Program remains governed by -this License without regard to the additional permissions. - - When you convey a copy of a covered work, you may at your option -remove any additional permissions from that copy, or from any part of -it. (Additional permissions may be written to require their own -removal in certain cases when you modify the work.) You may place -additional permissions on material, added by you to a covered work, -for which you have or can give appropriate copyright permission. 
- - Notwithstanding any other provision of this License, for material you -add to a covered work, you may (if authorized by the copyright holders of -that material) supplement the terms of this License with terms: - - a) Disclaiming warranty or limiting liability differently from the - terms of sections 15 and 16 of this License; or - - b) Requiring preservation of specified reasonable legal notices or - author attributions in that material or in the Appropriate Legal - Notices displayed by works containing it; or - - c) Prohibiting misrepresentation of the origin of that material, or - requiring that modified versions of such material be marked in - reasonable ways as different from the original version; or - - d) Limiting the use for publicity purposes of names of licensors or - authors of the material; or - - e) Declining to grant rights under trademark law for use of some - trade names, trademarks, or service marks; or - - f) Requiring indemnification of licensors and authors of that - material by anyone who conveys the material (or modified versions of - it) with contractual assumptions of liability to the recipient, for - any liability that these contractual assumptions directly impose on - those licensors and authors. - - All other non-permissive additional terms are considered "further -restrictions" within the meaning of section 10. If the Program as you -received it, or any part of it, contains a notice stating that it is -governed by this License along with a term that is a further -restriction, you may remove that term. If a license document contains -a further restriction but permits relicensing or conveying under this -License, you may add to a covered work material governed by the terms -of that license document, provided that the further restriction does -not survive such relicensing or conveying. - - If you add terms to a covered work in accord with this section, you -must place, in the relevant source files, a statement of the -additional terms that apply to those files, or a notice indicating -where to find the applicable terms. - - Additional terms, permissive or non-permissive, may be stated in the -form of a separately written license, or stated as exceptions; -the above requirements apply either way. - - 8. Termination. - - You may not propagate or modify a covered work except as expressly -provided under this License. Any attempt otherwise to propagate or -modify it is void, and will automatically terminate your rights under -this License (including any patent licenses granted under the third -paragraph of section 11). - - However, if you cease all violation of this License, then your -license from a particular copyright holder is reinstated (a) -provisionally, unless and until the copyright holder explicitly and -finally terminates your license, and (b) permanently, if the copyright -holder fails to notify you of the violation by some reasonable means -prior to 60 days after the cessation. - - Moreover, your license from a particular copyright holder is -reinstated permanently if the copyright holder notifies you of the -violation by some reasonable means, this is the first time you have -received notice of violation of this License (for any work) from that -copyright holder, and you cure the violation prior to 30 days after -your receipt of the notice. - - Termination of your rights under this section does not terminate the -licenses of parties who have received copies or rights from you under -this License. 
If your rights have been terminated and not permanently -reinstated, you do not qualify to receive new licenses for the same -material under section 10. - - 9. Acceptance Not Required for Having Copies. - - You are not required to accept this License in order to receive or -run a copy of the Program. Ancillary propagation of a covered work -occurring solely as a consequence of using peer-to-peer transmission -to receive a copy likewise does not require acceptance. However, -nothing other than this License grants you permission to propagate or -modify any covered work. These actions infringe copyright if you do -not accept this License. Therefore, by modifying or propagating a -covered work, you indicate your acceptance of this License to do so. - - 10. Automatic Licensing of Downstream Recipients. - - Each time you convey a covered work, the recipient automatically -receives a license from the original licensors, to run, modify and -propagate that work, subject to this License. You are not responsible -for enforcing compliance by third parties with this License. - - An "entity transaction" is a transaction transferring control of an -organization, or substantially all assets of one, or subdividing an -organization, or merging organizations. If propagation of a covered -work results from an entity transaction, each party to that -transaction who receives a copy of the work also receives whatever -licenses to the work the party's predecessor in interest had or could -give under the previous paragraph, plus a right to possession of the -Corresponding Source of the work from the predecessor in interest, if -the predecessor has it or can get it with reasonable efforts. - - You may not impose any further restrictions on the exercise of the -rights granted or affirmed under this License. For example, you may -not impose a license fee, royalty, or other charge for exercise of -rights granted under this License, and you may not initiate litigation -(including a cross-claim or counterclaim in a lawsuit) alleging that -any patent claim is infringed by making, using, selling, offering for -sale, or importing the Program or any portion of it. - - 11. Patents. - - A "contributor" is a copyright holder who authorizes use under this -License of the Program or a work on which the Program is based. The -work thus licensed is called the contributor's "contributor version". - - A contributor's "essential patent claims" are all patent claims -owned or controlled by the contributor, whether already acquired or -hereafter acquired, that would be infringed by some manner, permitted -by this License, of making, using, or selling its contributor version, -but do not include claims that would be infringed only as a -consequence of further modification of the contributor version. For -purposes of this definition, "control" includes the right to grant -patent sublicenses in a manner consistent with the requirements of -this License. - - Each contributor grants you a non-exclusive, worldwide, royalty-free -patent license under the contributor's essential patent claims, to -make, use, sell, offer for sale, import and otherwise run, modify and -propagate the contents of its contributor version. - - In the following three paragraphs, a "patent license" is any express -agreement or commitment, however denominated, not to enforce a patent -(such as an express permission to practice a patent or covenant not to -sue for patent infringement). 
To "grant" such a patent license to a -party means to make such an agreement or commitment not to enforce a -patent against the party. - - If you convey a covered work, knowingly relying on a patent license, -and the Corresponding Source of the work is not available for anyone -to copy, free of charge and under the terms of this License, through a -publicly available network server or other readily accessible means, -then you must either (1) cause the Corresponding Source to be so -available, or (2) arrange to deprive yourself of the benefit of the -patent license for this particular work, or (3) arrange, in a manner -consistent with the requirements of this License, to extend the patent -license to downstream recipients. "Knowingly relying" means you have -actual knowledge that, but for the patent license, your conveying the -covered work in a country, or your recipient's use of the covered work -in a country, would infringe one or more identifiable patents in that -country that you have reason to believe are valid. - - If, pursuant to or in connection with a single transaction or -arrangement, you convey, or propagate by procuring conveyance of, a -covered work, and grant a patent license to some of the parties -receiving the covered work authorizing them to use, propagate, modify -or convey a specific copy of the covered work, then the patent license -you grant is automatically extended to all recipients of the covered -work and works based on it. - - A patent license is "discriminatory" if it does not include within -the scope of its coverage, prohibits the exercise of, or is -conditioned on the non-exercise of one or more of the rights that are -specifically granted under this License. You may not convey a covered -work if you are a party to an arrangement with a third party that is -in the business of distributing software, under which you make payment -to the third party based on the extent of your activity of conveying -the work, and under which the third party grants, to any of the -parties who would receive the covered work from you, a discriminatory -patent license (a) in connection with copies of the covered work -conveyed by you (or copies made from those copies), or (b) primarily -for and in connection with specific products or compilations that -contain the covered work, unless you entered into that arrangement, -or that patent license was granted, prior to 28 March 2007. - - Nothing in this License shall be construed as excluding or limiting -any implied license or other defenses to infringement that may -otherwise be available to you under applicable patent law. - - 12. No Surrender of Others' Freedom. - - If conditions are imposed on you (whether by court order, agreement or -otherwise) that contradict the conditions of this License, they do not -excuse you from the conditions of this License. If you cannot convey a -covered work so as to satisfy simultaneously your obligations under this -License and any other pertinent obligations, then as a consequence you may -not convey it at all. For example, if you agree to terms that obligate you -to collect a royalty for further conveying from those to whom you convey -the Program, the only way you could satisfy both those terms and this -License would be to refrain entirely from conveying the Program. - - 13. Remote Network Interaction; Use with the GNU General Public License. 
- - Notwithstanding any other provision of this License, if you modify the -Program, your modified version must prominently offer all users -interacting with it remotely through a computer network (if your version -supports such interaction) an opportunity to receive the Corresponding -Source of your version by providing access to the Corresponding Source -from a network server at no charge, through some standard or customary -means of facilitating copying of software. This Corresponding Source -shall include the Corresponding Source for any work covered by version 3 -of the GNU General Public License that is incorporated pursuant to the -following paragraph. - - Notwithstanding any other provision of this License, you have -permission to link or combine any covered work with a work licensed -under version 3 of the GNU General Public License into a single -combined work, and to convey the resulting work. The terms of this -License will continue to apply to the part which is the covered work, -but the work with which it is combined will remain governed by version -3 of the GNU General Public License. - - 14. Revised Versions of this License. - - The Free Software Foundation may publish revised and/or new versions of -the GNU Affero General Public License from time to time. Such new versions -will be similar in spirit to the present version, but may differ in detail to -address new problems or concerns. - - Each version is given a distinguishing version number. If the -Program specifies that a certain numbered version of the GNU Affero General -Public License "or any later version" applies to it, you have the -option of following the terms and conditions either of that numbered -version or of any later version published by the Free Software -Foundation. If the Program does not specify a version number of the -GNU Affero General Public License, you may choose any version ever published -by the Free Software Foundation. - - If the Program specifies that a proxy can decide which future -versions of the GNU Affero General Public License can be used, that proxy's -public statement of acceptance of a version permanently authorizes you -to choose that version for the Program. - - Later license versions may give you additional or different -permissions. However, no additional obligations are imposed on any -author or copyright holder as a result of your choosing to follow a -later version. - - 15. Disclaimer of Warranty. - - THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY -APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT -HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY -OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, -THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM -IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF -ALL NECESSARY SERVICING, REPAIR OR CORRECTION. - - 16. Limitation of Liability. 
- - IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING -WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS -THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY -GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE -USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF -DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD -PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), -EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF -SUCH DAMAGES. - - 17. Interpretation of Sections 15 and 16. - - If the disclaimer of warranty and limitation of liability provided -above cannot be given local legal effect according to their terms, -reviewing courts shall apply local law that most closely approximates -an absolute waiver of all civil liability in connection with the -Program, unless a warranty or assumption of liability accompanies a -copy of the Program in return for a fee. - - END OF TERMS AND CONDITIONS - - How to Apply These Terms to Your New Programs - - If you develop a new program, and you want it to be of the greatest -possible use to the public, the best way to achieve this is to make it -free software which everyone can redistribute and change under these terms. - - To do so, attach the following notices to the program. It is safest -to attach them to the start of each source file to most effectively -state the exclusion of warranty; and each file should have at least -the "copyright" line and a pointer to where the full notice is found. - - - Copyright (C) - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see . - -Also add information on how to contact you by electronic and paper mail. - - If your software can interact with users remotely through a computer -network, you should also make sure that it provides a way for users to -get its source. For example, if your program is a web application, its -interface could display a "Source" link that leads users to an archive -of the code. There are many ways you could offer source, and different -solutions will be better for different programs; see section 13 for the -specific requirements. - - You should also get your employer (if you work as a programmer) or school, -if any, to sign a "copyright disclaimer" for the program, if necessary. -For more information on this, and how to apply and follow the GNU AGPL, see -. \ No newline at end of file diff --git a/tutorials/mct_model_garden/models_keras/yolov8/__init__.py b/tutorials/mct_model_garden/models_keras/yolov8/__init__.py deleted file mode 100644 index 2147ec284..000000000 --- a/tutorials/mct_model_garden/models_keras/yolov8/__init__.py +++ /dev/null @@ -1,14 +0,0 @@ -# Copyright 2023 Sony Semiconductor Israel, Inc. All rights reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== diff --git a/tutorials/mct_model_garden/models_keras/yolov8/yolov8.py b/tutorials/mct_model_garden/models_keras/yolov8/yolov8.py deleted file mode 100644 index e41e8c578..000000000 --- a/tutorials/mct_model_garden/models_keras/yolov8/yolov8.py +++ /dev/null @@ -1,454 +0,0 @@ -# ------------------------------------------------------------------------------ -# This file contains code from the Ultralytics repository (YOLOv8) -# Copyright (C) 2024 Ultralytics -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . -# ------------------------------------------------------------------------------ - -""" -Yolov8n Object Detection Model - Keras implementation - -This code contains a TensorFlow/Keras implementation of Yolov8n object detection model, following -https://github.com/ultralytics/ultralytics. This implementation includes a slightly modified version of yolov8 -detection-head (mainly the box decoding part) that was optimized for model quantization. - -The code is organized as follows: -- Classes definitions of Yolov8n building blocks: Conv, Bottleneck, C2f, SPPF, Upsample, Concaat, DFL and Detect -- Detection Model definition: DetectionModelKeras -- A getter function for getting a new instance of the model - -For more details on the Yolov8n model, refer to the original repository: -https://github.com/ultralytics/ultralytics - -""" -import sys -from pathlib import Path -import re -import yaml -from copy import deepcopy -import contextlib -import math -import numpy as np -import tensorflow as tf -from keras import layers, initializers -from keras.layers import BatchNormalization, Concatenate, UpSampling2D, Input -from keras.models import Model -from typing import Dict, Optional, List, Tuple, Union -import cv2 - -def yaml_load(file: str = 'data.yaml', append_filename: bool = False) -> Dict[str, any]: - """ - Load YAML data from a file. - - Args: - file (str, optional): File name. Default is 'data.yaml'. - append_filename (bool): Add the YAML filename to the YAML dictionary. Default is False. - - Returns: - dict: YAML data and file name. 
- """ - with open(file, errors='ignore', encoding='utf-8') as f: - s = f.read() # string - if not s.isprintable(): # remove special characters - s = re.sub(r'[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\uD7FF\uE000-\uFFFD\U00010000-\U0010ffff]+', '', s) - return {**yaml.safe_load(s), 'yaml_file': str(file)} if append_filename else yaml.safe_load(s) - -def make_divisible(x: int, divisor: int) -> int: - """ - Returns the nearest integer to 'x' that is divisible by 'divisor'. - - Args: - x (int): The input integer. - divisor (int): The divisor for which 'x' should be divisible. - - Returns: - int: The nearest integer to 'x' that is divisible by 'divisor'. - """ - return math.ceil(x / divisor) * divisor - -class Conv: - def __init__(self, c1: int, c2: int, k: int = 1, s: int = 1, name: str = '', g: int = 1, d: int = 1): - """ - Standard convolution layer. - - Args: - c1 (int): Number of input channels. - c2 (int): Number of output channels. - k (int, optional): Kernel size. Default is 1. - s (int, optional): Stride. Default is 1. - name (str, optional): Name of the layer. Default is an empty string. - g (int, optional): Groups. Default is 1. - d (int, optional): Dilation rate. Default is 1. - - """ - kernel_size = k[0] if isinstance(k,tuple) else k - if kernel_size > 1: - pad = ((1,0), (1,0)) if s > 1 else (1,1) - else: - pad = (0,0) - self.padding2d = layers.ZeroPadding2D(padding=pad) - self.conv = layers.Conv2D(c2, k, s, 'valid', groups=g, dilation_rate=d, use_bias=False, name=name+'.conv') - self.bn = layers.BatchNormalization(momentum=0.97, epsilon=1e-3, name=name+'.bn') - self.act = tf.nn.silu # default activation - self.c1 = c1 # Unused in Keras implementation - - def __call__(self, x): - return self.act(self.bn(self.conv(self.padding2d(x)))) - - def forward_fuse(self, x): - return self.act(self.conv(x)) - -class Bottleneck: - def __init__(self, c1: int, c2: int, shortcut: bool = True, g: int = 1, k: Tuple[int, int] = (3, 3), e: float = 0.5, - name: str = ''): - """ - Standard bottleneck layer. - - Args: - c1 (int): Number of input channels. - c2 (int): Number of output channels. - shortcut (bool, optional): Use shortcut connection. Default is True. - g (int, optional): Groups. Default is 1. - k (Tuple[int, int], optional): Kernel sizes. Default is (3, 3). - e (float, optional): Hidden channels ratio. Default is 0.5. - name (str, optional): Name of the layer. Default is an empty string. - - """ - c_ = int(c2 * e) # hidden channels - self.cv1 = Conv(c1, c_, k[0], 1, name=f'{name}.cv1') - self.cv2 = Conv(c_, c2, k[1], 1, g=g, name=f'{name}.cv2') - self.add = shortcut and c1 == c2 - - def __call__(self, x): - return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x)) - -class C2f: - def __init__(self, c1: int, c2: int, n: int = 1, shortcut: bool = False, name: str = '', g: int = 1, e: float = 0.5): - """ - CSP Bottleneck with 2 convolutions. - - Args: - c1 (int): Number of input channels. - c2 (int): Number of output channels. - n (int, optional): Number of Bottleneck blocks. Default is 1. - shortcut (bool, optional): Use shortcut connection. Default is False. - name (str, optional): Name of the layer. Default is an empty string. - g (int, optional): Groups. Default is 1. - e (float, optional): Hidden channels ratio. Default is 0.5. 
- - """ - self.c = int(c2 * e) # hidden channels - self.cv1 = Conv(c1, 2 * self.c, 1, 1, name=f'{name}.cv1') - self.cv2 = Conv((2 + n) * self.c, c2, 1, name=f'{name}.cv2') # optional act=FReLU(c2) - self.m = [Bottleneck(self.c, self.c, shortcut, g, k=((3, 3), (3, 3)), e=1.0, name=f'{name}.m.{i}') for i in range(n)] - - def __call__(self, x): - y1 = self.cv1(x) - y = tf.split(y1, 2, -1) - y.extend(m(y[-1]) for m in self.m) - return self.cv2(Concatenate(axis=-1)(y)) - - def forward_split(self, x): - y = list(self.cv1(x).split((self.c, self.c), 1)) - y.extend(m(y[-1]) for m in self.m) - return self.cv2(torch.cat(y, 1)) - -class SPPF: - def __init__(self, c1: int, c2: int, k: int = 5, name: str = ''): - """ - Spatial Pyramid Pooling - Fast (SPPF) layer. - - Args: - c1 (int): Number of input channels. - c2 (int): Number of output channels. - k (int, optional): Kernel size. Default is 5. - name (str, optional): Name of the layer. Default is an empty string. - """ - c_ = c1 // 2 # hidden channels - self.cv1 = Conv(c1, c_, 1, 1, name=f'{name}.cv1') - self.cv2 = Conv(c_ * 4, c2, 1, 1, name=f'{name}.cv2') - if k // 2 == 0: - padding = 'valid' - else: - padding = 'same' - self.m = layers.MaxPooling2D(pool_size=k, strides=1, padding=padding) - - def __call__(self, x): - x = self.cv1(x) - y1 = self.m(x) - y2 = self.m(y1) - y3 = self.m(y2) - y = Concatenate()([x, y1, y2, y3]) - return self.cv2(y) - - -class Upsample: - def __init__(self, size: Tuple[int, int] = None, scale_factor: Tuple[float, float] = None, mode: str = 'nearest'): - """ - Upsample layer using `UpSampling2D` for resizing the input. - - Args: - size (Tuple[int, int], optional): The target size (height, width) for upsampling. Default is None. - scale_factor (Tuple[float, float], optional): The scaling factor (height_scale, width_scale) for upsampling. Default is None. - mode (str, optional): The interpolation mode. Default is 'nearest'. - """ - self.m = UpSampling2D(size=scale_factor, data_format=None, interpolation=mode) - - def __call__(self, x): - return self.m(x) - -class Concat: - def __init__(self, dimension: int = 1): - """ - Concatenate a list of tensors along the specified dimension. - - Args: - dimension (int, optional): The dimension along which tensors will be concatenated. Default is 1. - """ - self.d = -1 if dimension==1 else dimension - - def __call__(self, x): - return Concatenate(self.d)(x) - -class DFL: - def __init__(self, c1: int = 8, name: str = ''): - """ - Distributed focal loss calculation. - - Args: - c1 (int, optional): The number of classes. Default is 8. - name (str, optional): Name prefix for layers. Default is an empty string. - """ - self.c1 = c1 - w = np.expand_dims(np.expand_dims(np.expand_dims(np.arange(c1), 0), 0), -1) - self.conv = layers.Conv2D(1, 1, use_bias=False, weights=[w], name=f'{name}.dfl.conv') - - def __call__(self, x): - x_shape = x.shape - x = tf.reshape(x, (-1, x_shape[1], 4, self.c1)) - x = layers.Softmax(-1)(x) - return tf.squeeze(self.conv(x), -1) - -def make_anchors(feats: List[int], strides: List[int], grid_cell_offset: float = 0.5): - """ - Generate anchors from features. - - Args: - feats (List[int]): List of feature sizes for generating anchors. - strides (List[int]): List of stride values corresponding to each feature size. - grid_cell_offset (float, optional): Grid cell offset. Default is 0.5. - - Returns: - Tuple[np.ndarray, np.ndarray]: A tuple containing the anchor points and stride tensors. 
- """ - anchor_points, stride_tensor = [], [] - assert feats is not None - for i, stride in enumerate(strides): - h, w = feats[i], feats[i] - sx = np.arange(stop=w) + grid_cell_offset # shift x - sy = np.arange(stop=h) + grid_cell_offset # shift y - sy, sx = np.meshgrid(sy, sx, indexing='ij') - anchor_points.append(np.stack((sx, sy), -1).reshape((-1, 2))) - stride_tensor.append(np.full((h * w, 1), stride)) - return np.concatenate(anchor_points, dtype='float32'), np.concatenate(stride_tensor, dtype='float32') - -def dist2bbox(points: tf.Tensor, distance: tf.Tensor) -> tf.Tensor: - """ - Decode distance prediction to bounding box. - - Args: - points (tf.Tensor): Shape (n, 2), [x, y]. - distance (tf.Tensor): Distance from the given point to 4 - boundaries (left, top, right, bottom). - - Returns: - tf.Tensor: Decoded bboxes. - """ - d0, d1, d2, d3 = tf.unstack(distance, 4, -1) - a0, a1 = tf.unstack(points, 2, -1) - x1 = layers.ReLU()(tf.math.subtract(a0, d0)) # Adding a relu in order to force unsigned output (which is expected in this case) - y1 = layers.ReLU()(tf.math.subtract(a1, d1)) - x2 = layers.ReLU()(tf.math.add(a0, d2)) - y2 = layers.ReLU()(tf.math.add(a1, d3)) - return tf.stack([y1, x1, y2, x2], -1) - -class Detect: - def __init__(self, nc: int = 80, ch: List[int] = (), name: str = ''): - """ - Detection layer for YOLOv8. - - Args: - nc (int): Number of classes. - ch (List[int]): List of channel values for detection layers. - name (str): Name for the detection layer. - - """ - self.nc = nc # number of classes - self.nl = len(ch) # number of detection layers - self.reg_max = 16 # DFL channels - self.no = nc + self.reg_max * 4 # number of outputs per anchor - self.feat_sizes = [80, 40, 20] - self.stride_sizes = [8, 16, 32] - img_size = 640 - nd0, nd1, nd2 = np.cumsum([sz ** 2 for sz in self.feat_sizes]) # split per stride/resolution level - - c2, c3 = max((16, ch[0] // 4, self.reg_max * 4)), max(ch[0], self.nc) # channels - - # Bias initialization for the detection head kernels - a, b = [], [] - for s in self.stride_sizes: - a.append(initializers.Constant(1.0)) # box - b.append(initializers.Constant(math.log(5 / self.nc / (img_size / s) ** 2))) # cls (.01 objects, 80 classes, 640 img) - - # Detection head - self.cv2 = [[Conv(x, c2, 3, name=f'{name}.cv2.{i}.0'), Conv(c2, c2, 3, name=f'{name}.cv2.{i}.1'), layers.Conv2D(4 * self.reg_max, 1, bias_initializer=a[i], name=f'{name}.cv2.{i}.2')] for i, x in enumerate(ch)] - self.cv3 = [[Conv(x, c3, 3, name=f'{name}.cv3.{i}.0'), Conv(c3, c3, 3, name=f'{name}.cv3.{i}.1'), layers.Conv2D(self.nc, 1, bias_initializer=b[i], name=f'{name}.cv3.{i}.2')] for i, x in enumerate(ch)] - - # Distributed Focal Loss - self.dfl = DFL(self.reg_max, name=name) - - # Yolov8 anchors preparation. The strides are used to scale the different resolution levels - self.anchors, self.strides = (x.transpose(0,1) for x in make_anchors(self.feat_sizes, self.stride_sizes, 0.5)) - - # Anchors normalization - optimizations for better quantization - self.strides = self.strides / img_size - self.split_strides = [self.strides[0], self.strides[nd0], self.strides[nd1]] - self.anchors = self.anchors * self.strides - - def __call__(self, x): - # Detection head convolutions. 
Output per stride level - feat = self.feat_sizes - xbox, xcls = [0,0,0], [0,0,0] - for i in range(self.nl): - x0 = self.cv2[i][0](x[i]) - x0 = self.cv2[i][1](x0) - x0 = self.cv2[i][2](x0) - x1 = self.cv3[i][0](x[i]) - x1 = self.cv3[i][1](x1) - x1 = self.cv3[i][2](x1) - xbox[i], xcls[i] = x0, x1 - - # Classes - concatenation of the stride levels and sigmoid operator - cls = Concatenate(axis=1)([tf.reshape(xi, (-1, feat[i] ** 2, self.nc)) for i, xi in enumerate(xcls)]) - y_cls = tf.math.sigmoid(cls) - - # Boxes - DFL operator, stride scaling and lastly concatenation (for better quantization, we want a concatenation of inputs with the same scale) - box = [tf.reshape(xi, (-1, feat[i] ** 2, self.reg_max * 4)) for i, xi in enumerate(xbox)] - dist = Concatenate(axis=1)([tf.math.multiply(self.dfl(b), self.split_strides[i]) for i,b in enumerate(box)]) - anchors = tf.expand_dims(self.anchors, 0) - y_bb = dist2bbox(anchors, dist) - y_bb = tf.expand_dims(y_bb, 2) - - return [y_bb, y_cls] - -def parse_model(d: dict, ch: List[int], verbose: bool = True) -> Tuple[List, List[int]]: - """ - Parse a YOLO model.yaml dictionary and construct the model architecture. - - Args: - d (dict): YOLO model.yaml dictionary containing model configuration. - ch (List[int]): List of initial channel sizes. - verbose (bool, optional): Verbose mode for printing model details. Default is True. - - Returns: - list: A list of model layers. - list: A list of save indices for layers. - """ - nc, gd, gw, act = d['nc'], d['depth_multiple'], d['width_multiple'], d.get('activation') - ch = [ch] - layers, save, c2 = [], [], ch[-1] # layers, savelist, ch out - for i, (f, n, m, args) in enumerate(d['backbone'] + d['head']): # from, number, module, args - m = 'Upsample' if m == 'nn.Upsample' else m - m = eval(m) if isinstance(m, str) else m # eval strings - for j, a in enumerate(args): - with contextlib.suppress(NameError): - args[j] = eval(a) if isinstance(a, str) else a # eval strings - - n = n_ = max(round(n * gd), 1) if n > 1 else n # depth gain - if m in [Conv, C2f, SPPF]: - c1, c2 = ch[f], args[0] - if c2 != nc: # if c2 not equal to number of classes (i.e. for Classify() output) - c2 = make_divisible(c2 * gw, 8) - - args = [c1, c2, *args[1:]] - if m in [C2f]: - args.insert(2, n) # number of repeats - n = 1 - elif m is BatchNormalization: - args = [ch[f]] - elif m is Concat: - c2 = sum(ch[x] for x in f) - elif m is Detect: - args.append([ch[x] for x in f]) - else: - c2 = ch[f] - - if n > 1: - raise Exception("Not implemented") - - if m is C2f and len(args) == 3: - args.append(False) - if m in [Conv, C2f, SPPF, Detect]: - args.append(f'model.{i}') - m_ = m(*args) - t = str(m)[8:-2].replace('__main__.', '') # module type - m_.i, m_.f, m_.type = i, f, t # attach index, 'from' index, type - print(f'{i:>3}{str(f):>20}{n_:>3} {t:<45}{str(args):<30}') - save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1) # append to savelist - layers.append(m_) - if i == 0: - ch = [] - ch.append(c2) - return layers, sorted(save) - -class DetectionModelKeras: - def __init__(self, cfg: dict, ch: int = 3, verbose: bool = True): - """ - YOLOv8 detection model. - - Args: - cfg (dict): Model configuration in the form of a YAML string or a dictionary. - ch (int): Number of input channels. - verbose (bool, optional): Verbose mode for printing model details. Default is True. 
- """ - # Define model - self.yaml = cfg - ch = self.yaml['ch'] = self.yaml.get('ch', ch) # input channels - self.model, self.save = parse_model(deepcopy(self.yaml), ch=ch, verbose=verbose) # model, savelist - - def __call__(self, x): - y, dt = [], [] # outputs - for m in self.model: - if m.f != -1: # if not from previous layer - x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f] # from earlier layers - x = m(x) # run - y.append(x if m.i in self.save else None) # save output - return x - -def yolov8_keras(model_yaml: str, img_size: int) -> Model: - """ - Create Keras model of YOLOv8 detection. - - Args: - model (str): Name of the YOLOv8 model configuration file (YAML format). - img_size (int): Size of the input image (assuming square dimensions). - - Returns: - Model: YOLOv8 detection model. - """ - cfg = model_yaml - cfg_dict = yaml_load(cfg, append_filename=True) # model dict - model_func = DetectionModelKeras(cfg_dict, verbose=True) # model functionality - inputs = Input(shape=(img_size, img_size, 3)) - return Model(inputs, model_func(inputs)) diff --git a/tutorials/mct_model_garden/models_keras/yolov8/yolov8_preprocess.py b/tutorials/mct_model_garden/models_keras/yolov8/yolov8_preprocess.py deleted file mode 100644 index 7a67be944..000000000 --- a/tutorials/mct_model_garden/models_keras/yolov8/yolov8_preprocess.py +++ /dev/null @@ -1,76 +0,0 @@ -# ------------------------------------------------------------------------------ -# This file contains code from the Ultralytics repository (YOLOv8) -# Copyright (C) 2024 Ultralytics -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . -# ------------------------------------------------------------------------------ - -""" -This code is mostly based on Ultralytics implementation. For more details, refer to the original repository: -https://github.com/ultralytics/ultralytics -""" - -import numpy as np -from typing import Tuple -import cv2 - - -def yolov8_preprocess(x: np.ndarray, img_mean: float = 0.0, img_std: float = 255.0, pad_values: int = 114, - size: Tuple[int, int] = (640, 640)) -> np.ndarray: - """ - Preprocess an input image for YOLOv8 model. - - Args: - x (np.ndarray): Input image as a NumPy array. - img_mean (float): Mean value used for normalization. Default is 0.0. - img_std (float): Standard deviation used for normalization. Default is 255.0. - pad_values (int): Value used for padding. Default is 114. - size (Tuple[int, int]): Desired output size (height, width). Default is (640, 640). - - Returns: - np.ndarray: Preprocessed image as a NumPy array. 
- """ - h, w = x.shape[:2] # Image size - hn, wn = size # Image new size - r = max(h / hn, w / wn) - hr, wr = int(np.round(h / r)), int(np.round(w / r)) - # pad = ((int((hn - hr) / 2), int((hn - hr) / 2 + 0.5)), (int((wn - wr) / 2), int((wn - wr) / 2 + 0.5)), (0, 0)) - pad = ( - (int((hn - hr) / 2), int((hn - hr) / 2 + 0.5)), - (int((wn - wr) / 2), int((wn - wr) / 2 + 0.5)), - (0, 0) - ) - - x = np.flip(x, -1) # Flip image channels - x = cv2.resize(x, (wr, hr), interpolation=cv2.INTER_AREA) # Aspect ratio preserving resize - x = np.pad(x, pad, constant_values=pad_values) # Padding to the target size - x = (x - img_mean) / img_std # Normalization - return x - -def yolov8_preprocess_chw_transpose(x: np.ndarray, img_mean: float = 0.0, img_std: float = 255.0, pad_values: int = 114, - size: Tuple[int, int] = (640, 640)) -> np.ndarray: - """ - Preprocess an input image for YOLOv8 model with additional CHW transpose (for PyTorch implementation) - - Args: - x (np.ndarray): Input image as a NumPy array. - img_mean (float): Mean value used for normalization. Default is 0.0. - img_std (float): Standard deviation used for normalization. Default is 255.0. - pad_values (int): Value used for padding. Default is 114. - size (Tuple[int, int]): Desired output size (height, width). Default is (640, 640). - - Returns: - np.ndarray: Preprocessed image as a NumPy array. - """ - return yolov8_preprocess(x, img_mean, img_std, pad_values, size).transpose([2, 0, 1]) \ No newline at end of file diff --git a/tutorials/mct_model_garden/models_keras/yolov8/yolov8n.yaml b/tutorials/mct_model_garden/models_keras/yolov8/yolov8n.yaml deleted file mode 100644 index 6932ed16c..000000000 --- a/tutorials/mct_model_garden/models_keras/yolov8/yolov8n.yaml +++ /dev/null @@ -1,58 +0,0 @@ -# ------------------------------------------------------------------------------ -# This file contains code from the Ultralytics repository (YOLOv8) -# Copyright (C) 2024 Ultralytics -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . 
-# ------------------------------------------------------------------------------ - -# Yolov8n Object Detection Model - Configuration for Keras implementation - -# Parameters -nc: 80 # number of classes -depth_multiple: 0.33 # scales module repeats -width_multiple: 0.25 # scales convolution channels - -# YOLOv8.0n backbone -backbone: - # [from, repeats, module, args] - - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2 - - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4 - - [-1, 3, C2f, [128, True]] - - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8 - - [-1, 6, C2f, [256, True]] - - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16 - - [-1, 6, C2f, [512, True]] - - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32 - - [-1, 3, C2f, [1024, True]] - - [-1, 1, SPPF, [1024, 5]] # 9 - -# YOLOv8.0n head -head: - - [-1, 1, nn.Upsample, [None, 2, 'nearest']] - - [[-1, 6], 1, Concat, [1]] # cat backbone P4 - - [-1, 3, C2f, [512]] # 12 - - - [-1, 1, nn.Upsample, [None, 2, 'nearest']] - - [[-1, 4], 1, Concat, [1]] # cat backbone P3 - - [-1, 3, C2f, [256]] # 15 (P3/8-small) - - - [-1, 1, Conv, [256, 3, 2]] - - [[-1, 12], 1, Concat, [1]] # cat head P4 - - [-1, 3, C2f, [512]] # 18 (P4/16-medium) - - - [-1, 1, Conv, [512, 3, 2]] - - [[-1, 9], 1, Concat, [1]] # cat head P5 - - [-1, 3, C2f, [1024]] # 21 (P5/32-large) - - - [[15, 18, 21], 1, Detect, [nc]] # Detect(P3, P4, P5) diff --git a/tutorials/mct_model_garden/models_pytorch/__init__.py b/tutorials/mct_model_garden/models_pytorch/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/tutorials/mct_model_garden/models_pytorch/fastdepth/LICENSE b/tutorials/mct_model_garden/models_pytorch/fastdepth/LICENSE deleted file mode 100644 index bae94e189..000000000 --- a/tutorials/mct_model_garden/models_pytorch/fastdepth/LICENSE +++ /dev/null @@ -1,661 +0,0 @@ - GNU AFFERO GENERAL PUBLIC LICENSE - Version 3, 19 November 2007 - - Copyright (C) 2007 Free Software Foundation, Inc. - Everyone is permitted to copy and distribute verbatim copies - of this license document, but changing it is not allowed. - - Preamble - - The GNU Affero General Public License is a free, copyleft license for -software and other kinds of works, specifically designed to ensure -cooperation with the community in the case of network server software. - - The licenses for most software and other practical works are designed -to take away your freedom to share and change the works. By contrast, -our General Public Licenses are intended to guarantee your freedom to -share and change all versions of a program--to make sure it remains free -software for all its users. - - When we speak of free software, we are referring to freedom, not -price. Our General Public Licenses are designed to make sure that you -have the freedom to distribute copies of free software (and charge for -them if you wish), that you receive source code or can get it if you -want it, that you can change the software or use pieces of it in new -free programs, and that you know you can do these things. - - Developers that use our General Public Licenses protect your rights -with two steps: (1) assert copyright on the software, and (2) offer -you this License which gives you legal permission to copy, distribute -and/or modify the software. - - A secondary benefit of defending all users' freedom is that -improvements made in alternate versions of the program, if they -receive widespread use, become available for other developers to -incorporate. Many developers of free software are heartened and -encouraged by the resulting cooperation. 
However, in the case of -software used on network servers, this result may fail to come about. -The GNU General Public License permits making a modified version and -letting the public access it on a server without ever releasing its -source code to the public. - - The GNU Affero General Public License is designed specifically to -ensure that, in such cases, the modified source code becomes available -to the community. It requires the operator of a network server to -provide the source code of the modified version running there to the -users of that server. Therefore, public use of a modified version, on -a publicly accessible server, gives the public access to the source -code of the modified version. - - An older license, called the Affero General Public License and -published by Affero, was designed to accomplish similar goals. This is -a different license, not a version of the Affero GPL, but Affero has -released a new version of the Affero GPL which permits relicensing under -this license. - - The precise terms and conditions for copying, distribution and -modification follow. - - TERMS AND CONDITIONS - - 0. Definitions. - - "This License" refers to version 3 of the GNU Affero General Public License. - - "Copyright" also means copyright-like laws that apply to other kinds of -works, such as semiconductor masks. - - "The Program" refers to any copyrightable work licensed under this -License. Each licensee is addressed as "you". "Licensees" and -"recipients" may be individuals or organizations. - - To "modify" a work means to copy from or adapt all or part of the work -in a fashion requiring copyright permission, other than the making of an -exact copy. The resulting work is called a "modified version" of the -earlier work or a work "based on" the earlier work. - - A "covered work" means either the unmodified Program or a work based -on the Program. - - To "propagate" a work means to do anything with it that, without -permission, would make you directly or secondarily liable for -infringement under applicable copyright law, except executing it on a -computer or modifying a private copy. Propagation includes copying, -distribution (with or without modification), making available to the -public, and in some countries other activities as well. - - To "convey" a work means any kind of propagation that enables other -parties to make or receive copies. Mere interaction with a user through -a computer network, with no transfer of a copy, is not conveying. - - An interactive user interface displays "Appropriate Legal Notices" -to the extent that it includes a convenient and prominently visible -feature that (1) displays an appropriate copyright notice, and (2) -tells the user that there is no warranty for the work (except to the -extent that warranties are provided), that licensees may convey the -work under this License, and how to view a copy of this License. If -the interface presents a list of user commands or options, such as a -menu, a prominent item in the list meets this criterion. - - 1. Source Code. - - The "source code" for a work means the preferred form of the work -for making modifications to it. "Object code" means any non-source -form of a work. - - A "Standard Interface" means an interface that either is an official -standard defined by a recognized standards body, or, in the case of -interfaces specified for a particular programming language, one that -is widely used among developers working in that language. 
- - The "System Libraries" of an executable work include anything, other -than the work as a whole, that (a) is included in the normal form of -packaging a Major Component, but which is not part of that Major -Component, and (b) serves only to enable use of the work with that -Major Component, or to implement a Standard Interface for which an -implementation is available to the public in source code form. A -"Major Component", in this context, means a major essential component -(kernel, window system, and so on) of the specific operating system -(if any) on which the executable work runs, or a compiler used to -produce the work, or an object code interpreter used to run it. - - The "Corresponding Source" for a work in object code form means all -the source code needed to generate, install, and (for an executable -work) run the object code and to modify the work, including scripts to -control those activities. However, it does not include the work's -System Libraries, or general-purpose tools or generally available free -programs which are used unmodified in performing those activities but -which are not part of the work. For example, Corresponding Source -includes interface definition files associated with source files for -the work, and the source code for shared libraries and dynamically -linked subprograms that the work is specifically designed to require, -such as by intimate data communication or control flow between those -subprograms and other parts of the work. - - The Corresponding Source need not include anything that users -can regenerate automatically from other parts of the Corresponding -Source. - - The Corresponding Source for a work in source code form is that -same work. - - 2. Basic Permissions. - - All rights granted under this License are granted for the term of -copyright on the Program, and are irrevocable provided the stated -conditions are met. This License explicitly affirms your unlimited -permission to run the unmodified Program. The output from running a -covered work is covered by this License only if the output, given its -content, constitutes a covered work. This License acknowledges your -rights of fair use or other equivalent, as provided by copyright law. - - You may make, run and propagate covered works that you do not -convey, without conditions so long as your license otherwise remains -in force. You may convey covered works to others for the sole purpose -of having them make modifications exclusively for you, or provide you -with facilities for running those works, provided that you comply with -the terms of this License in conveying all material for which you do -not control copyright. Those thus making or running the covered works -for you must do so exclusively on your behalf, under your direction -and control, on terms that prohibit them from making any copies of -your copyrighted material outside their relationship with you. - - Conveying under any other circumstances is permitted solely under -the conditions stated below. Sublicensing is not allowed; section 10 -makes it unnecessary. - - 3. Protecting Users' Legal Rights From Anti-Circumvention Law. - - No covered work shall be deemed part of an effective technological -measure under any applicable law fulfilling obligations under article -11 of the WIPO copyright treaty adopted on 20 December 1996, or -similar laws prohibiting or restricting circumvention of such -measures. 
- - When you convey a covered work, you waive any legal power to forbid -circumvention of technological measures to the extent such circumvention -is effected by exercising rights under this License with respect to -the covered work, and you disclaim any intention to limit operation or -modification of the work as a means of enforcing, against the work's -users, your or third parties' legal rights to forbid circumvention of -technological measures. - - 4. Conveying Verbatim Copies. - - You may convey verbatim copies of the Program's source code as you -receive it, in any medium, provided that you conspicuously and -appropriately publish on each copy an appropriate copyright notice; -keep intact all notices stating that this License and any -non-permissive terms added in accord with section 7 apply to the code; -keep intact all notices of the absence of any warranty; and give all -recipients a copy of this License along with the Program. - - You may charge any price or no price for each copy that you convey, -and you may offer support or warranty protection for a fee. - - 5. Conveying Modified Source Versions. - - You may convey a work based on the Program, or the modifications to -produce it from the Program, in the form of source code under the -terms of section 4, provided that you also meet all of these conditions: - - a) The work must carry prominent notices stating that you modified - it, and giving a relevant date. - - b) The work must carry prominent notices stating that it is - released under this License and any conditions added under section - 7. This requirement modifies the requirement in section 4 to - "keep intact all notices". - - c) You must license the entire work, as a whole, under this - License to anyone who comes into possession of a copy. This - License will therefore apply, along with any applicable section 7 - additional terms, to the whole of the work, and all its parts, - regardless of how they are packaged. This License gives no - permission to license the work in any other way, but it does not - invalidate such permission if you have separately received it. - - d) If the work has interactive user interfaces, each must display - Appropriate Legal Notices; however, if the Program has interactive - interfaces that do not display Appropriate Legal Notices, your - work need not make them do so. - - A compilation of a covered work with other separate and independent -works, which are not by their nature extensions of the covered work, -and which are not combined with it such as to form a larger program, -in or on a volume of a storage or distribution medium, is called an -"aggregate" if the compilation and its resulting copyright are not -used to limit the access or legal rights of the compilation's users -beyond what the individual works permit. Inclusion of a covered work -in an aggregate does not cause this License to apply to the other -parts of the aggregate. - - 6. Conveying Non-Source Forms. - - You may convey a covered work in object code form under the terms -of sections 4 and 5, provided that you also convey the -machine-readable Corresponding Source under the terms of this License, -in one of these ways: - - a) Convey the object code in, or embodied in, a physical product - (including a physical distribution medium), accompanied by the - Corresponding Source fixed on a durable physical medium - customarily used for software interchange. 
- - b) Convey the object code in, or embodied in, a physical product - (including a physical distribution medium), accompanied by a - written offer, valid for at least three years and valid for as - long as you offer spare parts or customer support for that product - model, to give anyone who possesses the object code either (1) a - copy of the Corresponding Source for all the software in the - product that is covered by this License, on a durable physical - medium customarily used for software interchange, for a price no - more than your reasonable cost of physically performing this - conveying of source, or (2) access to copy the - Corresponding Source from a network server at no charge. - - c) Convey individual copies of the object code with a copy of the - written offer to provide the Corresponding Source. This - alternative is allowed only occasionally and noncommercially, and - only if you received the object code with such an offer, in accord - with subsection 6b. - - d) Convey the object code by offering access from a designated - place (gratis or for a charge), and offer equivalent access to the - Corresponding Source in the same way through the same place at no - further charge. You need not require recipients to copy the - Corresponding Source along with the object code. If the place to - copy the object code is a network server, the Corresponding Source - may be on a different server (operated by you or a third party) - that supports equivalent copying facilities, provided you maintain - clear directions next to the object code saying where to find the - Corresponding Source. Regardless of what server hosts the - Corresponding Source, you remain obligated to ensure that it is - available for as long as needed to satisfy these requirements. - - e) Convey the object code using peer-to-peer transmission, provided - you inform other peers where the object code and Corresponding - Source of the work are being offered to the general public at no - charge under subsection 6d. - - A separable portion of the object code, whose source code is excluded -from the Corresponding Source as a System Library, need not be -included in conveying the object code work. - - A "User Product" is either (1) a "consumer product", which means any -tangible personal property which is normally used for personal, family, -or household purposes, or (2) anything designed or sold for incorporation -into a dwelling. In determining whether a product is a consumer product, -doubtful cases shall be resolved in favor of coverage. For a particular -product received by a particular user, "normally used" refers to a -typical or common use of that class of product, regardless of the status -of the particular user or of the way in which the particular user -actually uses, or expects or is expected to use, the product. A product -is a consumer product regardless of whether the product has substantial -commercial, industrial or non-consumer uses, unless such uses represent -the only significant mode of use of the product. - - "Installation Information" for a User Product means any methods, -procedures, authorization keys, or other information required to install -and execute modified versions of a covered work in that User Product from -a modified version of its Corresponding Source. The information must -suffice to ensure that the continued functioning of the modified object -code is in no case prevented or interfered with solely because -modification has been made. 
- - If you convey an object code work under this section in, or with, or -specifically for use in, a User Product, and the conveying occurs as -part of a transaction in which the right of possession and use of the -User Product is transferred to the recipient in perpetuity or for a -fixed term (regardless of how the transaction is characterized), the -Corresponding Source conveyed under this section must be accompanied -by the Installation Information. But this requirement does not apply -if neither you nor any third party retains the ability to install -modified object code on the User Product (for example, the work has -been installed in ROM). - - The requirement to provide Installation Information does not include a -requirement to continue to provide support service, warranty, or updates -for a work that has been modified or installed by the recipient, or for -the User Product in which it has been modified or installed. Access to a -network may be denied when the modification itself materially and -adversely affects the operation of the network or violates the rules and -protocols for communication across the network. - - Corresponding Source conveyed, and Installation Information provided, -in accord with this section must be in a format that is publicly -documented (and with an implementation available to the public in -source code form), and must require no special password or key for -unpacking, reading or copying. - - 7. Additional Terms. - - "Additional permissions" are terms that supplement the terms of this -License by making exceptions from one or more of its conditions. -Additional permissions that are applicable to the entire Program shall -be treated as though they were included in this License, to the extent -that they are valid under applicable law. If additional permissions -apply only to part of the Program, that part may be used separately -under those permissions, but the entire Program remains governed by -this License without regard to the additional permissions. - - When you convey a copy of a covered work, you may at your option -remove any additional permissions from that copy, or from any part of -it. (Additional permissions may be written to require their own -removal in certain cases when you modify the work.) You may place -additional permissions on material, added by you to a covered work, -for which you have or can give appropriate copyright permission. 
- - Notwithstanding any other provision of this License, for material you -add to a covered work, you may (if authorized by the copyright holders of -that material) supplement the terms of this License with terms: - - a) Disclaiming warranty or limiting liability differently from the - terms of sections 15 and 16 of this License; or - - b) Requiring preservation of specified reasonable legal notices or - author attributions in that material or in the Appropriate Legal - Notices displayed by works containing it; or - - c) Prohibiting misrepresentation of the origin of that material, or - requiring that modified versions of such material be marked in - reasonable ways as different from the original version; or - - d) Limiting the use for publicity purposes of names of licensors or - authors of the material; or - - e) Declining to grant rights under trademark law for use of some - trade names, trademarks, or service marks; or - - f) Requiring indemnification of licensors and authors of that - material by anyone who conveys the material (or modified versions of - it) with contractual assumptions of liability to the recipient, for - any liability that these contractual assumptions directly impose on - those licensors and authors. - - All other non-permissive additional terms are considered "further -restrictions" within the meaning of section 10. If the Program as you -received it, or any part of it, contains a notice stating that it is -governed by this License along with a term that is a further -restriction, you may remove that term. If a license document contains -a further restriction but permits relicensing or conveying under this -License, you may add to a covered work material governed by the terms -of that license document, provided that the further restriction does -not survive such relicensing or conveying. - - If you add terms to a covered work in accord with this section, you -must place, in the relevant source files, a statement of the -additional terms that apply to those files, or a notice indicating -where to find the applicable terms. - - Additional terms, permissive or non-permissive, may be stated in the -form of a separately written license, or stated as exceptions; -the above requirements apply either way. - - 8. Termination. - - You may not propagate or modify a covered work except as expressly -provided under this License. Any attempt otherwise to propagate or -modify it is void, and will automatically terminate your rights under -this License (including any patent licenses granted under the third -paragraph of section 11). - - However, if you cease all violation of this License, then your -license from a particular copyright holder is reinstated (a) -provisionally, unless and until the copyright holder explicitly and -finally terminates your license, and (b) permanently, if the copyright -holder fails to notify you of the violation by some reasonable means -prior to 60 days after the cessation. - - Moreover, your license from a particular copyright holder is -reinstated permanently if the copyright holder notifies you of the -violation by some reasonable means, this is the first time you have -received notice of violation of this License (for any work) from that -copyright holder, and you cure the violation prior to 30 days after -your receipt of the notice. - - Termination of your rights under this section does not terminate the -licenses of parties who have received copies or rights from you under -this License. 
If your rights have been terminated and not permanently -reinstated, you do not qualify to receive new licenses for the same -material under section 10. - - 9. Acceptance Not Required for Having Copies. - - You are not required to accept this License in order to receive or -run a copy of the Program. Ancillary propagation of a covered work -occurring solely as a consequence of using peer-to-peer transmission -to receive a copy likewise does not require acceptance. However, -nothing other than this License grants you permission to propagate or -modify any covered work. These actions infringe copyright if you do -not accept this License. Therefore, by modifying or propagating a -covered work, you indicate your acceptance of this License to do so. - - 10. Automatic Licensing of Downstream Recipients. - - Each time you convey a covered work, the recipient automatically -receives a license from the original licensors, to run, modify and -propagate that work, subject to this License. You are not responsible -for enforcing compliance by third parties with this License. - - An "entity transaction" is a transaction transferring control of an -organization, or substantially all assets of one, or subdividing an -organization, or merging organizations. If propagation of a covered -work results from an entity transaction, each party to that -transaction who receives a copy of the work also receives whatever -licenses to the work the party's predecessor in interest had or could -give under the previous paragraph, plus a right to possession of the -Corresponding Source of the work from the predecessor in interest, if -the predecessor has it or can get it with reasonable efforts. - - You may not impose any further restrictions on the exercise of the -rights granted or affirmed under this License. For example, you may -not impose a license fee, royalty, or other charge for exercise of -rights granted under this License, and you may not initiate litigation -(including a cross-claim or counterclaim in a lawsuit) alleging that -any patent claim is infringed by making, using, selling, offering for -sale, or importing the Program or any portion of it. - - 11. Patents. - - A "contributor" is a copyright holder who authorizes use under this -License of the Program or a work on which the Program is based. The -work thus licensed is called the contributor's "contributor version". - - A contributor's "essential patent claims" are all patent claims -owned or controlled by the contributor, whether already acquired or -hereafter acquired, that would be infringed by some manner, permitted -by this License, of making, using, or selling its contributor version, -but do not include claims that would be infringed only as a -consequence of further modification of the contributor version. For -purposes of this definition, "control" includes the right to grant -patent sublicenses in a manner consistent with the requirements of -this License. - - Each contributor grants you a non-exclusive, worldwide, royalty-free -patent license under the contributor's essential patent claims, to -make, use, sell, offer for sale, import and otherwise run, modify and -propagate the contents of its contributor version. - - In the following three paragraphs, a "patent license" is any express -agreement or commitment, however denominated, not to enforce a patent -(such as an express permission to practice a patent or covenant not to -sue for patent infringement). 
To "grant" such a patent license to a -party means to make such an agreement or commitment not to enforce a -patent against the party. - - If you convey a covered work, knowingly relying on a patent license, -and the Corresponding Source of the work is not available for anyone -to copy, free of charge and under the terms of this License, through a -publicly available network server or other readily accessible means, -then you must either (1) cause the Corresponding Source to be so -available, or (2) arrange to deprive yourself of the benefit of the -patent license for this particular work, or (3) arrange, in a manner -consistent with the requirements of this License, to extend the patent -license to downstream recipients. "Knowingly relying" means you have -actual knowledge that, but for the patent license, your conveying the -covered work in a country, or your recipient's use of the covered work -in a country, would infringe one or more identifiable patents in that -country that you have reason to believe are valid. - - If, pursuant to or in connection with a single transaction or -arrangement, you convey, or propagate by procuring conveyance of, a -covered work, and grant a patent license to some of the parties -receiving the covered work authorizing them to use, propagate, modify -or convey a specific copy of the covered work, then the patent license -you grant is automatically extended to all recipients of the covered -work and works based on it. - - A patent license is "discriminatory" if it does not include within -the scope of its coverage, prohibits the exercise of, or is -conditioned on the non-exercise of one or more of the rights that are -specifically granted under this License. You may not convey a covered -work if you are a party to an arrangement with a third party that is -in the business of distributing software, under which you make payment -to the third party based on the extent of your activity of conveying -the work, and under which the third party grants, to any of the -parties who would receive the covered work from you, a discriminatory -patent license (a) in connection with copies of the covered work -conveyed by you (or copies made from those copies), or (b) primarily -for and in connection with specific products or compilations that -contain the covered work, unless you entered into that arrangement, -or that patent license was granted, prior to 28 March 2007. - - Nothing in this License shall be construed as excluding or limiting -any implied license or other defenses to infringement that may -otherwise be available to you under applicable patent law. - - 12. No Surrender of Others' Freedom. - - If conditions are imposed on you (whether by court order, agreement or -otherwise) that contradict the conditions of this License, they do not -excuse you from the conditions of this License. If you cannot convey a -covered work so as to satisfy simultaneously your obligations under this -License and any other pertinent obligations, then as a consequence you may -not convey it at all. For example, if you agree to terms that obligate you -to collect a royalty for further conveying from those to whom you convey -the Program, the only way you could satisfy both those terms and this -License would be to refrain entirely from conveying the Program. - - 13. Remote Network Interaction; Use with the GNU General Public License. 
- - Notwithstanding any other provision of this License, if you modify the -Program, your modified version must prominently offer all users -interacting with it remotely through a computer network (if your version -supports such interaction) an opportunity to receive the Corresponding -Source of your version by providing access to the Corresponding Source -from a network server at no charge, through some standard or customary -means of facilitating copying of software. This Corresponding Source -shall include the Corresponding Source for any work covered by version 3 -of the GNU General Public License that is incorporated pursuant to the -following paragraph. - - Notwithstanding any other provision of this License, you have -permission to link or combine any covered work with a work licensed -under version 3 of the GNU General Public License into a single -combined work, and to convey the resulting work. The terms of this -License will continue to apply to the part which is the covered work, -but the work with which it is combined will remain governed by version -3 of the GNU General Public License. - - 14. Revised Versions of this License. - - The Free Software Foundation may publish revised and/or new versions of -the GNU Affero General Public License from time to time. Such new versions -will be similar in spirit to the present version, but may differ in detail to -address new problems or concerns. - - Each version is given a distinguishing version number. If the -Program specifies that a certain numbered version of the GNU Affero General -Public License "or any later version" applies to it, you have the -option of following the terms and conditions either of that numbered -version or of any later version published by the Free Software -Foundation. If the Program does not specify a version number of the -GNU Affero General Public License, you may choose any version ever published -by the Free Software Foundation. - - If the Program specifies that a proxy can decide which future -versions of the GNU Affero General Public License can be used, that proxy's -public statement of acceptance of a version permanently authorizes you -to choose that version for the Program. - - Later license versions may give you additional or different -permissions. However, no additional obligations are imposed on any -author or copyright holder as a result of your choosing to follow a -later version. - - 15. Disclaimer of Warranty. - - THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY -APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT -HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY -OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, -THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM -IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF -ALL NECESSARY SERVICING, REPAIR OR CORRECTION. - - 16. Limitation of Liability. 
-
-  IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
-WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
-THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
-GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
-USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
-DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
-PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
-EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
-SUCH DAMAGES.
-
-  17. Interpretation of Sections 15 and 16.
-
-  If the disclaimer of warranty and limitation of liability provided
-above cannot be given local legal effect according to their terms,
-reviewing courts shall apply local law that most closely approximates
-an absolute waiver of all civil liability in connection with the
-Program, unless a warranty or assumption of liability accompanies a
-copy of the Program in return for a fee.
-
-                     END OF TERMS AND CONDITIONS
-
-            How to Apply These Terms to Your New Programs
-
-  If you develop a new program, and you want it to be of the greatest
-possible use to the public, the best way to achieve this is to make it
-free software which everyone can redistribute and change under these terms.
-
-  To do so, attach the following notices to the program.  It is safest
-to attach them to the start of each source file to most effectively
-state the exclusion of warranty; and each file should have at least
-the "copyright" line and a pointer to where the full notice is found.
-
-    <one line to give the program's name and a brief idea of what it does.>
-    Copyright (C) <year>  <name of author>
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU Affero General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU Affero General Public License for more details.
-
-    You should have received a copy of the GNU Affero General Public License
-    along with this program.  If not, see <https://www.gnu.org/licenses/>.
-
-Also add information on how to contact you by electronic and paper mail.
-
-  If your software can interact with users remotely through a computer
-network, you should also make sure that it provides a way for users to
-get its source.  For example, if your program is a web application, its
-interface could display a "Source" link that leads users to an archive
-of the code.  There are many ways you could offer source, and different
-solutions will be better for different programs; see section 13 for the
-specific requirements.
-
-  You should also get your employer (if you work as a programmer) or school,
-if any, to sign a "copyright disclaimer" for the program, if necessary.
-For more information on this, and how to apply and follow the GNU AGPL, see
-<https://www.gnu.org/licenses/>.
\ No newline at end of file
diff --git a/tutorials/mct_model_garden/models_pytorch/fastdepth/__init__.py b/tutorials/mct_model_garden/models_pytorch/fastdepth/__init__.py
deleted file mode 100644
index e11a7cc60..000000000
--- a/tutorials/mct_model_garden/models_pytorch/fastdepth/__init__.py
+++ /dev/null
@@ -1,14 +0,0 @@
-# Copyright 2024 Sony Semiconductor Israel, Inc. All rights reserved.
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== diff --git a/tutorials/mct_model_garden/models_pytorch/fastdepth/fastdepth.py b/tutorials/mct_model_garden/models_pytorch/fastdepth/fastdepth.py deleted file mode 100644 index cbd674d6d..000000000 --- a/tutorials/mct_model_garden/models_pytorch/fastdepth/fastdepth.py +++ /dev/null @@ -1,196 +0,0 @@ -# ------------------------------------------------------------------------------ -# This file contains code from the fast-depth repository. -# -# MIT License -# -# Copyright (c) 2019 Diana Wofk -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. -# # ------------------------------------------------------------------------------ - -""" -Part of this code was based on fast-depth implementation. For more details, refer to the original repository: -https://github.com/dwofk/fast-depth -""" -import torch -import torch.nn as nn -import math -import torch.nn.functional as F -from huggingface_hub import PyTorchModelHubMixin - - -def weights_init(m): - # Initialize kernel weights with Gaussian distributions - if isinstance(m, nn.Conv2d): - n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels - m.weight.data.normal_(0, math.sqrt(2. / n)) - if m.bias is not None: - m.bias.data.zero_() - elif isinstance(m, nn.ConvTranspose2d): - n = m.kernel_size[0] * m.kernel_size[1] * m.in_channels - m.weight.data.normal_(0, math.sqrt(2. / n)) - if m.bias is not None: - m.bias.data.zero_() - elif isinstance(m, nn.BatchNorm2d): - m.weight.data.fill_(1) - m.bias.data.zero_() - -def depthwise(in_channels, kernel_size): - padding = (kernel_size-1) // 2 - assert 2*padding == kernel_size-1, "parameters incorrect. 
kernel={}, padding={}".format(kernel_size, padding) - return nn.Sequential( - nn.Conv2d(in_channels,in_channels,kernel_size,stride=1,padding=padding,bias=False,groups=in_channels), - nn.BatchNorm2d(in_channels), - nn.ReLU(inplace=True), - ) - -def pointwise(in_channels, out_channels): - return nn.Sequential( - nn.Conv2d(in_channels,out_channels,1,1,0,bias=False), - nn.BatchNorm2d(out_channels), - nn.ReLU(inplace=True), - ) - -class MobileNetBackbone(nn.Module): - def __init__(self, relu6=True): - super(MobileNetBackbone, self).__init__() - - def relu(relu6): - if relu6: - return nn.ReLU6(inplace=True) - else: - return nn.ReLU(inplace=True) - - def conv_bn(inp, oup, stride, relu6): - return nn.Sequential( - nn.Conv2d(inp, oup, 3, stride, 1, bias=False), - nn.BatchNorm2d(oup), - relu(relu6), - ) - - def conv_dw(inp, oup, stride, relu6): - return nn.Sequential( - nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False), - nn.BatchNorm2d(inp), - relu(relu6), - - nn.Conv2d(inp, oup, 1, 1, 0, bias=False), - nn.BatchNorm2d(oup), - relu(relu6), - ) - - self.model = nn.Sequential( - conv_bn(3, 16, 2, relu6), - conv_dw(16, 56, 1, relu6), - conv_dw(56, 88, 2, relu6), - conv_dw(88, 120, 1, relu6), - conv_dw(120, 144, 2, relu6), - conv_dw(144, 256, 1, relu6), - conv_dw(256, 408, 2, relu6), - conv_dw(408, 376, 1, relu6), - conv_dw(376, 272, 1, relu6), - conv_dw(272, 288, 1, relu6), - conv_dw(288, 296, 1, relu6), - conv_dw(296, 328, 1, relu6), - conv_dw(328, 480, 2, relu6), - conv_dw(480, 512, 1, relu6), - nn.AvgPool2d(7), - ) - -class FastDepth(nn.Module, PyTorchModelHubMixin): - def __init__(self): - - super(FastDepth, self).__init__() - mobilenet = MobileNetBackbone() - - for i in range(14): - setattr(self, 'conv{}'.format(i), mobilenet.model[i]) - - kernel_size = 5 - self.decode_conv1 = nn.Sequential( - depthwise(512, kernel_size), - pointwise(512, 200)) - self.decode_conv2 = nn.Sequential( - depthwise(200, kernel_size), - pointwise(200, 256)) - self.decode_conv3 = nn.Sequential( - depthwise(256, kernel_size), - pointwise(256, 120)) - self.decode_conv4 = nn.Sequential( - depthwise(120, kernel_size), - pointwise(120, 56)) - self.decode_conv5 = nn.Sequential( - depthwise(56, kernel_size), - pointwise(56, 16)) - self.decode_conv6 = pointwise(16, 1) - weights_init(self.decode_conv1) - weights_init(self.decode_conv2) - weights_init(self.decode_conv3) - weights_init(self.decode_conv4) - weights_init(self.decode_conv5) - weights_init(self.decode_conv6) - - def load_weights(self, path): - self.load_state_dict(torch.load(path)) - - def forward(self, x): - # skip connections: dec4: enc1 - # dec 3: enc2 or enc3 - # dec 2: enc4 or enc5 - for i in range(14): - layer = getattr(self, 'conv{}'.format(i)) - x = layer(x) - # print("{}: {}".format(i, x.size())) - if i == 1: - x1 = x - elif i == 3: - x2 = x - elif i == 5: - x3 = x - for i in range(1, 6): - layer = getattr(self, 'decode_conv{}'.format(i)) - x = layer(x) - x = F.interpolate(x, scale_factor=2, mode='nearest') - if i == 4: - x = x + x1 - elif i == 3: - x = x + x2 - elif i == 2: - x = x + x3 - # print("{}: {}".format(i, x.size())) - x = self.decode_conv6(x) - return x - - def make_tensors_contiguous(self): - for name, param in self.named_parameters(): - if not param.is_contiguous(): - param.data = param.data.contiguous() - - for name, buffer in self.named_buffers(): - if not buffer.is_contiguous(): - buffer.data = buffer.data.contiguous() - - def save_pretrained(self, save_directory, **kwargs): - # Make tensors contiguous - self.make_tensors_contiguous() - 
# Call the original save_pretrained method - super().save_pretrained(save_directory, **kwargs) - - - diff --git a/tutorials/mct_model_garden/models_pytorch/mobilevit_xs/LICENSE b/tutorials/mct_model_garden/models_pytorch/mobilevit_xs/LICENSE deleted file mode 100644 index b4e9438bd..000000000 --- a/tutorials/mct_model_garden/models_pytorch/mobilevit_xs/LICENSE +++ /dev/null @@ -1,201 +0,0 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." 
- - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. 
- - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "{}" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. 
We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright 2019 Ross Wightman - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. \ No newline at end of file diff --git a/tutorials/mct_model_garden/models_pytorch/mobilevit_xs/__init__.py b/tutorials/mct_model_garden/models_pytorch/mobilevit_xs/__init__.py deleted file mode 100644 index e11a7cc60..000000000 --- a/tutorials/mct_model_garden/models_pytorch/mobilevit_xs/__init__.py +++ /dev/null @@ -1,14 +0,0 @@ -# Copyright 2024 Sony Semiconductor Israel, Inc. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== diff --git a/tutorials/mct_model_garden/models_pytorch/mobilevit_xs/mobilevit_xs.py b/tutorials/mct_model_garden/models_pytorch/mobilevit_xs/mobilevit_xs.py deleted file mode 100644 index c83e430ea..000000000 --- a/tutorials/mct_model_garden/models_pytorch/mobilevit_xs/mobilevit_xs.py +++ /dev/null @@ -1,1282 +0,0 @@ -# --------------------------------------------------------------- -# Copyright 2019 Ross Wightman -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# --------------------------------------------------------------- - -""" -MobileViT (extra small-sized model) - PyTorch implementation - -This code contains a PyTorch implementation of mobilevit-xs model, following -https://github.com/huggingface/pytorch-image-models. This implementation includes a slightly modified version of -MobileViT attention that was optimized for model quantization. - -Main changes: - - Adjust the Attention layer to enhance compatibility for quantization (renamed as ModifiedAttention). - - Rearrange the input structure for every Attention layer to make it suitable for quantization. - - Inheritance class from HuggingFace - - Simplification of model initialization procedures. - -The code is organized as follows: - - Helper functions of timm building blocks, including: get_act_layer, _create_act, _create_fc, get_norm_act_layer, - create_conv2d_pad, create_conv2d, update_block_kwargs, create_block, create_byob_stages, create_byob_stem, - create_classifier and more. 
- - Configurations of MobileViT-XS model and building blocks: ByoModelCfg, ByoBlockCfg, _inverted_residual_block, - _mobilevit_block and model_cfgs. - - Classes definitions of MobileViT-XS building blocks: BatchNormAct2d, ConvNormAct, BottleneckBlock, - Attention (ModifiedAttention), Mlp, TransformerBlock, MobileVitBlock, SelectAdaptivePool2d and ClassifierHead. - - Classification Model definition: MobileViTXSPyTorch - -For more details on the mobilevit-xs model, refer to the original repository: -https://github.com/huggingface/pytorch-image-models - -""" -import collections.abc -import math -import types -from dataclasses import dataclass, field, replace -from enum import Enum -from functools import partial -from itertools import repeat -from typing import Tuple, Union, Optional, Any, Callable, Dict, Type, List, Sequence - -import torch -from huggingface_hub import PyTorchModelHubMixin -from torch import _assert -from torch import nn -from torch.nn import functional as F - -_ACT_LAYER_DEFAULT = dict( - silu=nn.SiLU, - swish=nn.SiLU, - mish=nn.Mish, - relu=nn.ReLU, - relu6=nn.ReLU6, - leaky_relu=nn.LeakyReLU, - elu=nn.ELU, - celu=nn.CELU, - selu=nn.SELU, - hard_sigmoid=nn.Hardsigmoid, - hard_swish=nn.Hardswish, - identity=nn.Identity, -) - - -class Format(str, Enum): - NCHW = 'NCHW' - NHWC = 'NHWC' - NCL = 'NCL' - NLC = 'NLC' - - -FormatT = Union[str, Format] - - -# From PyTorch internals -def _ntuple(n): - def parse(x): - if isinstance(x, collections.abc.Iterable) and not isinstance(x, str): - return tuple(x) - return tuple(repeat(x, n)) - - return parse - - -to_2tuple = _ntuple(2) -_int_tuple_2_t = Union[int, Tuple[int, int]] - - -def named_apply( - fn: Callable, - module: nn.Module, name='', - depth_first: bool = True, - include_root: bool = False, -) -> nn.Module: - if not depth_first and include_root: - fn(module=module, name=name) - for child_name, child_module in module.named_children(): - child_name = '.'.join((name, child_name)) if name else child_name - named_apply(fn=fn, module=child_module, name=child_name, depth_first=depth_first, include_root=True) - if depth_first and include_root: - fn(module=module, name=name) - return module - - -def get_act_layer(name: Union[Type[nn.Module], str] = 'relu'): - """ Activation Layer Factory - Fetching activation layers by name with this function allows export or torch script friendly - functions to be returned dynamically based on current config. 
- """ - if name is None: - return None - if not isinstance(name, str): - return name - if not name: - return None - return _ACT_LAYER_DEFAULT[name] - - -def _create_act(act_layer, act_kwargs=None, inplace=False, apply_act=True): - act_layer = get_act_layer(act_layer) # string -> nn.Module - act_kwargs = act_kwargs or {} - if act_layer is not None and apply_act: - if inplace: - act_kwargs['inplace'] = inplace - act = act_layer(**act_kwargs) - else: - act = nn.Identity() - return act - - -def _create_fc(num_features, num_classes, use_conv=False): - if num_classes <= 0: - fc = nn.Identity() # pass-through (no classifier) - elif use_conv: - fc = nn.Conv2d(num_features, num_classes, 1, bias=True) - else: - fc = nn.Linear(num_features, num_classes, bias=True) - return fc - - -def adaptive_pool_feat_mult(pool_type='avg'): - if pool_type.endswith('catavgmax'): - return 2 - else: - return 1 - - -def _init_weights(module, name='', zero_init_last=False): - if isinstance(module, nn.Conv2d): - fan_out = module.kernel_size[0] * module.kernel_size[1] * module.out_channels - fan_out //= module.groups - module.weight.data.normal_(0, math.sqrt(2.0 / fan_out)) - if module.bias is not None: - module.bias.data.zero_() - elif isinstance(module, nn.Linear): - nn.init.normal_(module.weight, mean=0.0, std=0.01) - if module.bias is not None: - nn.init.zeros_(module.bias) - elif isinstance(module, nn.BatchNorm2d): - nn.init.ones_(module.weight) - nn.init.zeros_(module.bias) - elif hasattr(module, 'init_weights'): - module.init_weights(zero_init_last=zero_init_last) - - -@dataclass -class ByoBlockCfg: - type: Union[str, nn.Module] - d: int # block depth (number of block repeats in stage) - c: int # number of output channels for each block in stage - s: int = 2 # stride of stage (first block) - gs: Optional[Union[int, Callable]] = None # group-size of blocks in stage, conv is depthwise if gs == 1 - br: float = 1. # bottleneck-ratio of blocks in stage - - # NOTE: these config items override the model cfgs that are applied to all blocks by default - attn_layer: Optional[str] = None - attn_kwargs: Optional[Dict[str, Any]] = None - self_attn_layer: Optional[str] = None - self_attn_kwargs: Optional[Dict[str, Any]] = None - block_kwargs: Optional[Dict[str, Any]] = None - - -@dataclass -class ByoModelCfg: - blocks: Tuple[Union[ByoBlockCfg, Tuple[ByoBlockCfg, ...]], ...] 
- downsample: str = 'conv1x1' - stem_type: str = '3x3' - stem_pool: Optional[str] = 'maxpool' - stem_chs: int = 32 - width_factor: float = 1.0 - num_features: int = 0 # num out_channels for final conv, no final 1x1 conv if 0 - zero_init_last: bool = True # zero init last weight (usually bn) in residual path - fixed_input_size: bool = False # model constrained to a fixed-input size / img_size must be provided on creation - - act_layer: str = 'relu' - norm_layer: str = 'batchnorm' - - # NOTE: these config items will be overridden by the block cfg (per-block) if they are set there - attn_layer: Optional[str] = None - attn_kwargs: dict = field(default_factory=lambda: dict()) - self_attn_layer: Optional[str] = None - self_attn_kwargs: dict = field(default_factory=lambda: dict()) - block_kwargs: Dict[str, Any] = field(default_factory=lambda: dict()) - - -def _inverted_residual_block(d, c, s, br=4.0): - # inverted residual is a bottleneck block with bottle_ratio > 1 applied to in_chs, linear output, gs=1 (depthwise) - return ByoBlockCfg( - type='bottle', d=d, c=c, s=s, gs=1, br=br, - block_kwargs=dict(bottle_in=True, linear_out=True)) - - -def _mobilevit_block(d, c, s, transformer_dim, transformer_depth, patch_size=4, br=4.0): - # inverted residual + mobilevit blocks as per MobileViT network - return ( - _inverted_residual_block(d=d, c=c, s=s, br=br), - ByoBlockCfg( - type='mobilevit', d=1, c=c, s=1, - block_kwargs=dict( - transformer_dim=transformer_dim, - transformer_depth=transformer_depth, - patch_size=patch_size) - ) - ) - - -model_cfgs = dict( - mobilevit_xs=ByoModelCfg( - blocks=( - _inverted_residual_block(d=1, c=32, s=1), - _inverted_residual_block(d=3, c=48, s=2), - _mobilevit_block(d=1, c=64, s=2, transformer_dim=96, transformer_depth=2, patch_size=2), - _mobilevit_block(d=1, c=80, s=2, transformer_dim=120, transformer_depth=4, patch_size=2), - _mobilevit_block(d=1, c=96, s=2, transformer_dim=144, transformer_depth=3, patch_size=2), - ), - stem_chs=16, - stem_type='3x3', - stem_pool='', - downsample='', - act_layer='silu', - num_features=384, - ), -) - - -class BatchNormAct2d(nn.BatchNorm2d): - """BatchNorm + Activation - - This module performs BatchNorm + Activation in a manner that will remain backwards - compatible with weights trained with separate bn, act. This is why we inherit from BN - instead of composing it as a .bn member. 
- """ - - def __init__( - self, - num_features, - eps=1e-5, - momentum=0.1, - affine=True, - track_running_stats=True, - apply_act=True, - act_layer=nn.ReLU, - act_kwargs=None, - inplace=True, - drop_layer=None, - device=None, - dtype=None, - ): - try: - factory_kwargs = {'device': device, 'dtype': dtype} - super(BatchNormAct2d, self).__init__( - num_features, - eps=eps, - momentum=momentum, - affine=affine, - track_running_stats=track_running_stats, - **factory_kwargs, - ) - except TypeError: - # NOTE for backwards compat with old PyTorch w/o factory device/dtype support - super(BatchNormAct2d, self).__init__( - num_features, - eps=eps, - momentum=momentum, - affine=affine, - track_running_stats=track_running_stats, - ) - self.drop = drop_layer() if drop_layer is not None else nn.Identity() - self.act = _create_act(act_layer, act_kwargs=act_kwargs, inplace=inplace, apply_act=apply_act) - - def forward(self, x): - # cut & paste of torch.nn.BatchNorm2d.forward impl to avoid issues with torchscript and tracing - _assert(x.ndim == 4, f'expected 4D input (got {x.ndim}D input)') - - # exponential_average_factor is set to self.momentum - # (when it is available) only so that it gets updated - # in ONNX graph when this node is exported to ONNX. - if self.momentum is None: - exponential_average_factor = 0.0 - else: - exponential_average_factor = self.momentum - - if self.training and self.track_running_stats: - # TODO: if statement only here to tell the jit to skip emitting this when it is None - if self.num_batches_tracked is not None: # type: ignore[has-type] - self.num_batches_tracked.add_(1) # type: ignore[has-type] - if self.momentum is None: # use cumulative moving average - exponential_average_factor = 1.0 / float(self.num_batches_tracked) - else: # use exponential moving average - exponential_average_factor = self.momentum - - r""" - Decide whether the mini-batch stats should be used for normalization rather than the buffers. - Mini-batch stats are used in training mode, and in eval mode when buffers are None. - """ - if self.training: - bn_training = True - else: - bn_training = (self.running_mean is None) and (self.running_var is None) - - r""" - Buffers are only updated if they are to be tracked and we are in training mode. Thus they only need to be - passed when the update should occur (i.e. in training mode when they are tracked), or when buffer stats are - used for normalization (i.e. in eval mode when buffers are not None). 
- """ - x = F.batch_norm( - x, - # If buffers are not to be tracked, ensure that they won't be updated - self.running_mean if not self.training or self.track_running_stats else None, - self.running_var if not self.training or self.track_running_stats else None, - self.weight, - self.bias, - bn_training, - exponential_average_factor, - self.eps, - ) - x = self.drop(x) - x = self.act(x) - return x - - -_NORM_ACT_MAP = dict( - batchnorm=BatchNormAct2d -) -_NORM_ACT_TYPES = {m for n, m in _NORM_ACT_MAP.items()} - -# has act_layer arg to define act type -_NORM_ACT_REQUIRES_ARG = { - BatchNormAct2d} - - -def get_norm_act_layer(norm_layer, act_layer=None): - if norm_layer is None: - return None - assert isinstance(norm_layer, (type, str, types.FunctionType, partial)) - assert act_layer is None or isinstance(act_layer, (type, str, types.FunctionType, partial)) - norm_act_kwargs = {} - - if isinstance(norm_layer, str): - if not norm_layer: - return None - layer_name = norm_layer.replace('_', '').lower().split('-')[0] - norm_act_layer = _NORM_ACT_MAP[layer_name] - - if norm_act_layer in _NORM_ACT_REQUIRES_ARG: - # pass `act_layer` through for backwards compat where `act_layer=None` implies no activation. - # In the future, may force use of `apply_act` with `act_layer` arg bound to relevant NormAct types - norm_act_kwargs.setdefault('act_layer', act_layer) - if norm_act_kwargs: - norm_act_layer = partial(norm_act_layer, **norm_act_kwargs) # bind/rebind args - return norm_act_layer - - -# Calculate symmetric padding for a convolution -def get_padding(kernel_size: int, stride: int = 1, dilation: int = 1, **_) -> int: - padding = ((stride - 1) + dilation * (kernel_size - 1)) // 2 - return padding - - -# Can SAME padding for given args be done statically? -def is_static_pad(kernel_size: int, stride: int = 1, dilation: int = 1, **_): - return stride == 1 and (dilation * (kernel_size - 1)) % 2 == 0 - - -def get_padding_value(padding, kernel_size, **kwargs) -> Tuple[Tuple, bool]: - dynamic = False - if isinstance(padding, str): - # for any string padding, the padding will be calculated for you, one of three ways - padding = padding.lower() - if padding == 'same': - # TF compatible 'SAME' padding, has a performance and GPU memory allocation impact - if is_static_pad(kernel_size, **kwargs): - # static case, no extra overhead - padding = get_padding(kernel_size, **kwargs) - else: - # dynamic 'SAME' padding, has runtime/GPU memory overhead - padding = 0 - dynamic = True - elif padding == 'valid': - # 'VALID' padding, same as padding=0 - padding = 0 - else: - # Default to PyTorch style 'same'-ish symmetric padding - padding = get_padding(kernel_size, **kwargs) - return padding, dynamic - - -def create_conv2d_pad(in_chs, out_chs, kernel_size, **kwargs): - padding = kwargs.pop('padding', '') - kwargs.setdefault('bias', False) - padding, _ = get_padding_value(padding, kernel_size, **kwargs) - return nn.Conv2d(in_chs, out_chs, kernel_size, padding=padding, **kwargs) - - -def create_conv2d(in_channels, out_channels, kernel_size, **kwargs): - """ Select a 2d convolution implementation based on arguments - Creates and returns one of torch.nn.Conv2d, Conv2dSame, MixedConv2d, or CondConv2d. - - Used extensively by EfficientNet, MobileNetv3 and related networks. - """ - # Here, we've removed the options for returning Conv2dSame, MixedConv2d, or CondConv2d, as they aren't relevant - # to the mobilevit-xs model. 
- depthwise = kwargs.pop('depthwise', False) - # for DW out_channels must be multiple of in_channels as must have out_channels % groups == 0 - groups = in_channels if depthwise else kwargs.pop('groups', 1) - m = create_conv2d_pad(in_channels, out_channels, kernel_size, groups=groups, **kwargs) - return m - - -class ConvNormAct(nn.Module): - def __init__( - self, - in_channels, - out_channels, - kernel_size=1, - stride=1, - padding='', - dilation=1, - groups=1, - bias=False, - apply_act=True, - norm_layer=nn.BatchNorm2d, - norm_kwargs=None, - act_layer=nn.ReLU, - act_kwargs=None, - drop_layer=None, - ): - super(ConvNormAct, self).__init__() - norm_kwargs = norm_kwargs or {} - act_kwargs = act_kwargs or {} - - self.conv = create_conv2d( - in_channels, out_channels, kernel_size, stride=stride, - padding=padding, dilation=dilation, groups=groups, bias=bias) - - # NOTE for backwards compatibility with models that use separate norm and act layer definitions - norm_act_layer = get_norm_act_layer(norm_layer, act_layer) - # NOTE for backwards (weight) compatibility, norm layer name remains `.bn` - if drop_layer: - norm_kwargs['drop_layer'] = drop_layer - self.bn = norm_act_layer( - out_channels, - apply_act=apply_act, - act_kwargs=act_kwargs, - **norm_kwargs, - ) - - @property - def in_channels(self): - return self.conv.in_channels - - @property - def out_channels(self): - return self.conv.out_channels - - def forward(self, x): - x = self.conv(x) - x = self.bn(x) - return x - - -@dataclass -class LayerFn: - conv_norm_act: Callable = ConvNormAct - norm_act: Callable = BatchNormAct2d - act: Callable = nn.ReLU - attn: Optional[Callable] = None - self_attn: Optional[Callable] = None - - -def override_kwargs(block_kwargs, model_kwargs): - """ Override model level attn/self-attn/block kwargs w/ block level - - NOTE: kwargs are NOT merged across levels, block_kwargs will fully replace model_kwargs - for the block if set to anything that isn't None. - - i.e. an empty block_kwargs dict will remove kwargs set at model level for that block - """ - out_kwargs = block_kwargs if block_kwargs is not None else model_kwargs - return out_kwargs or {} # make sure None isn't returned - - -def update_block_kwargs(block_kwargs: Dict[str, Any], block_cfg: ByoBlockCfg, model_cfg: ByoModelCfg, ): - layer_fns = block_kwargs['layers'] - block_kwargs['layers'] = layer_fns - - # add additional block_kwargs specified in block_cfg or model_cfg, precedence to block if set - block_kwargs.update(override_kwargs(block_cfg.block_kwargs, model_cfg.block_kwargs)) - - -def expand_blocks_cfg(stage_blocks_cfg: Union[ByoBlockCfg, Sequence[ByoBlockCfg]]) -> List[ByoBlockCfg]: - if not isinstance(stage_blocks_cfg, Sequence): - stage_blocks_cfg = (stage_blocks_cfg,) - block_cfgs = [] - for i, cfg in enumerate(stage_blocks_cfg): - block_cfgs += [replace(cfg, d=1) for _ in range(cfg.d)] - return block_cfgs - - -def make_divisible(v, divisor=8, min_value=None, round_limit=.9): - min_value = min_value or divisor - new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) - # Make sure that round down does not go down by more than 10%. 
- if new_v < round_limit * v: - new_v += divisor - return new_v - - -def create_shortcut( - downsample_type: str, - in_chs: int, - out_chs: int, - stride: int, - dilation: Tuple[int, int], - layers: LayerFn, - **kwargs, -): - assert downsample_type in ('avg', 'conv1x1', '') - if in_chs != out_chs or stride != 1 or dilation[0] != dilation[1]: - if not downsample_type: - return None # no shortcut - return layers.conv_norm_act(in_chs, out_chs, kernel_size=1, stride=stride, dilation=dilation[0], **kwargs) - else: - return nn.Identity() # identity shortcut - - -def num_groups(group_size, channels): - if not group_size: # 0 or None - return 1 # normal conv with 1 group - else: - # NOTE group_size == 1 -> depthwise conv - assert channels % group_size == 0 - return channels // group_size - - -class BottleneckBlock(nn.Module): - """ ResNet-like Bottleneck Block - 1x1 - kxk - 1x1 - """ - - def __init__( - self, - in_chs: int, - out_chs: int, - kernel_size: int = 3, - stride: int = 1, - dilation: Tuple[int, int] = (1, 1), - bottle_ratio: float = 1., - group_size: Optional[int] = None, - downsample: str = 'avg', - attn_last: bool = False, - linear_out: bool = False, - extra_conv: bool = False, - bottle_in: bool = False, - layers: LayerFn = None, - drop_block: Callable = None, - drop_path_rate: float = 0., - ): - super(BottleneckBlock, self).__init__() - layers = layers or LayerFn() - mid_chs = make_divisible((in_chs if bottle_in else out_chs) * bottle_ratio) - groups = num_groups(group_size, mid_chs) - - self.shortcut = create_shortcut( - downsample, in_chs, out_chs, - stride=stride, dilation=dilation, apply_act=False, layers=layers, - ) - - self.conv1_1x1 = layers.conv_norm_act(in_chs, mid_chs, 1) - self.conv2_kxk = layers.conv_norm_act( - mid_chs, mid_chs, kernel_size, - stride=stride, dilation=dilation[0], groups=groups, drop_layer=drop_block, - ) - if extra_conv: - self.conv2b_kxk = layers.conv_norm_act( - mid_chs, mid_chs, kernel_size, dilation=dilation[1], groups=groups) - else: - self.conv2b_kxk = nn.Identity() - self.attn = nn.Identity() if attn_last or layers.attn is None else layers.attn(mid_chs) - self.conv3_1x1 = layers.conv_norm_act(mid_chs, out_chs, 1, apply_act=False) - self.attn_last = nn.Identity() if not attn_last or layers.attn is None else layers.attn(out_chs) - self.drop_path = nn.Identity() - self.act = nn.Identity() if linear_out else layers.act(inplace=True) - - def init_weights(self, zero_init_last: bool = False): - if zero_init_last and self.shortcut is not None and getattr(self.conv3_1x1.bn, 'weight', None) is not None: - nn.init.zeros_(self.conv3_1x1.bn.weight) - for attn in (self.attn, self.attn_last): - if hasattr(attn, 'reset_parameters'): - attn.reset_parameters() - - def forward(self, x): - shortcut = x - x = self.conv1_1x1(x) - x = self.conv2_kxk(x) - x = self.conv2b_kxk(x) - x = self.attn(x) - x = self.conv3_1x1(x) - x = self.attn_last(x) - x = self.drop_path(x) - if self.shortcut is not None: - x = x + self.shortcut(shortcut) - return self.act(x) - - -class ModifiedAttention(nn.Module): - """ - The ModifiedAttention class is derived from the timm/Attention class. - We've adjusted the class to prevent folding on the batch axis and to refrain from performing matmul on tensors - with more than 3 dimensions (considering the batch axis). - Additionally, we've included the patch_area in the initialization to address the issue of 'Proxy' object - interpretation in torch.fx. - Despite these modifications, the module retains its original functionality. 
- """ - - def __init__( - self, - dim: int, - num_heads: int = 8, - qkv_bias: bool = False, - qk_norm: bool = False, - attn_drop: float = 0., - proj_drop: float = 0., - norm_layer: nn.Module = nn.LayerNorm, - patch_area: int = 4, - ) -> None: - super().__init__() - assert dim % num_heads == 0, 'dim should be divisible by num_heads' - self.num_heads = num_heads - self.head_dim = dim // num_heads - self.scale = self.head_dim ** -0.5 - - self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias) - self.q_norm = norm_layer(self.head_dim) if qk_norm else nn.Identity() - self.k_norm = norm_layer(self.head_dim) if qk_norm else nn.Identity() - self.attn_drop = nn.Dropout(attn_drop) - self.proj = nn.Linear(dim, dim) - self.proj_drop = nn.Dropout(proj_drop) - - # Here, we've opted to include the patch_area directly instead of retrieving it within the forward method. - self.patch_area = patch_area - - def forward(self, x: torch.Tensor) -> torch.Tensor: - B, P, N, C = x.shape - # [B, P, N, 3*C] --> [B, P, N, 3*C] - qkv = self.qkv(x) - # [B, P, N, 3*C] --> [B, P, N, 3, num_heads, head_dim] - qkv = qkv.reshape(B, P, N, 3, self.num_heads, self.head_dim) - # [B, P, N, 3, num_heads, head_dim] --> [B, 3, num_heads, P, N, head_dim] - qkv = qkv.permute(0, 3, 4, 1, 2, 5) - # [B, 3, num_heads, P, N, head_dim] --> 3 * [B, num_heads, P, N, head_dim] - q, k, v = qkv.unbind(1) - - # We've adjusted this section to calculate the attention individually for each head and patch. - head_list = [] - - # [B, num_heads, P, N, head_dim] --> num_heads * [B, P, N, head_dim] - q_split = q.unbind(1) - k_split = k.unbind(1) - v_split = v.unbind(1) - for head in range(self.num_heads): - # [B, P, N, head_dim] --> P * [B, N, head_dim] - k_head = k_split[head].unbind(1) - q_head = q_split[head].unbind(1) - v_head = v_split[head].unbind(1) - - iter_list = [] - # Calculate the attention score head and patch - for patch in range(self.patch_area): - # [B, N, head_dim] - k_patch = k_head[patch] - q_patch = q_head[patch] - v_patch = v_head[patch] - - k_patch = self.k_norm(k_patch) - q_patch = self.q_norm(q_patch) - - q_patch = q_patch * self.scale - - # [B, N, head_dim] --> [B, head_dim, N] - k_patch = k_patch.transpose(-2, -1) - - # [B, N, head_dim] @ [B, head_dim, N] --> [B, N, N] - attn_iter = q_patch @ k_patch - - attn_iter = attn_iter.softmax(dim=-1) - attn_iter = self.attn_drop(attn_iter) - - # [B, N, N] @ [B, N, head_dim] --> [B, N, head_dim] - x_iter = attn_iter @ v_patch - - # P * [B, N, head_dim] - iter_list.append(x_iter) - - # P * [B, N, head_dim] --> [B, P, N, head_dim] - output_stacked = torch.stack(iter_list, dim=1) - - # num_heads * [B, P, N, head_dim] - head_list.append(output_stacked) - - # num_heads * [B, P, N, head_dim] --> [B, P, num_heads, N, head_dim] - concat_heads = torch.stack(head_list, dim=2) - - # [B, P, num_heads, N, head_dim] --> [B, P, N, num_heads, head_dim] - x = concat_heads.transpose(2, 3) - - # [B, P, N, num_heads, head_dim] --> [B, P, N, C] - x = x.reshape(B, P, N, C) - x = self.proj(x) - x = self.proj_drop(x) - return x - - -class Mlp(nn.Module): - """ MLP as used in Vision Transformer, MLP-Mixer and related networks - """ - - def __init__( - self, - in_features, - hidden_features=None, - out_features=None, - act_layer=nn.GELU, - norm_layer=None, - bias=True, - drop=0., - use_conv=False, - ): - super().__init__() - out_features = out_features or in_features - hidden_features = hidden_features or in_features - bias = to_2tuple(bias) - drop_probs = to_2tuple(drop) - linear_layer = partial(nn.Conv2d, 
kernel_size=1) if use_conv else nn.Linear - - self.fc1 = linear_layer(in_features, hidden_features, bias=bias[0]) - self.act = act_layer() - self.drop1 = nn.Dropout(drop_probs[0]) - self.norm = norm_layer(hidden_features) if norm_layer is not None else nn.Identity() - self.fc2 = linear_layer(hidden_features, out_features, bias=bias[1]) - self.drop2 = nn.Dropout(drop_probs[1]) - - def forward(self, x): - x = self.fc1(x) - x = self.act(x) - x = self.drop1(x) - x = self.norm(x) - x = self.fc2(x) - x = self.drop2(x) - return x - - -class TransformerBlock(nn.Module): - def __init__( - self, - dim: int, - num_heads: int, - mlp_ratio: float = 4., - qkv_bias: bool = False, - qk_norm: bool = False, - proj_drop: float = 0., - attn_drop: float = 0., - init_values: Optional[float] = None, - drop_path: float = 0., - act_layer: nn.Module = nn.GELU, - norm_layer: nn.Module = nn.LayerNorm, - mlp_layer: nn.Module = Mlp, - patch_area: float = 4. - ) -> None: - super().__init__() - self.norm1 = norm_layer(dim) - self.attn = ModifiedAttention( - dim, - num_heads=num_heads, - qkv_bias=qkv_bias, - qk_norm=qk_norm, - attn_drop=attn_drop, - proj_drop=proj_drop, - norm_layer=norm_layer, - patch_area=int(patch_area), - ) - self.ls1 = nn.Identity() - self.drop_path1 = nn.Identity() - - self.norm2 = norm_layer(dim) - self.mlp = mlp_layer( - in_features=dim, - hidden_features=int(dim * mlp_ratio), - act_layer=act_layer, - drop=proj_drop, - ) - self.ls2 = nn.Identity() - self.drop_path2 = nn.Identity() - - def forward(self, x: torch.Tensor) -> torch.Tensor: - x = x + self.drop_path1(self.ls1(self.attn(self.norm1(x)))) - x = x + self.drop_path2(self.ls2(self.mlp(self.norm2(x)))) - return x - - -class MobileVitBlock(nn.Module): - """ MobileViT block - Paper: https://arxiv.org/abs/2110.02178?context=cs.LG - """ - - def __init__( - self, - in_chs: int, - out_chs: Optional[int] = None, - kernel_size: int = 3, - stride: int = 1, - bottle_ratio: float = 1.0, - group_size: Optional[int] = None, - dilation: Tuple[int, int] = (1, 1), - mlp_ratio: float = 2.0, - transformer_dim: Optional[int] = None, - transformer_depth: int = 2, - patch_size: int = 8, - num_heads: int = 4, - attn_drop: float = 0., - drop: int = 0., - no_fusion: bool = False, - drop_path_rate: float = 0., - layers: LayerFn = None, - transformer_norm_layer: Callable = nn.LayerNorm, - **kwargs, # eat unused args - ): - super(MobileVitBlock, self).__init__() - - layers = layers or LayerFn() - groups = num_groups(group_size, in_chs) - out_chs = out_chs or in_chs - transformer_dim = transformer_dim or make_divisible(bottle_ratio * in_chs) - - self.patch_size = to_2tuple(patch_size) - self.patch_area = self.patch_size[0] * self.patch_size[1] - - self.conv_kxk = layers.conv_norm_act( - in_chs, in_chs, kernel_size=kernel_size, - stride=stride, groups=groups, dilation=dilation[0]) - self.conv_1x1 = nn.Conv2d(in_chs, transformer_dim, kernel_size=1, bias=False) - - self.transformer = nn.Sequential(*[ - TransformerBlock( - transformer_dim, - mlp_ratio=mlp_ratio, - num_heads=num_heads, - qkv_bias=True, - attn_drop=attn_drop, - proj_drop=drop, - drop_path=drop_path_rate, - act_layer=layers.act, - norm_layer=transformer_norm_layer, - patch_area=self.patch_area, - ) - for _ in range(transformer_depth) - ]) - self.norm = transformer_norm_layer(transformer_dim) - - self.conv_proj = layers.conv_norm_act(transformer_dim, out_chs, kernel_size=1, stride=1) - - if no_fusion: - self.conv_fusion = None - else: - self.conv_fusion = layers.conv_norm_act(in_chs + out_chs, out_chs, 
kernel_size=kernel_size, stride=1) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - shortcut = x - - # Local representation - x = self.conv_kxk(x) - x = self.conv_1x1(x) - - # Unfold (feature map -> patches) - patch_h, patch_w = self.patch_size - B, C, H, W = x.shape - new_h, new_w = math.ceil(H / patch_h) * patch_h, math.ceil(W / patch_w) * patch_w - num_patch_h, num_patch_w = new_h // patch_h, new_w // patch_w # n_h, n_w - num_patches = num_patch_h * num_patch_w # N - interpolate = False - - # We've adjusted this part to avoid folding on the batch axis. - # We've made a change here. Instead of fetching the shape as [B * C * n_h, n_w, p_h, p_w], we now fetch it as - # [B, C * n_h, n_w, p_h, p_w]. - # [B, C, H, W] --> [B, C * n_h, p_h, n_w, p_w] - x = x.reshape(B, C * num_patch_h, patch_h, num_patch_w, patch_w) - # [B, C * n_h, p_h, n_w, p_w] --> [B, C * n_h, n_w, p_h, p_w] - x = x.transpose(2, 3) - - # We've made a change here. Instead of fetching the shape as [BP, N, C], we now fetch it as [B, P, N, C]. - # [B, C * n_h, n_w, p_h, p_w] --> [B, C, N, P] - x = x.reshape(B, C, num_patches, self.patch_area) - # [B, C, N, P] --> [B, P, N, C] - x = x.transpose(1, 3) - - # Global representations - x = self.transformer(x) - x = self.norm(x) - - # We've adjusted this part to avoid folding on the batch axis. - # Fold (patch -> feature map) - # [B, P, N, C] --> [B, C, N, P] - x = x.transpose(1, 3) - # [B, C, N, P] --> [B, C * n_h, n_w, p_h, p_w] - x = x.reshape(B, C * num_patch_h, num_patch_w, patch_h, patch_w) - # [B, C * n_h, n_w, p_h, p_w] --> [B, C * n_h, p_h, n_w, p_w] - x = x.transpose(2, 3) - # [B, C * n_h, p_h, n_w, p_w] --> [B, C, n_h * p_h, n_w * p_w] - x = x.reshape(B, C, num_patch_h * patch_h, num_patch_w * patch_w) - if interpolate: - x = F.interpolate(x, size=(H, W), mode="bilinear", align_corners=False) - - x = self.conv_proj(x) - if self.conv_fusion is not None: - x = self.conv_fusion(torch.cat((shortcut, x), dim=1)) - return x - - -_block_registry = dict( - bottle=BottleneckBlock, - mobilevit=MobileVitBlock, -) - - -def create_block(block: Union[str, nn.Module], **kwargs): - if isinstance(block, (nn.Module, partial)): - return block(**kwargs) - assert block in _block_registry, f'Unknown block type ({block}' - return _block_registry[block](**kwargs) - - -def create_byob_stages( - cfg: ByoModelCfg, - drop_path_rate: float, - output_stride: int, - stem_feat: Dict[str, Any], - feat_size: Optional[int] = None, - layers: Optional[LayerFn] = None, - block_kwargs_fn: Optional[Callable] = update_block_kwargs, -): - layers = layers or LayerFn() - feature_info = [] - block_cfgs = [expand_blocks_cfg(s) for s in cfg.blocks] - depths = [sum([bc.d for bc in stage_bcs]) for stage_bcs in block_cfgs] - dpr = [x.tolist() for x in torch.linspace(0, drop_path_rate, sum(depths)).split(depths)] - dilation = 1 - net_stride = stem_feat['reduction'] - prev_chs = stem_feat['num_chs'] - prev_feat = stem_feat - stages = [] - for stage_idx, stage_block_cfgs in enumerate(block_cfgs): - stride = stage_block_cfgs[0].s - if stride != 1 and prev_feat: - feature_info.append(prev_feat) - if net_stride >= output_stride and stride > 1: - dilation *= stride - stride = 1 - net_stride *= stride - first_dilation = 1 if dilation in (1, 2) else 2 - - blocks = [] - for block_idx, block_cfg in enumerate(stage_block_cfgs): - out_chs = make_divisible(block_cfg.c * cfg.width_factor) - group_size = block_cfg.gs - if isinstance(group_size, Callable): - group_size = group_size(out_chs, block_idx) - block_kwargs = dict( # 
Blocks used in this model must accept these arguments - in_chs=prev_chs, - out_chs=out_chs, - stride=stride if block_idx == 0 else 1, - dilation=(first_dilation, dilation), - group_size=group_size, - bottle_ratio=block_cfg.br, - downsample=cfg.downsample, - drop_path_rate=dpr[stage_idx][block_idx], - layers=layers, - ) - if block_cfg.type in ('self_attn',): - # add feat_size arg for blocks that support/need it - block_kwargs['feat_size'] = feat_size - block_kwargs_fn(block_kwargs, block_cfg=block_cfg, model_cfg=cfg) - blocks += [create_block(block_cfg.type, **block_kwargs)] - first_dilation = dilation - prev_chs = out_chs - if stride > 1 and block_idx == 0: - feat_size = reduce_feat_size(feat_size, stride) - - stages += [nn.Sequential(*blocks)] - prev_feat = dict(num_chs=prev_chs, reduction=net_stride, module=f'stages.{stage_idx}') - - feature_info.append(prev_feat) - return nn.Sequential(*stages), feature_info - - -def create_byob_stem( - in_chs: int, - out_chs: int, - stem_type: str = '', - pool_type: str = '', - feat_prefix: str = 'stem', - layers: LayerFn = None, -): - layers = layers or LayerFn() - stem = layers.conv_norm_act(in_chs, out_chs, 3, stride=2) - feature_info = [dict(num_chs=out_chs, reduction=2, module=feat_prefix)] - return stem, feature_info - - -def reduce_feat_size(feat_size, stride=2): - return None if feat_size is None else tuple([s // stride for s in feat_size]) - - -def get_layer_fns(cfg: ByoModelCfg): - act = get_act_layer(cfg.act_layer) - norm_act = get_norm_act_layer(norm_layer=cfg.norm_layer, act_layer=act) - conv_norm_act = partial(ConvNormAct, norm_layer=cfg.norm_layer, act_layer=act) - # To streamline the process, we've opted to set None for attn and self_attn instead of invoking the get_attn - # function, in line with the configuration of the mobilevit-xs model. 
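
Stepping back to the `MobileVitBlock.forward` method above: the batch-preserving unfold/fold reshaping there is easy to misread from the shape comments alone, so here is a minimal round-trip check (plain PyTorch, illustrative only and not part of the original file) that mirrors the same reshape/transpose sequence and confirms the fold exactly inverts the unfold:

```python
import torch

# Toy dimensions; the shape names match the comments in MobileVitBlock.forward.
B, C, H, W = 2, 8, 16, 16
patch_h = patch_w = 2
num_patch_h, num_patch_w = H // patch_h, W // patch_w
num_patches, patch_area = num_patch_h * num_patch_w, patch_h * patch_w  # N, P

x = torch.randn(B, C, H, W)

# Unfold: [B, C, H, W] --> [B, P, N, C], keeping the batch axis separate.
u = x.reshape(B, C * num_patch_h, patch_h, num_patch_w, patch_w)
u = u.transpose(2, 3)                          # [B, C*n_h, n_w, p_h, p_w]
u = u.reshape(B, C, num_patches, patch_area)   # [B, C, N, P]
u = u.transpose(1, 3)                          # [B, P, N, C]

# Fold: [B, P, N, C] --> [B, C, H, W], the exact inverse of the unfold above.
f = u.transpose(1, 3)                          # [B, C, N, P]
f = f.reshape(B, C * num_patch_h, num_patch_w, patch_h, patch_w)
f = f.transpose(2, 3)                          # [B, C*n_h, p_h, n_w, p_w]
f = f.reshape(B, C, num_patch_h * patch_h, num_patch_w * patch_w)

assert torch.equal(f, x)  # the round trip is lossless
```
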
- - attn = None - self_attn = None - layer_fn = LayerFn(conv_norm_act=conv_norm_act, norm_act=norm_act, act=act, attn=attn, self_attn=self_attn) - return layer_fn - - -class SelectAdaptivePool2d(nn.Module): - """Selectable global pooling layer with dynamic input kernel size - """ - - def __init__( - self, - output_size: _int_tuple_2_t = 1, - pool_type: str = 'fast', - flatten: bool = False, - input_fmt: str = 'NCHW', - ): - super(SelectAdaptivePool2d, self).__init__() - assert input_fmt in ('NCHW', 'NHWC') - self.pool_type = pool_type or '' # convert other falsy values to empty string for consistent TS typing - self.pool = nn.AdaptiveAvgPool2d(output_size) - self.flatten = nn.Flatten(1) if flatten else nn.Identity() - - def is_identity(self): - return not self.pool_type - - def forward(self, x): - x = self.pool(x) - x = self.flatten(x) - return x - - def feat_mult(self): - return adaptive_pool_feat_mult(self.pool_type) - - def __repr__(self): - return self.__class__.__name__ + '(' \ - + 'pool_type=' + self.pool_type \ - + ', flatten=' + str(self.flatten) + ')' - - -def _create_pool( - num_features: int, - num_classes: int, - pool_type: str = 'avg', - use_conv: bool = False, - input_fmt: Optional[str] = None, -): - flatten_in_pool = not use_conv # flatten when we use a Linear layer after pooling - if not pool_type: - assert num_classes == 0 or use_conv, \ - 'Pooling can only be disabled if classifier is also removed or conv classifier is used' - flatten_in_pool = False # disable flattening if pooling is pass-through (no pooling) - global_pool = SelectAdaptivePool2d( - pool_type=pool_type, - flatten=flatten_in_pool, - input_fmt=input_fmt, - ) - num_pooled_features = num_features * global_pool.feat_mult() - return global_pool, num_pooled_features - - -def create_classifier( - num_features: int, - num_classes: int, - pool_type: str = 'avg', - use_conv: bool = False, - input_fmt: str = 'NCHW', - drop_rate: Optional[float] = None, -): - global_pool, num_pooled_features = _create_pool( - num_features, - num_classes, - pool_type, - use_conv=use_conv, - input_fmt=input_fmt, - ) - fc = _create_fc( - num_pooled_features, - num_classes, - use_conv=use_conv, - ) - if drop_rate is not None: - dropout = nn.Dropout(drop_rate) - return global_pool, dropout, fc - return global_pool, fc - - -class ClassifierHead(nn.Module): - """Classifier head w/ configurable global pooling and dropout.""" - - def __init__( - self, - in_features: int, - num_classes: int, - pool_type: str = 'avg', - drop_rate: float = 0., - use_conv: bool = False, - input_fmt: str = 'NCHW', - ): - """ - Args: - in_features: The number of input features. - num_classes: The number of classes for the final classifier layer (output). - pool_type: Global pooling type, pooling disabled if empty string (''). - drop_rate: Pre-classifier dropout rate. 
- """ - super(ClassifierHead, self).__init__() - self.in_features = in_features - self.use_conv = use_conv - self.input_fmt = input_fmt - - global_pool, fc = create_classifier( - in_features, - num_classes, - pool_type, - use_conv=use_conv, - input_fmt=input_fmt, - ) - self.global_pool = global_pool - self.drop = nn.Dropout(drop_rate) - self.fc = fc - self.flatten = nn.Flatten(1) if use_conv and pool_type else nn.Identity() - - def reset(self, num_classes, pool_type=None): - if pool_type is not None and pool_type != self.global_pool.pool_type: - self.global_pool, self.fc = create_classifier( - self.in_features, - num_classes, - pool_type=pool_type, - use_conv=self.use_conv, - input_fmt=self.input_fmt, - ) - self.flatten = nn.Flatten(1) if self.use_conv and pool_type else nn.Identity() - else: - num_pooled_features = self.in_features * self.global_pool.feat_mult() - self.fc = _create_fc( - num_pooled_features, - num_classes, - use_conv=self.use_conv, - ) - - def forward(self, x, pre_logits: bool = False): - x = self.global_pool(x) - x = self.drop(x) - if pre_logits: - return self.flatten(x) - x = self.fc(x) - return self.flatten(x) - - -class MobileViTXSPyTorch(nn.Module, PyTorchModelHubMixin): - """ - This class defines a new model variant called MobileViTXSPyTorch. - It is derived from the timm/ByobNet class but is tailored to utilize the mobilevit-xs configuration by default. - """ - - def __init__(self): - super().__init__() - variant = 'mobilevit_xs' - cfg = model_cfgs[variant] - num_classes = 1000 - in_chans = 3 - global_pool = 'avg' - output_stride = 32 - drop_rate = 0. - drop_path_rate = 0. - zero_init_last = True - - self.num_classes = num_classes - self.drop_rate = drop_rate - - layers = get_layer_fns(cfg) - feat_size = None - - self.feature_info = [] - stem_chs = int(round((cfg.stem_chs or cfg.blocks[0].c) * cfg.width_factor)) - self.stem, stem_feat = create_byob_stem(in_chans, stem_chs, cfg.stem_type, cfg.stem_pool, layers=layers) - self.feature_info.extend(stem_feat[:-1]) - feat_size = reduce_feat_size(feat_size, stride=stem_feat[-1]['reduction']) - - self.stages, stage_feat = create_byob_stages( - cfg, - drop_path_rate, - output_stride, - stem_feat[-1], - layers=layers, - feat_size=feat_size, - ) - self.feature_info.extend(stage_feat[:-1]) - - prev_chs = stage_feat[-1]['num_chs'] - if cfg.num_features: - self.num_features = int(round(cfg.width_factor * cfg.num_features)) - self.final_conv = layers.conv_norm_act(prev_chs, self.num_features, 1) - else: - self.num_features = prev_chs - self.final_conv = nn.Identity() - self.feature_info += [ - dict(num_chs=self.num_features, reduction=stage_feat[-1]['reduction'], module='final_conv')] - - self.head = ClassifierHead( - self.num_features, - num_classes, - pool_type=global_pool, - drop_rate=self.drop_rate, - ) - - # init weights - named_apply(partial(_init_weights, zero_init_last=zero_init_last), self) - - # We have defined pretrained_cfg to represent the configuration specific to mobilevit-xs pretrained model, - # including relevant items for dataset and data loader. 
- self.pretrained_cfg = {'input_size': (3, 256, 256), - 'fixed_input_size': False, - 'interpolation': 'bicubic', - 'crop_pct': 0.9, - 'crop_mode': 'center', - 'mean': (0.0, 0.0, 0.0), - 'std': (1.0, 1.0, 1.0), - 'num_classes': 1000, - 'pool_size': (8, 8)} - - def forward_features(self, x): - x = self.stem(x) - x = self.stages(x) - x = self.final_conv(x) - return x - - def forward_head(self, x, pre_logits: bool = False): - return self.head(x, pre_logits=pre_logits) - - def forward(self, x): - x = self.forward_features(x) - x = self.forward_head(x) - return x - - def save_pretrained(self, save_directory, **kwargs): - - # Call the original save_pretrained method - super().save_pretrained(save_directory, **kwargs) diff --git a/tutorials/mct_model_garden/models_pytorch/yolov8/LICENSE b/tutorials/mct_model_garden/models_pytorch/yolov8/LICENSE deleted file mode 100644 index bae94e189..000000000 --- a/tutorials/mct_model_garden/models_pytorch/yolov8/LICENSE +++ /dev/null @@ -1,661 +0,0 @@ - GNU AFFERO GENERAL PUBLIC LICENSE - Version 3, 19 November 2007 - - Copyright (C) 2007 Free Software Foundation, Inc. - Everyone is permitted to copy and distribute verbatim copies - of this license document, but changing it is not allowed. - - Preamble - - The GNU Affero General Public License is a free, copyleft license for -software and other kinds of works, specifically designed to ensure -cooperation with the community in the case of network server software. - - The licenses for most software and other practical works are designed -to take away your freedom to share and change the works. By contrast, -our General Public Licenses are intended to guarantee your freedom to -share and change all versions of a program--to make sure it remains free -software for all its users. - - When we speak of free software, we are referring to freedom, not -price. Our General Public Licenses are designed to make sure that you -have the freedom to distribute copies of free software (and charge for -them if you wish), that you receive source code or can get it if you -want it, that you can change the software or use pieces of it in new -free programs, and that you know you can do these things. - - Developers that use our General Public Licenses protect your rights -with two steps: (1) assert copyright on the software, and (2) offer -you this License which gives you legal permission to copy, distribute -and/or modify the software. - - A secondary benefit of defending all users' freedom is that -improvements made in alternate versions of the program, if they -receive widespread use, become available for other developers to -incorporate. Many developers of free software are heartened and -encouraged by the resulting cooperation. However, in the case of -software used on network servers, this result may fail to come about. -The GNU General Public License permits making a modified version and -letting the public access it on a server without ever releasing its -source code to the public. - - The GNU Affero General Public License is designed specifically to -ensure that, in such cases, the modified source code becomes available -to the community. It requires the operator of a network server to -provide the source code of the modified version running there to the -users of that server. Therefore, public use of a modified version, on -a publicly accessible server, gives the public access to the source -code of the modified version. 
- - An older license, called the Affero General Public License and -published by Affero, was designed to accomplish similar goals. This is -a different license, not a version of the Affero GPL, but Affero has -released a new version of the Affero GPL which permits relicensing under -this license. - - The precise terms and conditions for copying, distribution and -modification follow. - - TERMS AND CONDITIONS - - 0. Definitions. - - "This License" refers to version 3 of the GNU Affero General Public License. - - "Copyright" also means copyright-like laws that apply to other kinds of -works, such as semiconductor masks. - - "The Program" refers to any copyrightable work licensed under this -License. Each licensee is addressed as "you". "Licensees" and -"recipients" may be individuals or organizations. - - To "modify" a work means to copy from or adapt all or part of the work -in a fashion requiring copyright permission, other than the making of an -exact copy. The resulting work is called a "modified version" of the -earlier work or a work "based on" the earlier work. - - A "covered work" means either the unmodified Program or a work based -on the Program. - - To "propagate" a work means to do anything with it that, without -permission, would make you directly or secondarily liable for -infringement under applicable copyright law, except executing it on a -computer or modifying a private copy. Propagation includes copying, -distribution (with or without modification), making available to the -public, and in some countries other activities as well. - - To "convey" a work means any kind of propagation that enables other -parties to make or receive copies. Mere interaction with a user through -a computer network, with no transfer of a copy, is not conveying. - - An interactive user interface displays "Appropriate Legal Notices" -to the extent that it includes a convenient and prominently visible -feature that (1) displays an appropriate copyright notice, and (2) -tells the user that there is no warranty for the work (except to the -extent that warranties are provided), that licensees may convey the -work under this License, and how to view a copy of this License. If -the interface presents a list of user commands or options, such as a -menu, a prominent item in the list meets this criterion. - - 1. Source Code. - - The "source code" for a work means the preferred form of the work -for making modifications to it. "Object code" means any non-source -form of a work. - - A "Standard Interface" means an interface that either is an official -standard defined by a recognized standards body, or, in the case of -interfaces specified for a particular programming language, one that -is widely used among developers working in that language. - - The "System Libraries" of an executable work include anything, other -than the work as a whole, that (a) is included in the normal form of -packaging a Major Component, but which is not part of that Major -Component, and (b) serves only to enable use of the work with that -Major Component, or to implement a Standard Interface for which an -implementation is available to the public in source code form. A -"Major Component", in this context, means a major essential component -(kernel, window system, and so on) of the specific operating system -(if any) on which the executable work runs, or a compiler used to -produce the work, or an object code interpreter used to run it. 
- - The "Corresponding Source" for a work in object code form means all -the source code needed to generate, install, and (for an executable -work) run the object code and to modify the work, including scripts to -control those activities. However, it does not include the work's -System Libraries, or general-purpose tools or generally available free -programs which are used unmodified in performing those activities but -which are not part of the work. For example, Corresponding Source -includes interface definition files associated with source files for -the work, and the source code for shared libraries and dynamically -linked subprograms that the work is specifically designed to require, -such as by intimate data communication or control flow between those -subprograms and other parts of the work. - - The Corresponding Source need not include anything that users -can regenerate automatically from other parts of the Corresponding -Source. - - The Corresponding Source for a work in source code form is that -same work. - - 2. Basic Permissions. - - All rights granted under this License are granted for the term of -copyright on the Program, and are irrevocable provided the stated -conditions are met. This License explicitly affirms your unlimited -permission to run the unmodified Program. The output from running a -covered work is covered by this License only if the output, given its -content, constitutes a covered work. This License acknowledges your -rights of fair use or other equivalent, as provided by copyright law. - - You may make, run and propagate covered works that you do not -convey, without conditions so long as your license otherwise remains -in force. You may convey covered works to others for the sole purpose -of having them make modifications exclusively for you, or provide you -with facilities for running those works, provided that you comply with -the terms of this License in conveying all material for which you do -not control copyright. Those thus making or running the covered works -for you must do so exclusively on your behalf, under your direction -and control, on terms that prohibit them from making any copies of -your copyrighted material outside their relationship with you. - - Conveying under any other circumstances is permitted solely under -the conditions stated below. Sublicensing is not allowed; section 10 -makes it unnecessary. - - 3. Protecting Users' Legal Rights From Anti-Circumvention Law. - - No covered work shall be deemed part of an effective technological -measure under any applicable law fulfilling obligations under article -11 of the WIPO copyright treaty adopted on 20 December 1996, or -similar laws prohibiting or restricting circumvention of such -measures. - - When you convey a covered work, you waive any legal power to forbid -circumvention of technological measures to the extent such circumvention -is effected by exercising rights under this License with respect to -the covered work, and you disclaim any intention to limit operation or -modification of the work as a means of enforcing, against the work's -users, your or third parties' legal rights to forbid circumvention of -technological measures. - - 4. Conveying Verbatim Copies. 
- - You may convey verbatim copies of the Program's source code as you -receive it, in any medium, provided that you conspicuously and -appropriately publish on each copy an appropriate copyright notice; -keep intact all notices stating that this License and any -non-permissive terms added in accord with section 7 apply to the code; -keep intact all notices of the absence of any warranty; and give all -recipients a copy of this License along with the Program. - - You may charge any price or no price for each copy that you convey, -and you may offer support or warranty protection for a fee. - - 5. Conveying Modified Source Versions. - - You may convey a work based on the Program, or the modifications to -produce it from the Program, in the form of source code under the -terms of section 4, provided that you also meet all of these conditions: - - a) The work must carry prominent notices stating that you modified - it, and giving a relevant date. - - b) The work must carry prominent notices stating that it is - released under this License and any conditions added under section - 7. This requirement modifies the requirement in section 4 to - "keep intact all notices". - - c) You must license the entire work, as a whole, under this - License to anyone who comes into possession of a copy. This - License will therefore apply, along with any applicable section 7 - additional terms, to the whole of the work, and all its parts, - regardless of how they are packaged. This License gives no - permission to license the work in any other way, but it does not - invalidate such permission if you have separately received it. - - d) If the work has interactive user interfaces, each must display - Appropriate Legal Notices; however, if the Program has interactive - interfaces that do not display Appropriate Legal Notices, your - work need not make them do so. - - A compilation of a covered work with other separate and independent -works, which are not by their nature extensions of the covered work, -and which are not combined with it such as to form a larger program, -in or on a volume of a storage or distribution medium, is called an -"aggregate" if the compilation and its resulting copyright are not -used to limit the access or legal rights of the compilation's users -beyond what the individual works permit. Inclusion of a covered work -in an aggregate does not cause this License to apply to the other -parts of the aggregate. - - 6. Conveying Non-Source Forms. - - You may convey a covered work in object code form under the terms -of sections 4 and 5, provided that you also convey the -machine-readable Corresponding Source under the terms of this License, -in one of these ways: - - a) Convey the object code in, or embodied in, a physical product - (including a physical distribution medium), accompanied by the - Corresponding Source fixed on a durable physical medium - customarily used for software interchange. 
- - b) Convey the object code in, or embodied in, a physical product - (including a physical distribution medium), accompanied by a - written offer, valid for at least three years and valid for as - long as you offer spare parts or customer support for that product - model, to give anyone who possesses the object code either (1) a - copy of the Corresponding Source for all the software in the - product that is covered by this License, on a durable physical - medium customarily used for software interchange, for a price no - more than your reasonable cost of physically performing this - conveying of source, or (2) access to copy the - Corresponding Source from a network server at no charge. - - c) Convey individual copies of the object code with a copy of the - written offer to provide the Corresponding Source. This - alternative is allowed only occasionally and noncommercially, and - only if you received the object code with such an offer, in accord - with subsection 6b. - - d) Convey the object code by offering access from a designated - place (gratis or for a charge), and offer equivalent access to the - Corresponding Source in the same way through the same place at no - further charge. You need not require recipients to copy the - Corresponding Source along with the object code. If the place to - copy the object code is a network server, the Corresponding Source - may be on a different server (operated by you or a third party) - that supports equivalent copying facilities, provided you maintain - clear directions next to the object code saying where to find the - Corresponding Source. Regardless of what server hosts the - Corresponding Source, you remain obligated to ensure that it is - available for as long as needed to satisfy these requirements. - - e) Convey the object code using peer-to-peer transmission, provided - you inform other peers where the object code and Corresponding - Source of the work are being offered to the general public at no - charge under subsection 6d. - - A separable portion of the object code, whose source code is excluded -from the Corresponding Source as a System Library, need not be -included in conveying the object code work. - - A "User Product" is either (1) a "consumer product", which means any -tangible personal property which is normally used for personal, family, -or household purposes, or (2) anything designed or sold for incorporation -into a dwelling. In determining whether a product is a consumer product, -doubtful cases shall be resolved in favor of coverage. For a particular -product received by a particular user, "normally used" refers to a -typical or common use of that class of product, regardless of the status -of the particular user or of the way in which the particular user -actually uses, or expects or is expected to use, the product. A product -is a consumer product regardless of whether the product has substantial -commercial, industrial or non-consumer uses, unless such uses represent -the only significant mode of use of the product. - - "Installation Information" for a User Product means any methods, -procedures, authorization keys, or other information required to install -and execute modified versions of a covered work in that User Product from -a modified version of its Corresponding Source. The information must -suffice to ensure that the continued functioning of the modified object -code is in no case prevented or interfered with solely because -modification has been made. 
- - If you convey an object code work under this section in, or with, or -specifically for use in, a User Product, and the conveying occurs as -part of a transaction in which the right of possession and use of the -User Product is transferred to the recipient in perpetuity or for a -fixed term (regardless of how the transaction is characterized), the -Corresponding Source conveyed under this section must be accompanied -by the Installation Information. But this requirement does not apply -if neither you nor any third party retains the ability to install -modified object code on the User Product (for example, the work has -been installed in ROM). - - The requirement to provide Installation Information does not include a -requirement to continue to provide support service, warranty, or updates -for a work that has been modified or installed by the recipient, or for -the User Product in which it has been modified or installed. Access to a -network may be denied when the modification itself materially and -adversely affects the operation of the network or violates the rules and -protocols for communication across the network. - - Corresponding Source conveyed, and Installation Information provided, -in accord with this section must be in a format that is publicly -documented (and with an implementation available to the public in -source code form), and must require no special password or key for -unpacking, reading or copying. - - 7. Additional Terms. - - "Additional permissions" are terms that supplement the terms of this -License by making exceptions from one or more of its conditions. -Additional permissions that are applicable to the entire Program shall -be treated as though they were included in this License, to the extent -that they are valid under applicable law. If additional permissions -apply only to part of the Program, that part may be used separately -under those permissions, but the entire Program remains governed by -this License without regard to the additional permissions. - - When you convey a copy of a covered work, you may at your option -remove any additional permissions from that copy, or from any part of -it. (Additional permissions may be written to require their own -removal in certain cases when you modify the work.) You may place -additional permissions on material, added by you to a covered work, -for which you have or can give appropriate copyright permission. 
- - Notwithstanding any other provision of this License, for material you -add to a covered work, you may (if authorized by the copyright holders of -that material) supplement the terms of this License with terms: - - a) Disclaiming warranty or limiting liability differently from the - terms of sections 15 and 16 of this License; or - - b) Requiring preservation of specified reasonable legal notices or - author attributions in that material or in the Appropriate Legal - Notices displayed by works containing it; or - - c) Prohibiting misrepresentation of the origin of that material, or - requiring that modified versions of such material be marked in - reasonable ways as different from the original version; or - - d) Limiting the use for publicity purposes of names of licensors or - authors of the material; or - - e) Declining to grant rights under trademark law for use of some - trade names, trademarks, or service marks; or - - f) Requiring indemnification of licensors and authors of that - material by anyone who conveys the material (or modified versions of - it) with contractual assumptions of liability to the recipient, for - any liability that these contractual assumptions directly impose on - those licensors and authors. - - All other non-permissive additional terms are considered "further -restrictions" within the meaning of section 10. If the Program as you -received it, or any part of it, contains a notice stating that it is -governed by this License along with a term that is a further -restriction, you may remove that term. If a license document contains -a further restriction but permits relicensing or conveying under this -License, you may add to a covered work material governed by the terms -of that license document, provided that the further restriction does -not survive such relicensing or conveying. - - If you add terms to a covered work in accord with this section, you -must place, in the relevant source files, a statement of the -additional terms that apply to those files, or a notice indicating -where to find the applicable terms. - - Additional terms, permissive or non-permissive, may be stated in the -form of a separately written license, or stated as exceptions; -the above requirements apply either way. - - 8. Termination. - - You may not propagate or modify a covered work except as expressly -provided under this License. Any attempt otherwise to propagate or -modify it is void, and will automatically terminate your rights under -this License (including any patent licenses granted under the third -paragraph of section 11). - - However, if you cease all violation of this License, then your -license from a particular copyright holder is reinstated (a) -provisionally, unless and until the copyright holder explicitly and -finally terminates your license, and (b) permanently, if the copyright -holder fails to notify you of the violation by some reasonable means -prior to 60 days after the cessation. - - Moreover, your license from a particular copyright holder is -reinstated permanently if the copyright holder notifies you of the -violation by some reasonable means, this is the first time you have -received notice of violation of this License (for any work) from that -copyright holder, and you cure the violation prior to 30 days after -your receipt of the notice. - - Termination of your rights under this section does not terminate the -licenses of parties who have received copies or rights from you under -this License. 
If your rights have been terminated and not permanently -reinstated, you do not qualify to receive new licenses for the same -material under section 10. - - 9. Acceptance Not Required for Having Copies. - - You are not required to accept this License in order to receive or -run a copy of the Program. Ancillary propagation of a covered work -occurring solely as a consequence of using peer-to-peer transmission -to receive a copy likewise does not require acceptance. However, -nothing other than this License grants you permission to propagate or -modify any covered work. These actions infringe copyright if you do -not accept this License. Therefore, by modifying or propagating a -covered work, you indicate your acceptance of this License to do so. - - 10. Automatic Licensing of Downstream Recipients. - - Each time you convey a covered work, the recipient automatically -receives a license from the original licensors, to run, modify and -propagate that work, subject to this License. You are not responsible -for enforcing compliance by third parties with this License. - - An "entity transaction" is a transaction transferring control of an -organization, or substantially all assets of one, or subdividing an -organization, or merging organizations. If propagation of a covered -work results from an entity transaction, each party to that -transaction who receives a copy of the work also receives whatever -licenses to the work the party's predecessor in interest had or could -give under the previous paragraph, plus a right to possession of the -Corresponding Source of the work from the predecessor in interest, if -the predecessor has it or can get it with reasonable efforts. - - You may not impose any further restrictions on the exercise of the -rights granted or affirmed under this License. For example, you may -not impose a license fee, royalty, or other charge for exercise of -rights granted under this License, and you may not initiate litigation -(including a cross-claim or counterclaim in a lawsuit) alleging that -any patent claim is infringed by making, using, selling, offering for -sale, or importing the Program or any portion of it. - - 11. Patents. - - A "contributor" is a copyright holder who authorizes use under this -License of the Program or a work on which the Program is based. The -work thus licensed is called the contributor's "contributor version". - - A contributor's "essential patent claims" are all patent claims -owned or controlled by the contributor, whether already acquired or -hereafter acquired, that would be infringed by some manner, permitted -by this License, of making, using, or selling its contributor version, -but do not include claims that would be infringed only as a -consequence of further modification of the contributor version. For -purposes of this definition, "control" includes the right to grant -patent sublicenses in a manner consistent with the requirements of -this License. - - Each contributor grants you a non-exclusive, worldwide, royalty-free -patent license under the contributor's essential patent claims, to -make, use, sell, offer for sale, import and otherwise run, modify and -propagate the contents of its contributor version. - - In the following three paragraphs, a "patent license" is any express -agreement or commitment, however denominated, not to enforce a patent -(such as an express permission to practice a patent or covenant not to -sue for patent infringement). 
To "grant" such a patent license to a -party means to make such an agreement or commitment not to enforce a -patent against the party. - - If you convey a covered work, knowingly relying on a patent license, -and the Corresponding Source of the work is not available for anyone -to copy, free of charge and under the terms of this License, through a -publicly available network server or other readily accessible means, -then you must either (1) cause the Corresponding Source to be so -available, or (2) arrange to deprive yourself of the benefit of the -patent license for this particular work, or (3) arrange, in a manner -consistent with the requirements of this License, to extend the patent -license to downstream recipients. "Knowingly relying" means you have -actual knowledge that, but for the patent license, your conveying the -covered work in a country, or your recipient's use of the covered work -in a country, would infringe one or more identifiable patents in that -country that you have reason to believe are valid. - - If, pursuant to or in connection with a single transaction or -arrangement, you convey, or propagate by procuring conveyance of, a -covered work, and grant a patent license to some of the parties -receiving the covered work authorizing them to use, propagate, modify -or convey a specific copy of the covered work, then the patent license -you grant is automatically extended to all recipients of the covered -work and works based on it. - - A patent license is "discriminatory" if it does not include within -the scope of its coverage, prohibits the exercise of, or is -conditioned on the non-exercise of one or more of the rights that are -specifically granted under this License. You may not convey a covered -work if you are a party to an arrangement with a third party that is -in the business of distributing software, under which you make payment -to the third party based on the extent of your activity of conveying -the work, and under which the third party grants, to any of the -parties who would receive the covered work from you, a discriminatory -patent license (a) in connection with copies of the covered work -conveyed by you (or copies made from those copies), or (b) primarily -for and in connection with specific products or compilations that -contain the covered work, unless you entered into that arrangement, -or that patent license was granted, prior to 28 March 2007. - - Nothing in this License shall be construed as excluding or limiting -any implied license or other defenses to infringement that may -otherwise be available to you under applicable patent law. - - 12. No Surrender of Others' Freedom. - - If conditions are imposed on you (whether by court order, agreement or -otherwise) that contradict the conditions of this License, they do not -excuse you from the conditions of this License. If you cannot convey a -covered work so as to satisfy simultaneously your obligations under this -License and any other pertinent obligations, then as a consequence you may -not convey it at all. For example, if you agree to terms that obligate you -to collect a royalty for further conveying from those to whom you convey -the Program, the only way you could satisfy both those terms and this -License would be to refrain entirely from conveying the Program. - - 13. Remote Network Interaction; Use with the GNU General Public License. 
- - Notwithstanding any other provision of this License, if you modify the -Program, your modified version must prominently offer all users -interacting with it remotely through a computer network (if your version -supports such interaction) an opportunity to receive the Corresponding -Source of your version by providing access to the Corresponding Source -from a network server at no charge, through some standard or customary -means of facilitating copying of software. This Corresponding Source -shall include the Corresponding Source for any work covered by version 3 -of the GNU General Public License that is incorporated pursuant to the -following paragraph. - - Notwithstanding any other provision of this License, you have -permission to link or combine any covered work with a work licensed -under version 3 of the GNU General Public License into a single -combined work, and to convey the resulting work. The terms of this -License will continue to apply to the part which is the covered work, -but the work with which it is combined will remain governed by version -3 of the GNU General Public License. - - 14. Revised Versions of this License. - - The Free Software Foundation may publish revised and/or new versions of -the GNU Affero General Public License from time to time. Such new versions -will be similar in spirit to the present version, but may differ in detail to -address new problems or concerns. - - Each version is given a distinguishing version number. If the -Program specifies that a certain numbered version of the GNU Affero General -Public License "or any later version" applies to it, you have the -option of following the terms and conditions either of that numbered -version or of any later version published by the Free Software -Foundation. If the Program does not specify a version number of the -GNU Affero General Public License, you may choose any version ever published -by the Free Software Foundation. - - If the Program specifies that a proxy can decide which future -versions of the GNU Affero General Public License can be used, that proxy's -public statement of acceptance of a version permanently authorizes you -to choose that version for the Program. - - Later license versions may give you additional or different -permissions. However, no additional obligations are imposed on any -author or copyright holder as a result of your choosing to follow a -later version. - - 15. Disclaimer of Warranty. - - THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY -APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT -HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY -OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, -THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM -IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF -ALL NECESSARY SERVICING, REPAIR OR CORRECTION. - - 16. Limitation of Liability. 
- - IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING -WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS -THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY -GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE -USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF -DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD -PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), -EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF -SUCH DAMAGES. - - 17. Interpretation of Sections 15 and 16. - - If the disclaimer of warranty and limitation of liability provided -above cannot be given local legal effect according to their terms, -reviewing courts shall apply local law that most closely approximates -an absolute waiver of all civil liability in connection with the -Program, unless a warranty or assumption of liability accompanies a -copy of the Program in return for a fee. - - END OF TERMS AND CONDITIONS - - How to Apply These Terms to Your New Programs - - If you develop a new program, and you want it to be of the greatest -possible use to the public, the best way to achieve this is to make it -free software which everyone can redistribute and change under these terms. - - To do so, attach the following notices to the program. It is safest -to attach them to the start of each source file to most effectively -state the exclusion of warranty; and each file should have at least -the "copyright" line and a pointer to where the full notice is found. - - - Copyright (C) - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see . - -Also add information on how to contact you by electronic and paper mail. - - If your software can interact with users remotely through a computer -network, you should also make sure that it provides a way for users to -get its source. For example, if your program is a web application, its -interface could display a "Source" link that leads users to an archive -of the code. There are many ways you could offer source, and different -solutions will be better for different programs; see section 13 for the -specific requirements. - - You should also get your employer (if you work as a programmer) or school, -if any, to sign a "copyright disclaimer" for the program, if necessary. -For more information on this, and how to apply and follow the GNU AGPL, see -. \ No newline at end of file diff --git a/tutorials/mct_model_garden/models_pytorch/yolov8/__init__.py b/tutorials/mct_model_garden/models_pytorch/yolov8/__init__.py deleted file mode 100644 index e11a7cc60..000000000 --- a/tutorials/mct_model_garden/models_pytorch/yolov8/__init__.py +++ /dev/null @@ -1,14 +0,0 @@ -# Copyright 2024 Sony Semiconductor Israel, Inc. All rights reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== diff --git a/tutorials/mct_model_garden/models_pytorch/yolov8/postprocess_yolov8_seg.py b/tutorials/mct_model_garden/models_pytorch/yolov8/postprocess_yolov8_seg.py deleted file mode 100644 index 03a2c729c..000000000 --- a/tutorials/mct_model_garden/models_pytorch/yolov8/postprocess_yolov8_seg.py +++ /dev/null @@ -1,270 +0,0 @@ -# ------------------------------------------------------------------------------ -# This file contains code from the Ultralytics repository (YOLOv8) -# Copyright (C) 2024 Ultralytics -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . -# ------------------------------------------------------------------------------ - -""" -Part of this code was based on Ultralytics implementation. For more details, refer to the original repository: -https://github.com/ultralytics/ultralytics -""" -from typing import List -import numpy as np -import cv2 -from typing import Tuple - -from tutorials.mct_model_garden.models_pytorch.yolov8.yolov8_postprocess import nms - - -def combined_nms_seg(batch_boxes, batch_scores, batch_masks, iou_thres: float = 0.3, conf: float = 0.1, max_out_dets: int = 300): - """ - Perform combined Non-Maximum Suppression (NMS) and segmentation mask processing for batched inputs. - - This function processes batches of bounding boxes, confidence scores, and segmentation masks by applying - class-wise NMS to filter out overlapping boxes based on their Intersection over Union (IoU) and confidence scores. - It also filters detections based on a confidence threshold and returns the final bounding boxes, scores, class indices, - and corresponding segmentation masks. - - Args: - batch_boxes (List[np.ndarray]): List of arrays, each containing bounding boxes for an image in the batch. - Each array is of shape (N, 4), where N is the number of detections, - and each box is represented as [y1, x1, y2, x2]. - batch_scores (List[np.ndarray]): List of arrays, each containing confidence scores for detections in an image. - Each array is of shape (N, num_classes), where N is the number of detections. - batch_masks (List[np.ndarray]): List of arrays, each containing segmentation masks for detections in an image. - Each array is of shape (num_classes, H, W), where H and W are the dimensions - of the output mask. - iou_thres (float, optional): IoU threshold for NMS. Default is 0.3. 
- conf (float, optional): Confidence threshold to filter detections. Default is 0.1. - max_out_dets (int, optional): Maximum number of output detections to keep after NMS. Default is 300. - - Returns: - List[Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]]: A list of tuples, each containing: - - Bounding boxes of the final detections (shape: (K, 4)) - - Confidence scores of the final detections (shape: (K,)) - - Class indices of the final detections (shape: (K,)) - - Segmentation masks corresponding to the final detections (shape: (K, H, W)) - where K is the number of final detections kept after NMS and confidence filtering. - """ - nms_results = [] - for boxes, scores, masks in zip(batch_boxes, batch_scores, batch_masks): - # Compute maximum scores and corresponding class indices - class_indices = np.argmax(scores, axis=1) - max_scores = np.amax(scores, axis=1) - detections = np.concatenate([boxes, np.expand_dims(max_scores, axis=1), np.expand_dims(class_indices, axis=1)], axis=1) - - masks = np.transpose(masks, (1, 0)) - valid_detections = max_scores > conf - detections = detections[valid_detections] - masks = masks[valid_detections] - - if len(detections) == 0: - nms_results.append((np.array([]), np.array([]), np.array([]), np.array([[]]))) - continue - - # Sort detections by score in descending order - sorted_indices = np.argsort(-detections[:, 4]) - detections = detections[sorted_indices] - masks = masks[sorted_indices] - - # Perform class-wise NMS - unique_classes = np.unique(detections[:, 5]) - all_indices = [] - - for cls in unique_classes: - cls_indices = np.where(detections[:, 5] == cls)[0] - cls_boxes = detections[cls_indices, :4] - cls_scores = detections[cls_indices, 4] - cls_valid_indices = nms(cls_boxes, cls_scores, iou_thres=iou_thres, max_out_dets=len(cls_indices)) # Use all available for NMS - all_indices.extend(cls_indices[cls_valid_indices]) - - if len(all_indices) == 0: - nms_results.append((np.array([]), np.array([]), np.array([]), np.array([[]]))) - continue - - # Sort all indices by score and limit to max_out_dets - all_indices = np.array(all_indices) - all_indices = all_indices[np.argsort(-detections[all_indices, 4])] - final_indices = all_indices[:max_out_dets] - - final_detections = detections[final_indices] - final_masks = masks[final_indices] - - # Extract class indices, bounding boxes, and scores - nms_classes = final_detections[:, 5] - nms_bbox = final_detections[:, :4] - nms_scores = final_detections[:, 4] - - # Append results including masks - nms_results.append((nms_bbox, nms_scores, nms_classes, final_masks)) - - return nms_results - - -def crop_mask(masks, boxes): - """ - It takes a mask and a bounding box, and returns a mask that is cropped to the bounding box - - Args: - masks (numpy.ndarray): [h, w, n] tensor of masks - boxes (numpy.ndarray): [n, 4] tensor of bbox coordinates in relative point form - - Returns: - (numpy.ndarray): The masks are being cropped to the bounding box. - """ - n, w, h = masks.shape - x1, y1, x2, y2 = np.split(boxes[:, :, None], 4, 1) - c = np.arange(h, dtype=np.float32)[None, None, :] - r = np.arange(w, dtype=np.float32)[None, :, None] - - return masks * ((r >= x1) * (r < x2) * (c >= y1) * (c < y2)) - - -def calculate_padding(original_shape, target_shape): - """ - Calculate the padding needed to center the image in the target shape and the scale factor used for resizing. - - Args: - original_shape (tuple): The height and width of the original image. 
- target_shape (tuple): The desired height and width for scaling the image. - - Returns: - tuple: A tuple containing the padding widths (pad_width, pad_height) and the scale factor. - """ - orig_height, orig_width = original_shape[:2] - target_height, target_width = target_shape - larger_dim = max(orig_height, orig_width) - if not target_height==target_width: - print('model input must be square') - scale = target_height/larger_dim - - scaled_width = int(orig_width * scale) - scaled_height = int(orig_height * scale) - - pad_width = max((target_width - scaled_width) // 2, 0) - pad_height = max((target_height - scaled_height) // 2, 0) - - return pad_width, pad_height, scale - - - -def crop_to_original(mask, pad_width, pad_height, original_shape, scale): - """ - Crop the mask to the original image dimensions after padding and scaling adjustments. - - Args: - mask (numpy.ndarray): The mask to be cropped. - pad_width (int): The padding width applied to the mask. - pad_height (int): The padding height applied to the mask. - original_shape (tuple): The original dimensions of the image (height, width). - scale (float): The scaling factor applied to the original dimensions. - - Returns: - numpy.ndarray: The cropped mask. - """ - end_height = min(pad_height + (original_shape[0]*scale), mask.shape[0]) - end_width = min(pad_width + (original_shape[1]*scale), mask.shape[1]) - cropped_mask = mask[int(pad_height):int(end_height), int(pad_width):int(end_width)] - return cropped_mask - -def process_masks(masks, boxes, orig_img_shape, model_input_size): - """ - Adjusts and crops masks for detected objects to fit original image dimensions. - - Args: - masks (numpy.ndarray): Input masks to be processed. - boxes (numpy.ndarray): Bounding boxes for cropping masks. - orig_img_shape (tuple): Original dimensions of the image. - model_input_size (tuple): Input size required by the model. - - Returns: - numpy.ndarray: Processed masks adjusted and cropped to fit the original image dimensions. - - Processing Steps: - 1. Calculate padding and scaling for model input size adjustment. - 2. Apply sigmoid to normalize mask values. - 3. Resize masks to model input size. - 4. Crop masks to original dimensions using calculated padding. - 5. Resize cropped masks to original dimensions. - 6. Crop masks per bounding boxes for individual objects. 
- """ - if masks.size == 0: # Check if the masks array is empty - return np.array([]) - pad_width, pad_height, scale = calculate_padding(orig_img_shape, model_input_size) - masks = 1 / (1 + np.exp(-masks)) - orig_height, orig_width = orig_img_shape[:2] - masks = np.transpose(masks, (2, 1, 0)) # Change to HWC format - masks = cv2.resize(masks, model_input_size, interpolation=cv2.INTER_LINEAR) - - masks = np.expand_dims(masks, -1) if len(masks.shape) == 2 else masks - masks = np.transpose(masks, (2, 1, 0)) # Change back to CHW format - #crop masks based on padding - masks = [crop_to_original(mask, pad_width, pad_height, orig_img_shape, scale) for mask in masks] - masks = np.stack(masks, axis=0) - - masks = np.transpose(masks, (2, 1, 0)) # Change to HWC format - masks = cv2.resize(masks, (orig_height, orig_width), interpolation=cv2.INTER_LINEAR) - masks = np.expand_dims(masks, -1) if len(masks.shape) == 2 else masks - masks = np.transpose(masks, (2, 1, 0)) # Change back to CHW format - # Crop masks based on bounding boxes - masks = crop_mask(masks, boxes) - - return masks - - -def postprocess_yolov8_inst_seg(outputs: Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray], - conf: float = 0.1, - iou_thres: float = 0.3, - max_out_dets: int = 300) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: - """ - Post-processes the outputs of a YOLOv8 instance segmentation model. - - Args: - outputs (Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]): Tuple containing the outputs from the model: - - y_bb: Bounding box coordinates - - y_cls: Class probabilities - - ymask_weights: Weights for combining masks - - y_masks: Segmentation masks - conf (float): Confidence threshold for filtering detections. - iou_thres (float): IOU threshold for non-maximum suppression. - max_out_dets (int): Maximum number of detections to return. - - Returns: - Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: Tuple containing: - - nms_bbox: Bounding boxes after NMS. - - nms_scores: Scores of the bounding boxes. - - nms_classes: Class IDs of the bounding boxes. - - final_masks: Combined segmentation masks after applying mask weights. 
- """ - - - y_bb, y_cls, ymask_weights, y_masks = outputs - y_bb= np.transpose(y_bb, (0,2,1)) - y_cls= np.transpose(y_cls, (0,2,1)) - y_bb = y_bb * 640 #image size - detect_out = np.concatenate((y_bb, y_cls), 1) - xd = detect_out.transpose([0, 2, 1]) - nms_bbox, nms_scores, nms_classes, ymask_weights = combined_nms_seg(xd[..., :4], xd[..., 4:84], ymask_weights, iou_thres, conf, max_out_dets)[0] - y_masks = y_masks.squeeze(0) - - if ymask_weights.size == 0: - return np.array([]), np.array([]), np.array([]), np.array([]) - ymask_weights = ymask_weights.transpose(1, 0) - - final_masks = np.tensordot(ymask_weights, y_masks, axes=([0], [0])) - - return nms_bbox, nms_scores, nms_classes, final_masks - - diff --git a/tutorials/mct_model_garden/models_pytorch/yolov8/yolov8-seg.yaml b/tutorials/mct_model_garden/models_pytorch/yolov8/yolov8-seg.yaml deleted file mode 100644 index 7eaa0ab19..000000000 --- a/tutorials/mct_model_garden/models_pytorch/yolov8/yolov8-seg.yaml +++ /dev/null @@ -1,66 +0,0 @@ -# ------------------------------------------------------------------------------ -# This file contains code from the Ultralytics repository (YOLOv8) -# Copyright (C) 2024 Ultralytics -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . -# ------------------------------------------------------------------------------ - -# The following code was mostly duplicated from https://github.com/ultralytics/ultralytics -# -# Ultralytics YOLO 🚀, AGPL-3.0 license -# YOLOv8-seg instance segmentation model. For Usage examples see https://docs.ultralytics.com/tasks/segment - -# Parameters -nc: 80 # number of classes -scales: # model compound scaling constants, i.e. 
'model=yolov8n-seg.yaml' will call yolov8-seg.yaml with scale 'n' - # [depth, width, max_channels] - n: [0.33, 0.25, 1024] - s: [0.33, 0.50, 1024] - m: [0.67, 0.75, 768] - l: [1.00, 1.00, 512] - x: [1.00, 1.25, 512] - -# YOLOv8.0n backbone -backbone: - # [from, repeats, module, args] - - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2 - - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4 - - [-1, 3, C2f, [128, True]] - - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8 - - [-1, 6, C2f, [256, True]] - - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16 - - [-1, 6, C2f, [512, True]] - - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32 - - [-1, 3, C2f, [1024, True]] - - [-1, 1, SPPF, [1024, 5]] # 9 - -# YOLOv8.0n head -head: - - [-1, 1, nn.Upsample, [None, 2, "nearest"]] - - [[-1, 6], 1, Concat, [1]] # cat backbone P4 - - [-1, 3, C2f, [512]] # 12 - - - [-1, 1, nn.Upsample, [None, 2, "nearest"]] - - [[-1, 4], 1, Concat, [1]] # cat backbone P3 - - [-1, 3, C2f, [256]] # 15 (P3/8-small) - - - [-1, 1, Conv, [256, 3, 2]] - - [[-1, 12], 1, Concat, [1]] # cat head P4 - - [-1, 3, C2f, [512]] # 18 (P4/16-medium) - - - [-1, 1, Conv, [512, 3, 2]] - - [[-1, 9], 1, Concat, [1]] # cat head P5 - - [-1, 3, C2f, [1024]] # 21 (P5/32-large) - - - [[15, 18, 21], 1, Segment, [nc, 32, 64]] # Segment(P3, P4, P5) changed from 256 \ No newline at end of file diff --git a/tutorials/mct_model_garden/models_pytorch/yolov8/yolov8.py b/tutorials/mct_model_garden/models_pytorch/yolov8/yolov8.py deleted file mode 100644 index 973872c82..000000000 --- a/tutorials/mct_model_garden/models_pytorch/yolov8/yolov8.py +++ /dev/null @@ -1,693 +0,0 @@ -# ------------------------------------------------------------------------------ -# This file contains code from the Ultralytics repository (YOLOv8) -# Copyright (C) 2024 Ultralytics -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . -# ------------------------------------------------------------------------------ - -""" -Yolov8n Object Detection Model - PyTorch implementation - -This code contains a PyTorch implementation of Yolov8n object detection model, following -https://github.com/ultralytics/ultralytics. - -Usage: - model, cfg_dict = yolov8_pytorch("yolov8n.yaml") - pretrained_weights = torch.load('/path/to/pretrained/yolov8n.pt')['model'].state_dict() - model.load_state_dict(pretrained_weights, strict=False) - model.eval() - -Main changes: - Modify layers to make them more suitable for quantization - torch.fx compatibility - Detect head (mainly the box decoding part that was optimized for model quantization) - Inheritance class from HuggingFace - Implement box decoding into Detect Layer - -Notes and Limitations: -- The model has been tested only with the default settings from Ultralytics, specifically using a 640x640 input resolution and 80 object classes. -- Anchors and strides are hardcoded as constants within the model, meaning they are not included in the weights file from Ultralytics. 
- -The code is organized as follows: -- Classes definitions of Yolov8n building blocks: Conv, Bottleneck, C2f, SPPF, Upsample, Concaat, DFL and Detect -- Detection Model definition: ModelPyTorch -- PostProcessWrapper Wrapping the Yolov8n model with PostProcess layer (Specifically, sony_custom_layers/multiclass_nms) -- A getter function for getting a new instance of the model - -For more details on the Yolov8n model, refer to the original repository: -https://github.com/ultralytics/ultralytics - -""" -import contextlib -import math -import re -from copy import deepcopy -from typing import Dict, List, Tuple, Any - -import numpy as np -import torch -import torch.nn as nn -import yaml -from torch import Tensor -from huggingface_hub import PyTorchModelHubMixin -import importlib - -from model_compression_toolkit.core.pytorch.pytorch_device_config import get_working_device -from tutorials.mct_model_garden.models_pytorch.yolov8.yolov8_postprocess import postprocess_yolov8_keypoints -if importlib.util.find_spec("sony_custom_layers"): - from sony_custom_layers.pytorch.object_detection.nms import multiclass_nms - - -def yaml_load(file: str = 'data.yaml', append_filename: bool = False) -> Dict[str, any]: - """ - Load YAML data from a file. - - Args: - file (str, optional): File name. Default is 'data.yaml'. - append_filename (bool): Add the YAML filename to the YAML dictionary. Default is False. - - Returns: - dict: YAML data and file name. - """ - with open(file, errors='ignore', encoding='utf-8') as f: - s = f.read() # string - if not s.isprintable(): # remove special characters - s = re.sub(r'[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\uD7FF\uE000-\uFFFD\U00010000-\U0010ffff]+', '', s) - return {**yaml.safe_load(s), 'yaml_file': str(file)} if append_filename else yaml.safe_load(s) - - -def autopad(k, p=None, d=1): # kernel, padding, dilation - """Pad to 'same' shape outputs.""" - if d > 1: - k = d * (k - 1) + 1 if isinstance(k, int) else [d * (x - 1) + 1 for x in k] # actual kernel-size - if p is None: - p = k // 2 if isinstance(k, int) else [x // 2 for x in k] # auto-pad - return p - - -def make_divisible(x, divisor): - """Returns nearest x divisible by divisor.""" - if isinstance(divisor, torch.Tensor): - divisor = int(divisor.max()) # to int - return math.ceil(x / divisor) * divisor - - -class Conv(nn.Module): - """Standard convolution with args(ch_in, ch_out, kernel, stride, padding, groups, dilation, activation).""" - - default_act = nn.SiLU() # default activation - - def __init__(self, c1, c2, k=1, s=1, p=None, g=1, d=1, act=True): - """Initialize Conv layer with given arguments including activation.""" - super().__init__() - self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p, d), groups=g, dilation=d, bias=False) - self.bn = nn.BatchNorm2d(c2) - self.act = self.default_act if act is True else act if isinstance(act, nn.Module) else nn.Identity() - - def forward(self, x): - """Apply convolution, batch normalization and activation to input tensor.""" - return self.act(self.bn(self.conv(x))) - - def forward_fuse(self, x): - """Perform transposed convolution of 2D data.""" - return self.act(self.conv(x)) - - -class Bottleneck(nn.Module): - """Standard bottleneck.""" - - def __init__(self, c1, c2, shortcut=True, g=1, k=(3, 3), e=0.5): - """Initializes a bottleneck module with given input/output channels, shortcut option, group, kernels, and - expansion. 
- """ - super().__init__() - c_ = int(c2 * e) # hidden channels - self.cv1 = Conv(c1, c_, k[0], 1) - self.cv2 = Conv(c_, c2, k[1], 1, g=g) - self.add = shortcut and c1 == c2 - - def forward(self, x): - """'forward()' applies the YOLO FPN to input data.""" - return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x)) - - -class C2f(nn.Module): - """Faster Implementation of CSP Bottleneck with 2 convolutions.""" - - def __init__(self, c1, c2, n=1, shortcut=False, g=1, e=0.5): - """Initialize CSP bottleneck layer with two convolutions with arguments ch_in, ch_out, number, shortcut, groups, - expansion. - """ - super().__init__() - self.c = int(c2 * e) # hidden channels - self.cv1 = Conv(c1, 2 * self.c, 1, 1) - self.cv2 = Conv((2 + n) * self.c, c2, 1) # optional act=FReLU(c2) - self.m = nn.ModuleList(Bottleneck(self.c, self.c, shortcut, g, k=((3, 3), (3, 3)), e=1.0) for _ in range(n)) - - def forward(self, x): - """Forward pass through C2f layer.""" - - y1 = self.cv1(x).chunk(2, 1) - y = [y1[0], y1[1]] - y.extend(m(y[-1]) for m in self.m) - return self.cv2(torch.cat(y, 1)) - - def forward_split(self, x): - """Forward pass using split() instead of chunk().""" - y = list(self.cv1(x).split((self.c, self.c), 1)) - y.extend(m(y[-1]) for m in self.m) - return self.cv2(torch.cat(y, 1)) - - -class SPPF(nn.Module): - """Spatial Pyramid Pooling - Fast (SPPF) layer for YOLOv5 by Glenn Jocher.""" - - def __init__(self, c1, c2, k=5): - """ - Initializes the SPPF layer with given input/output channels and kernel size. - - This module is equivalent to SPP(k=(5, 9, 13)). - """ - super().__init__() - c_ = c1 // 2 # hidden channels - self.cv1 = Conv(c1, c_, 1, 1) - self.cv2 = Conv(c_ * 4, c2, 1, 1) - self.m = nn.MaxPool2d(kernel_size=k, stride=1, padding=k // 2) - - def forward(self, x): - """Forward pass through Ghost Convolution block.""" - x = self.cv1(x) - y1 = self.m(x) - y2 = self.m(y1) - return self.cv2(torch.cat((x, y1, y2, self.m(y2)), 1)) - - -class Concat(nn.Module): - """Concatenate a list of tensors along dimension.""" - - def __init__(self, dimension=1): - """Concatenates a list of tensors along a specified dimension.""" - super().__init__() - self.d = dimension - - def forward(self, x): - """Forward pass for the YOLOv8 mask Proto module.""" - return torch.cat(x, self.d) - - -class DFL(nn.Module): - """ - Integral module of Distribution Focal Loss (DFL). 
- - Proposed in Generalized Focal Loss https://ieeexplore.ieee.org/document/9792391 - """ - - def __init__(self, c1=16): - """Initialize a convolutional layer with a given number of input channels.""" - super().__init__() - self.conv = nn.Conv2d(c1, 1, 1, bias=False).requires_grad_(False) - x = torch.arange(c1, dtype=torch.float) - self.conv.weight.data[:] = nn.Parameter(x.view(1, c1, 1, 1)) - self.c1 = c1 - - def forward(self, x): - """Applies a transformer layer on input tensor 'x' and returns a tensor.""" - b, c, a = x.shape # batch, channels, anchors - return self.conv(x.view(b, 4, self.c1, a).transpose(2, 1).softmax(1)).view(b, 4, a) - - -def make_anchors(feats, strides, grid_cell_offset=0.5): - anchor_points, stride_tensor = [], [] - assert feats is not None - dtype = feats[0].dtype - for i, stride in enumerate(strides): - h, w = int(feats[i]), int(feats[i]) - sx = torch.arange(end=w, dtype=dtype) + grid_cell_offset # shift x - sy = torch.arange(end=h, dtype=dtype) + grid_cell_offset # shift y - sy, sx = torch.meshgrid(sy, sx) - anchor_points.append(torch.stack((sx, sy), -1).view(-1, 2)) - stride_tensor.append(torch.full((h * w, 1), stride, dtype=dtype)) - return torch.cat(anchor_points), torch.cat(stride_tensor) - - -class Detect(nn.Module): - def __init__(self, nc: int = 80, - ch: List[int] = ()): - """ - Detection layer for YOLOv8. - - Args: - nc (int): Number of classes. - ch (List[int]): List of channel values for detection layers. - - """ - super().__init__() - self.nc = nc # number of classes - self.nl = len(ch) # number of detection layers - self.reg_max = 16 # DFL channels (ch[0] // 16 to scale 4/8/12/16/20 for n/s/m/l/x) - self.no = nc + self.reg_max * 4 # number of outputs per anchor - self.stride = torch.Tensor([8, 16, 32]) - self.feat_sizes = torch.Tensor([80, 40, 20]) - self.img_size = 640 # img size - c2, c3 = max((16, ch[0] // 4, self.reg_max * 4)), max(ch[0], min(self.nc, 100)) # channels - self.cv2 = nn.ModuleList( - nn.Sequential(Conv(x, c2, 3), - Conv(c2, c2, 3), nn.Conv2d(c2, 4 * self.reg_max, 1)) for x in ch) - self.cv3 = nn.ModuleList(nn.Sequential(Conv(x, c3, 3), Conv(c3, c3, 3), - nn.Conv2d(c3, self.nc, 1)) for x in ch) - self.dfl = DFL(self.reg_max) if self.reg_max > 1 else nn.Identity() - anchors, strides = (x.transpose(0, 1) for x in make_anchors(self.feat_sizes, - self.stride, 0.5)) - anchors = anchors * strides - - self.register_buffer('anchors', anchors) - self.register_buffer('strides', strides) - - def forward(self, x: Tensor) -> Tuple[Tensor, Tensor]: - shape = x[0].shape # BCHW - for i in range(self.nl): - x[i] = torch.cat((self.cv2[i](x[i]), self.cv3[i](x[i])), 1) - box, cls = torch.cat([xi.view(shape[0], self.no, -1) for xi in x], 2).split( - (self.reg_max * 4, self.nc), 1) - - y_cls = cls.sigmoid().transpose(1, 2) - - dfl = self.dfl(box) - dfl = dfl * self.strides - - # box decoding - lt, rb = dfl.chunk(2, 1) - y1 = self.anchors.unsqueeze(0)[:, 0, :] - lt[:, 0, :] - x1 = self.anchors.unsqueeze(0)[:, 1, :] - lt[:, 1, :] - y2 = self.anchors.unsqueeze(0)[:, 0, :] + rb[:, 0, :] - x2 = self.anchors.unsqueeze(0)[:, 1, :] + rb[:, 1, :] - y_bb = torch.stack((x1, y1, x2, y2), 1).transpose(1, 2) - return y_bb, y_cls - - def bias_init(self): - """Initialize Detect() biases, WARNING: requires stride availability.""" - m = self # self.model[-1] # Detect() module - for a, b, s in zip(m.cv2, m.cv3, m.stride): # from - a[-1].bias.data[:] = 1.0 # box - b[-1].bias.data[: m.nc] = math.log(5 / m.nc / (640 / s) ** 2) # cls (.01 objects, 80 classes, 640 img) - - 
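# --- Editor's illustrative sketch (not part of the deleted file above) --------------
# The Detect head removed here builds a fixed anchor grid for a 640x640 input
# (feature sizes [80, 40, 20], strides [8, 16, 32]) and decodes the DFL output as
# per-anchor (left, top, right, bottom) distances. The standalone snippet below
# reproduces that decoding on dummy tensors so the box-decoding step is easy to
# follow in isolation; the helper name `make_anchor_grid` and the random `dist`
# tensor are illustrative assumptions, not repository code.
import torch

def make_anchor_grid(feat_sizes=(80, 40, 20), strides=(8, 16, 32), offset=0.5):
    points, stride_vals = [], []
    for fs, s in zip(feat_sizes, strides):
        sx = torch.arange(fs, dtype=torch.float32) + offset   # grid-cell centers, x
        sy = torch.arange(fs, dtype=torch.float32) + offset   # grid-cell centers, y
        gy, gx = torch.meshgrid(sy, sx, indexing="ij")
        points.append(torch.stack((gx, gy), -1).reshape(-1, 2))
        stride_vals.append(torch.full((fs * fs, 1), float(s)))
    return torch.cat(points), torch.cat(stride_vals)

anchor_points, stride_tensor = make_anchor_grid()              # (8400, 2), (8400, 1)
anchors = (anchor_points * stride_tensor).transpose(0, 1)      # (2, 8400), pixel units
strides = stride_tensor.transpose(0, 1)                        # (1, 8400)

# Dummy DFL output: distances to the four box sides for one image, scaled by stride
# in the same way as Detect.forward (dfl = dfl * self.strides).
dist = torch.rand(1, 4, anchors.shape[1]) * strides
lt, rb = dist.chunk(2, 1)

# Same decoding as Detect.forward (variable names kept as in the original code).
y1 = anchors[0] - lt[:, 0, :]
x1 = anchors[1] - lt[:, 1, :]
y2 = anchors[0] + rb[:, 0, :]
x2 = anchors[1] + rb[:, 1, :]
y_bb = torch.stack((x1, y1, x2, y2), 1).transpose(1, 2)
print(y_bb.shape)  # torch.Size([1, 8400, 4]) -> one xyxy box per anchor
# -------------------------------------------------------------------------------------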
-class Detect_wo_bb_dec(nn.Module): - def __init__(self, nc: int = 80, - ch: List[int] = ()): - """ - Detection layer for YOLOv8. Bounding box decoding was removed. - Args: - nc (int): Number of classes. - ch (List[int]): List of channel values for detection layers. - """ - super().__init__() - self.nc = nc # number of classes - self.nl = len(ch) # number of detection layers - self.reg_max = 16 # DFL channels (ch[0] // 16 to scale 4/8/12/16/20 for n/s/m/l/x) - self.no = nc + self.reg_max * 4 # number of outputs per anchor - self.stride = torch.Tensor([8, 16, 32]) - self.feat_sizes = torch.Tensor([80, 40, 20]) - self.img_size = 640 # img size - c2, c3 = max((16, ch[0] // 4, self.reg_max * 4)), max(ch[0], min(self.nc, 100)) # channels - self.cv2 = nn.ModuleList( - nn.Sequential(Conv(x, c2, 3), - Conv(c2, c2, 3), nn.Conv2d(c2, 4 * self.reg_max, 1)) for x in ch) - self.cv3 = nn.ModuleList(nn.Sequential(Conv(x, c3, 3), Conv(c3, c3, 3), - nn.Conv2d(c3, self.nc, 1)) for x in ch) - self.dfl = DFL(self.reg_max) if self.reg_max > 1 else nn.Identity() - - def forward(self, x: Tensor) -> Tuple[Tensor, Tensor]: - shape = x[0].shape # BCHW - for i in range(self.nl): - x[i] = torch.cat((self.cv2[i](x[i]), self.cv3[i](x[i])), 1) - box, cls = torch.cat([xi.view(shape[0], self.no, -1) for xi in x], 2).split( - (self.reg_max * 4, self.nc), 1) - - y_cls = cls.sigmoid() - y_bb = self.dfl(box) - return y_bb, y_cls - - - def bias_init(self): - """Initialize Detect() biases, WARNING: requires stride availability.""" - m = self # self.model[-1] # Detect() module - for a, b, s in zip(m.cv2, m.cv3, m.stride): # from - a[-1].bias.data[:] = 1.0 # box - b[-1].bias.data[: m.nc] = math.log(5 / m.nc / (640 / s) ** 2) # cls (.01 objects, 80 classes, 640 img) - -class Pose(Detect_wo_bb_dec): - """YOLOv8 Pose head for keypoints models.""" - - def __init__(self, nc=80, kpt_shape=(17, 3), ch=()): - """Initialize YOLO network with default parameters and Convolutional Layers.""" - super().__init__(nc, ch) - self.kpt_shape = kpt_shape # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible) - self.nk = kpt_shape[0] * kpt_shape[1] # number of keypoints total - self.detect = Detect_wo_bb_dec.forward - - c4 = max(ch[0] // 4, self.nk) - self.cv4 = nn.ModuleList(nn.Sequential(Conv(x, c4, 3), Conv(c4, c4, 3), nn.Conv2d(c4, self.nk, 1)) for x in ch) - - def forward(self, x): - """Perform forward pass through YOLO model and return predictions.""" - bs = x[0].shape[0] # batch size - kpt = torch.cat([self.cv4[i](x[i]).view(bs, self.nk, -1) for i in range(self.nl)], -1) # (bs, 17*3, h*w) - y_bb, y_cls = self.detect(self, x) - return y_bb, y_cls, kpt - -def parse_model(d, ch, verbose=True): # model_dict, input_channels(3) - """Parse a YOLO model.yaml dictionary into a PyTorch model.""" - import ast - - # Args - max_channels = float("inf") - nc, act, scales = (d.get(x) for x in ("nc", "activation", "scales")) - depth, width, kpt_shape = (d.get(x, 1.0) for x in ("depth_multiple", "width_multiple", "kpt_shape")) - if scales: - scale = d.get("scale") - if not scale: - scale = tuple(scales.keys())[0] - depth, width, max_channels = scales[scale] - - if act: - Conv.default_act = eval(act) # redefine default activation, i.e. Conv.default_act = nn.SiLU() - ch = [ch] - layers, save, c2 = [], [], ch[-1] # layers, savelist, ch out - for i, (f, n, m, args) in enumerate(d["backbone"] + d["head"]): # from, number, module, args - m = getattr(torch.nn, m[3:]) if "nn." 
in m else globals()[m] # get module - for j, a in enumerate(args): - if isinstance(a, str): - with contextlib.suppress(ValueError): - args[j] = locals()[a] if a in locals() else ast.literal_eval(a) - - n = n_ = max(round(n * depth), 1) if n > 1 else n # depth gain - if m in ( - Conv, - Bottleneck, - SPPF, - C2f, - nn.ConvTranspose2d, - ): - c1, c2 = ch[f], args[0] - if c2 != nc: # if c2 not equal to number of classes (i.e. for Classify() output) - c2 = make_divisible(min(c2, max_channels) * width, 8) - args = [c1, c2, *args[1:]] - if m in [C2f]: - args.insert(2, n) # number of repeats - n = 1 - elif m is nn.BatchNorm2d: - args = [ch[f]] - elif m is Concat: - c2 = sum(ch[x] for x in f) - elif m in [Segment, Detect, Pose]: - args.append([ch[x] for x in f]) - else: - c2 = ch[f] - - m_ = nn.Sequential(*(m(*args) for _ in range(n))) if n > 1 else m(*args) # module - t = str(m)[8:-2].replace("__main__.", "") # module type - m.np = sum(x.numel() for x in m_.parameters()) # number params - m_.i, m_.f, m_.type = i, f, t # attach index, 'from' index, type - save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1) # append to savelist - layers.append(m_) - if i == 0: - ch = [] - ch.append(c2) - return nn.Sequential(*layers), sorted(save) - -def initialize_weights(model): - """Initialize model weights to random values.""" - for m in model.modules(): - t = type(m) - if t is nn.Conv2d: - pass # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') - elif t is nn.BatchNorm2d: - m.eps = 1e-3 - m.momentum = 0.03 - elif t in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU]: - m.inplace = True - -def model_predict(model: Any, - inputs: np.ndarray) -> List: - """ - Perform inference using the provided PyTorch model on the given inputs. - - This function handles moving the inputs to the appropriate torch device and data type, - and detaches and moves the outputs to the CPU. - - Args: - model (Any): The PyTorch model used for inference. - inputs (np.ndarray): Input data to perform inference on. - - Returns: - List: List containing tensors of predictions. - """ - device = get_working_device() - inputs = torch.from_numpy(inputs).to(device=device, dtype=torch.float) - - # Run Pytorch inference on the batch - outputs = model(inputs) - - # Detach outputs and move to cpu - outputs = outputs.cpu().detach() - return outputs - -class PostProcessWrapper(nn.Module): - def __init__(self, - model: nn.Module, - score_threshold: float = 0.001, - iou_threshold: float = 0.7, - max_detections: int = 300): - """ - Wrapping PyTorch Module with multiclass_nms layer from sony_custom_layers. - - Args: - model (nn.Module): Model instance. - score_threshold (float): Score threshold for non-maximum suppression. - iou_threshold (float): Intersection over union threshold for non-maximum suppression. - max_detections (float): The number of detections to return. - """ - super(PostProcessWrapper, self).__init__() - self.model = model - self.score_threshold = score_threshold - self.iou_threshold = iou_threshold - self.max_detections = max_detections - - def forward(self, images): - # model inference - outputs = self.model(images) - - boxes = outputs[0] - scores = outputs[1] - nms = multiclass_nms(boxes=boxes, scores=scores, score_threshold=self.score_threshold, - iou_threshold=self.iou_threshold, max_detections=self.max_detections) - return nms - -def keypoints_model_predict(model: Any, inputs: np.ndarray) -> List: - """ - Perform inference using the provided PyTorch model on the given inputs. 
- - This function handles moving the inputs to the appropriate torch device and data type, - and detaches and moves the outputs to the CPU. - - Args: - model (Any): The PyTorch model used for inference. - inputs (np.ndarray): Input data to perform inference on. - - Returns: - List: List containing tensors of predictions. - """ - device = get_working_device() - inputs = torch.from_numpy(inputs).to(device=device, dtype=torch.float) - - # Run Pytorch inference on the batch - outputs = model(inputs) - - # Detach outputs and move to cpu - output_np = [o.detach().cpu().numpy() for o in outputs] - - return postprocess_yolov8_keypoints(output_np) - -def seg_model_predict(model: Any, - inputs: np.ndarray) -> List: - """ - Perform inference using the provided PyTorch model on the given inputs. - - This function handles moving the inputs to the appropriate torch data type and format, - and returns the outputs. - - Args: - model (Any): The PyTorch model used for inference. - inputs (np.ndarray): Input data to perform inference on. - - Returns: - List: List containing tensors of predictions. - """ - input_tensor = torch.from_numpy(inputs).unsqueeze(0) # Add batch dimension - device = get_working_device() - input_tensor = input_tensor.to(device) - # Run the model - with torch.no_grad(): - outputs = model(input_tensor) - outputs = [output.cpu() for output in outputs] - return outputs - -def yolov8_pytorch(model_yaml: str) -> (nn.Module, Dict): - """ - Create PyTorch model of YOLOv8 detection. - - Args: - model_yaml (str): Name of the YOLOv8 model configuration file (YAML format). - - Returns: - model: YOLOv8 detection model. - cfg_dict: YOLOv8 detection model configuration dictionary. - """ - cfg = model_yaml - cfg_dict = yaml_load(cfg, append_filename=True) # model dict - model = ModelPyTorch(cfg_dict) # model - return model, cfg_dict - - -def yolov8_pytorch_pp(model_yaml: str, - score_threshold: float = 0.001, - iou_threshold: float = 0.7, - max_detections: int = 300) -> (nn.Module, Dict): - """ - Create PyTorch model of YOLOv8 detection with PostProcess. - - Args: - model_yaml (str): Name of the YOLOv8 model configuration file (YAML format). - score_threshold (float): Score threshold for non-maximum suppression. - iou_threshold (float): Intersection over union threshold for non-maximum suppression. - max_detections (float): The number of detections to return. - - Returns: - model: YOLOv8_pp detection model. - cfg_dict: YOLOv8_pp detection model configuration dictionary. - """ - cfg = model_yaml - cfg_dict = yaml_load(cfg, append_filename=True) # model dict - model = ModelPyTorch(cfg_dict) # model - model_pp = PostProcessWrapper(model=model, - score_threshold=score_threshold, - iou_threshold=iou_threshold, - max_detections=max_detections) - return model_pp, cfg_dict - -class Proto(nn.Module): - """YOLOv8 mask Proto module for segmentation models.""" - - def __init__(self, c1, c_=256, c2=32): - """ - Initializes the YOLOv8 mask Proto module with specified number of protos and masks. - - Input arguments are ch_in, number of protos, number of masks. 
- """ - super().__init__() - self.cv1 = Conv(c1, c_, k=3) - self.upsample = nn.ConvTranspose2d(c_, c_, 2, 2, 0, bias=True) # nn.Upsample(scale_factor=2, mode='nearest') - self.cv2 = Conv(c_, c_, k=3) - self.cv3 = Conv(c_, c2) - - def forward(self, x): - """Performs a forward pass through layers using an upsampled input image.""" - return self.cv3(self.cv2(self.upsample(self.cv1(x)))) - - -class Segment(Detect): - """YOLOv8 Segment head for segmentation models.""" - - def __init__(self, nc=80, nm=32, npr=256, ch=()): - """Initialize the YOLO model attributes such as the number of masks, prototypes, and the convolution layers.""" - super().__init__(nc, ch) - self.nm = nm # number of masks - self.npr = npr # number of protos - self.proto = Proto(ch[0], self.npr, self.nm) # protos - self.detect = Detect.forward - - c4 = max(ch[0] // 4, self.nm) - self.cv4 = nn.ModuleList(nn.Sequential(Conv(x, c4, 3), Conv(c4, c4, 3), nn.Conv2d(c4, self.nm, 1)) for x in ch) - - def forward(self, x): - """Return model outputs and mask coefficients if training, otherwise return outputs and mask coefficients.""" - p = self.proto(x[0]) # mask protos - bs = p.shape[0] # batch size - - mc = torch.cat([self.cv4[i](x[i]).view(bs, self.nm, -1) for i in range(self.nl)], 2) # mask coefficients - y_bb, y_cls = self.detect(self, x) - - return y_bb, y_cls, mc, p - - -class ModelPyTorch(nn.Module, PyTorchModelHubMixin): - """ - Unified YOLOv8 model for both detection and segmentation. - - Args: - cfg (dict): Model configuration in the form of a YAML string or a dictionary. - ch (int): Number of input channels. - mode (str): Mode of operation ('detection' or 'segmentation'). - """ - def __init__(self, cfg: dict, ch: int = 3, mode: str = 'detection'): - super().__init__() - self.yaml = cfg - ch = self.yaml['ch'] = self.yaml.get('ch', ch) - self.mode = mode - self.model, self.save = parse_model(deepcopy(self.yaml), ch=ch) - self.names = {i: f"{i}" for i in range(self.yaml["nc"])} - self.inplace = self.yaml.get("inplace", True) - - m = self.model[-1] - if isinstance(m, Segment) and self.mode == 'segmentation': - m.inplace = self.inplace - m.bias_init() - elif isinstance(m, Detect) and self.mode == 'detection': - m.inplace = self.inplace - m.bias_init() - else: - self.stride = torch.Tensor([32]) - - initialize_weights(self) - - def forward(self, x): - y = [] - for m in self.model: - if m.f != -1: - x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f] - x = m(x) - y.append(x if m.i in self.save else None) - return x - - def load_weights(self, path): - self.load_state_dict(torch.load(path)) - - def save_weights(self, path): - torch.save(self.state_dict(), path) - - def make_tensors_contiguous(self): - for name, param in self.named_parameters(): - if not param.is_contiguous(): - param.data = param.data.contiguous() - - for name, buffer in self.named_buffers(): - if not buffer.is_contiguous(): - buffer.data = buffer.data.contiguous() - - def save_pretrained(self, save_directory, **kwargs): - # Make tensors contiguous - self.make_tensors_contiguous() - # Call the original save_pretrained method - super().save_pretrained(save_directory, **kwargs) diff --git a/tutorials/mct_model_garden/models_pytorch/yolov8/yolov8_postprocess.py b/tutorials/mct_model_garden/models_pytorch/yolov8/yolov8_postprocess.py deleted file mode 100644 index ae661ac76..000000000 --- a/tutorials/mct_model_garden/models_pytorch/yolov8/yolov8_postprocess.py +++ /dev/null @@ -1,124 +0,0 @@ -# 
------------------------------------------------------------------------------ -# This file contains code from the Ultralytics repository (YOLOv8) -# Copyright (C) 2024 Ultralytics -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . -# ------------------------------------------------------------------------------ - -""" -Part of this code was based on Ultralytics implementation. For more details, refer to the original repository: -https://github.com/ultralytics/ultralytics -""" -from typing import Tuple -import numpy as np -from tutorials.mct_model_garden.evaluation_metrics.coco_evaluation_utils import convert_to_ymin_xmin_ymax_xmax_format, \ - BoxFormat, nms, combined_nms - - -def postprocess_yolov8_keypoints(outputs: Tuple[np.ndarray, np.ndarray, np.ndarray], - conf: float = 0.001, - iou_thres: float = 0.7, - max_out_dets: int = 300) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: - """ - Postprocess the outputs of a YOLOv8 model for pose estimation. - - Args: - outputs (Tuple[np.ndarray, np.ndarray, np.ndarray]): Tuple containing the model outputs for bounding boxes, - scores and keypoint predictions. - conf (float, optional): Confidence threshold for bounding box predictions. Default is 0.001. - iou_thres (float, optional): IoU (Intersection over Union) threshold for Non-Maximum Suppression (NMS). - Default is 0.7. - max_out_dets (int, optional): Maximum number of output detections to keep after NMS. Default is 300. - - Returns: - Tuple[np.ndarray, np.ndarray, np.ndarray]: Tuple containing the post-processed bounding boxes, - their corresponding scores and keypoints. 
- """ - kpt_shape = (17, 3) - feat_sizes = np.array([80, 40, 20]) - stride_sizes = np.array([8, 16, 32]) - a, s = (x.transpose() for x in make_anchors_yolo_v8(feat_sizes, stride_sizes, 0.5)) - - y_bb, y_cls, kpts = outputs - dbox = dist2bbox_yolo_v8(y_bb, np.expand_dims(a, 0), xywh=True, dim=1) * s - detect_out = np.concatenate((dbox, y_cls), 1) - # additional part for pose estimation - ndim = kpt_shape[1] - pred_kpt = kpts.copy() - if ndim == 3: - pred_kpt[:, 2::3] = 1 / (1 + np.exp(-pred_kpt[:, 2::3])) # sigmoid (WARNING: inplace .sigmoid_() Apple MPS bug) - pred_kpt[:, 0::ndim] = (pred_kpt[:, 0::ndim] * 2.0 + (a[0] - 0.5)) * s - pred_kpt[:, 1::ndim] = (pred_kpt[:, 1::ndim] * 2.0 + (a[1] - 0.5)) * s - - x_batch = np.concatenate([detect_out.transpose([0, 2, 1]), pred_kpt.transpose([0, 2, 1])], 2) - nms_bbox, nms_scores, nms_kpts = [], [], [] - for x in x_batch: - x = x[(x[:, 4] > conf)] - x = x[np.argsort(-x[:, 4])[:8400]] - x[..., :4] = convert_to_ymin_xmin_ymax_xmax_format(x[..., :4], BoxFormat.XC_YC_W_H) - boxes = x[..., :4] - scores = x[..., 4] - - # Original post-processing part - valid_indexs = nms(boxes, scores, iou_thres=iou_thres, max_out_dets=max_out_dets) - x = x[valid_indexs] - nms_bbox.append(x[:, :4]) - nms_scores.append(x[:, 4]) - nms_kpts.append(x[:, 5:]) - - return nms_bbox, nms_scores, nms_kpts - - -def postprocess_yolov8_inst_seg(outputs: Tuple[np.ndarray, np.ndarray, np.ndarray], - conf: float = 0.001, - iou_thres: float = 0.7, - max_out_dets: int = 300) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: - - feat_sizes = np.array([80, 40, 20]) - stride_sizes = np.array([8, 16, 32]) - a, s = (x.transpose() for x in make_anchors_yolo_v8(feat_sizes, stride_sizes, 0.5)) - - y_bb, y_cls, y_masks = outputs - dbox = dist2bbox_yolo_v8(y_bb, a, xywh=True, dim=1) * s - detect_out = np.concatenate((dbox, y_cls), 1) - - xd = detect_out.transpose([0, 2, 1]) - - return combined_nms(xd[..., :4], xd[..., 4:84], iou_thres, conf, max_out_dets) - - -def make_anchors_yolo_v8(feats, strides, grid_cell_offset=0.5): - """Generate anchors from features.""" - anchor_points, stride_tensor = [], [] - assert feats is not None - for i, stride in enumerate(strides): - h, w = feats[i], feats[i] - sx = np.arange(stop=w) + grid_cell_offset # shift x - sy = np.arange(stop=h) + grid_cell_offset # shift y - sy, sx = np.meshgrid(sy, sx, indexing='ij') - anchor_points.append(np.stack((sx, sy), -1).reshape((-1, 2))) - stride_tensor.append(np.full((h * w, 1), stride)) - return np.concatenate(anchor_points), np.concatenate(stride_tensor) - - -def dist2bbox_yolo_v8(distance, anchor_points, xywh=True, dim=-1): - """Transform distance(ltrb) to box(xywh or xyxy).""" - lt, rb = np.split(distance,2,axis=dim) - x1y1 = anchor_points - lt - x2y2 = anchor_points + rb - if xywh: - c_xy = (x1y1 + x2y2) / 2 - wh = x2y2 - x1y1 - return np.concatenate((c_xy, wh), dim) # xywh bbox - return np.concatenate((x1y1, x2y2), dim) # xyxy bbox diff --git a/tutorials/mct_model_garden/models_pytorch/yolov8/yolov8_preprocess.py b/tutorials/mct_model_garden/models_pytorch/yolov8/yolov8_preprocess.py deleted file mode 100644 index e13f3ee0a..000000000 --- a/tutorials/mct_model_garden/models_pytorch/yolov8/yolov8_preprocess.py +++ /dev/null @@ -1,60 +0,0 @@ -# ------------------------------------------------------------------------------ -# This file contains code from the Ultralytics repository (YOLOv8) -# Copyright (C) 2024 Ultralytics -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU 
Affero General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . -# ------------------------------------------------------------------------------ - -""" -This code is mostly based on Ultralytics implementation. For more details, refer to the original repository: -https://github.com/ultralytics/ultralytics -""" - -import numpy as np -from typing import Tuple -import cv2 - - -def yolov8_preprocess_chw_transpose(x: np.ndarray, img_mean: float = 0.0, img_std: float = 255.0, pad_values: int = 114, - size: Tuple[int, int] = (640, 640)) -> np.ndarray: - """ - Preprocess an input image for YOLOv8 model with additional CHW transpose (for PyTorch implementation) - - Args: - x (np.ndarray): Input image as a NumPy array. - img_mean (float): Mean value used for normalization. Default is 0.0. - img_std (float): Standard deviation used for normalization. Default is 255.0. - pad_values (int): Value used for padding. Default is 114. - size (Tuple[int, int]): Desired output size (height, width). Default is (640, 640). - - Returns: - np.ndarray: Preprocessed image as a NumPy array. - """ - - h, w = x.shape[:2] # Image size - hn, wn = size # Image new size - r = max(h / hn, w / wn) - hr, wr = int(np.round(h / r)), int(np.round(w / r)) - pad = ( - (int((hn - hr) / 2), int((hn - hr) / 2 + 0.5)), - (int((wn - wr) / 2), int((wn - wr) / 2 + 0.5)), - (0, 0) - ) - - x = np.flip(x, -1) # Flip image channels - x = cv2.resize(x, (wr, hr), interpolation=cv2.INTER_AREA) # Aspect ratio preserving resize - x = np.pad(x, pad, constant_values=pad_values) # Padding to the target size - x = (x - img_mean) / img_std # Normalization - x = x.transpose([2, 0, 1]) - return x diff --git a/tutorials/mct_model_garden/models_pytorch/yolov8/yolov8n-pose.yaml b/tutorials/mct_model_garden/models_pytorch/yolov8/yolov8n-pose.yaml deleted file mode 100644 index 61a3143e5..000000000 --- a/tutorials/mct_model_garden/models_pytorch/yolov8/yolov8n-pose.yaml +++ /dev/null @@ -1,67 +0,0 @@ -# ------------------------------------------------------------------------------ -# This file contains code from the Ultralytics repository (YOLOv8) -# Copyright (C) 2024 Ultralytics -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . -# ------------------------------------------------------------------------------ - -# The following code was mostly duplicated from https://github.com/ultralytics/ultralytics -# -# Ultralytics YOLO 🚀, AGPL-3.0 license -# YOLOv8-pose keypoints/pose estimation model. 
For Usage examples see https://docs.ultralytics.com/tasks/pose - -# Parameters -nc: 1 # number of classes -kpt_shape: [17, 3] # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible) -scales: # model compound scaling constants, i.e. 'model=yolov8n-pose.yaml' will call yolov8-pose.yaml with scale 'n' - # [depth, width, max_channels] - n: [0.33, 0.25, 1024] - s: [0.33, 0.50, 1024] - m: [0.67, 0.75, 768] - l: [1.00, 1.00, 512] - x: [1.00, 1.25, 512] - -# YOLOv8.0n backbone -backbone: - # [from, repeats, module, args] - - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2 - - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4 - - [-1, 3, C2f, [128, True]] - - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8 - - [-1, 6, C2f, [256, True]] - - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16 - - [-1, 6, C2f, [512, True]] - - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32 - - [-1, 3, C2f, [1024, True]] - - [-1, 1, SPPF, [1024, 5]] # 9 - -# YOLOv8.0n head -head: - - [-1, 1, nn.Upsample, [None, 2, "nearest"]] - - [[-1, 6], 1, Concat, [1]] # cat backbone P4 - - [-1, 3, C2f, [512]] # 12 - - - [-1, 1, nn.Upsample, [None, 2, "nearest"]] - - [[-1, 4], 1, Concat, [1]] # cat backbone P3 - - [-1, 3, C2f, [256]] # 15 (P3/8-small) - - - [-1, 1, Conv, [256, 3, 2]] - - [[-1, 12], 1, Concat, [1]] # cat head P4 - - [-1, 3, C2f, [512]] # 18 (P4/16-medium) - - - [-1, 1, Conv, [512, 3, 2]] - - [[-1, 9], 1, Concat, [1]] # cat head P5 - - [-1, 3, C2f, [1024]] # 21 (P5/32-large) - - - [[15, 18, 21], 1, Pose, [nc, kpt_shape]] # Pose(P3, P4, P5) \ No newline at end of file diff --git a/tutorials/mct_model_garden/models_pytorch/yolov8/yolov8n.yaml b/tutorials/mct_model_garden/models_pytorch/yolov8/yolov8n.yaml deleted file mode 100644 index 52659586c..000000000 --- a/tutorials/mct_model_garden/models_pytorch/yolov8/yolov8n.yaml +++ /dev/null @@ -1,66 +0,0 @@ -# ------------------------------------------------------------------------------ -# This file contains code from the Ultralytics repository (YOLOv8) -# Copyright (C) 2024 Ultralytics -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . -# ------------------------------------------------------------------------------ - -# The following code was mostly duplicated from https://github.com/ultralytics/ultralytics -# -# Ultralytics YOLO 🚀, AGPL-3.0 license -# Yolov8n Object Detection Model - Configuration for PyTorch implementation - -# Parameters -nc: 80 # number of classes -scales: # model compound scaling constants, i.e. 
'model=yolov8n.yaml' will call yolov8.yaml with scale 'n' - # [depth, width, max_channels] - n: [0.33, 0.25, 1024] # YOLOv8n summary: 225 layers, 3157200 parameters, 3157184 gradients, 8.9 GFLOPs - s: [0.33, 0.50, 1024] # YOLOv8s summary: 225 layers, 11166560 parameters, 11166544 gradients, 28.8 GFLOPs - m: [0.67, 0.75, 768] # YOLOv8m summary: 295 layers, 25902640 parameters, 25902624 gradients, 79.3 GFLOPs - l: [1.00, 1.00, 512] # YOLOv8l summary: 365 layers, 43691520 parameters, 43691504 gradients, 165.7 GFLOPs - x: [1.00, 1.25, 512] # YOLOv8x summary: 365 layers, 68229648 parameters, 68229632 gradients, 258.5 GFLOPs - -# YOLOv8.0n backbone -backbone: - # [from, repeats, module, args] - - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2 - - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4 - - [-1, 3, C2f, [128, True]] - - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8 - - [-1, 6, C2f, [256, True]] - - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16 - - [-1, 6, C2f, [512, True]] - - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32 - - [-1, 3, C2f, [1024, True]] - - [-1, 1, SPPF, [1024, 5]] # 9 - -# YOLOv8.0n head -head: - - [-1, 1, nn.Upsample, [None, 2, "nearest"]] - - [[-1, 6], 1, Concat, [1]] # cat backbone P4 - - [-1, 3, C2f, [512]] # 12 - - - [-1, 1, nn.Upsample, [None, 2, "nearest"]] - - [[-1, 4], 1, Concat, [1]] # cat backbone P3 - - [-1, 3, C2f, [256]] # 15 (P3/8-small) - - - [-1, 1, Conv, [256, 3, 2]] - - [[-1, 12], 1, Concat, [1]] # cat head P4 - - [-1, 3, C2f, [512]] # 18 (P4/16-medium) - - - [-1, 1, Conv, [512, 3, 2]] - - [[-1, 9], 1, Concat, [1]] # cat head P5 - - [-1, 3, C2f, [1024]] # 21 (P5/32-large) - - - [[15, 18, 21], 1, Detect, [nc]] # Detect(P3, P4, P5) \ No newline at end of file diff --git a/tutorials/mct_model_garden/models_pytorch/yolox/LICENSE b/tutorials/mct_model_garden/models_pytorch/yolox/LICENSE deleted file mode 100644 index bae94e189..000000000 --- a/tutorials/mct_model_garden/models_pytorch/yolox/LICENSE +++ /dev/null @@ -1,661 +0,0 @@ - GNU AFFERO GENERAL PUBLIC LICENSE - Version 3, 19 November 2007 - - Copyright (C) 2007 Free Software Foundation, Inc. - Everyone is permitted to copy and distribute verbatim copies - of this license document, but changing it is not allowed. - - Preamble - - The GNU Affero General Public License is a free, copyleft license for -software and other kinds of works, specifically designed to ensure -cooperation with the community in the case of network server software. - - The licenses for most software and other practical works are designed -to take away your freedom to share and change the works. By contrast, -our General Public Licenses are intended to guarantee your freedom to -share and change all versions of a program--to make sure it remains free -software for all its users. - - When we speak of free software, we are referring to freedom, not -price. Our General Public Licenses are designed to make sure that you -have the freedom to distribute copies of free software (and charge for -them if you wish), that you receive source code or can get it if you -want it, that you can change the software or use pieces of it in new -free programs, and that you know you can do these things. - - Developers that use our General Public Licenses protect your rights -with two steps: (1) assert copyright on the software, and (2) offer -you this License which gives you legal permission to copy, distribute -and/or modify the software. 
- - A secondary benefit of defending all users' freedom is that -improvements made in alternate versions of the program, if they -receive widespread use, become available for other developers to -incorporate. Many developers of free software are heartened and -encouraged by the resulting cooperation. However, in the case of -software used on network servers, this result may fail to come about. -The GNU General Public License permits making a modified version and -letting the public access it on a server without ever releasing its -source code to the public. - - The GNU Affero General Public License is designed specifically to -ensure that, in such cases, the modified source code becomes available -to the community. It requires the operator of a network server to -provide the source code of the modified version running there to the -users of that server. Therefore, public use of a modified version, on -a publicly accessible server, gives the public access to the source -code of the modified version. - - An older license, called the Affero General Public License and -published by Affero, was designed to accomplish similar goals. This is -a different license, not a version of the Affero GPL, but Affero has -released a new version of the Affero GPL which permits relicensing under -this license. - - The precise terms and conditions for copying, distribution and -modification follow. - - TERMS AND CONDITIONS - - 0. Definitions. - - "This License" refers to version 3 of the GNU Affero General Public License. - - "Copyright" also means copyright-like laws that apply to other kinds of -works, such as semiconductor masks. - - "The Program" refers to any copyrightable work licensed under this -License. Each licensee is addressed as "you". "Licensees" and -"recipients" may be individuals or organizations. - - To "modify" a work means to copy from or adapt all or part of the work -in a fashion requiring copyright permission, other than the making of an -exact copy. The resulting work is called a "modified version" of the -earlier work or a work "based on" the earlier work. - - A "covered work" means either the unmodified Program or a work based -on the Program. - - To "propagate" a work means to do anything with it that, without -permission, would make you directly or secondarily liable for -infringement under applicable copyright law, except executing it on a -computer or modifying a private copy. Propagation includes copying, -distribution (with or without modification), making available to the -public, and in some countries other activities as well. - - To "convey" a work means any kind of propagation that enables other -parties to make or receive copies. Mere interaction with a user through -a computer network, with no transfer of a copy, is not conveying. - - An interactive user interface displays "Appropriate Legal Notices" -to the extent that it includes a convenient and prominently visible -feature that (1) displays an appropriate copyright notice, and (2) -tells the user that there is no warranty for the work (except to the -extent that warranties are provided), that licensees may convey the -work under this License, and how to view a copy of this License. If -the interface presents a list of user commands or options, such as a -menu, a prominent item in the list meets this criterion. - - 1. Source Code. - - The "source code" for a work means the preferred form of the work -for making modifications to it. "Object code" means any non-source -form of a work. 
- - A "Standard Interface" means an interface that either is an official -standard defined by a recognized standards body, or, in the case of -interfaces specified for a particular programming language, one that -is widely used among developers working in that language. - - The "System Libraries" of an executable work include anything, other -than the work as a whole, that (a) is included in the normal form of -packaging a Major Component, but which is not part of that Major -Component, and (b) serves only to enable use of the work with that -Major Component, or to implement a Standard Interface for which an -implementation is available to the public in source code form. A -"Major Component", in this context, means a major essential component -(kernel, window system, and so on) of the specific operating system -(if any) on which the executable work runs, or a compiler used to -produce the work, or an object code interpreter used to run it. - - The "Corresponding Source" for a work in object code form means all -the source code needed to generate, install, and (for an executable -work) run the object code and to modify the work, including scripts to -control those activities. However, it does not include the work's -System Libraries, or general-purpose tools or generally available free -programs which are used unmodified in performing those activities but -which are not part of the work. For example, Corresponding Source -includes interface definition files associated with source files for -the work, and the source code for shared libraries and dynamically -linked subprograms that the work is specifically designed to require, -such as by intimate data communication or control flow between those -subprograms and other parts of the work. - - The Corresponding Source need not include anything that users -can regenerate automatically from other parts of the Corresponding -Source. - - The Corresponding Source for a work in source code form is that -same work. - - 2. Basic Permissions. - - All rights granted under this License are granted for the term of -copyright on the Program, and are irrevocable provided the stated -conditions are met. This License explicitly affirms your unlimited -permission to run the unmodified Program. The output from running a -covered work is covered by this License only if the output, given its -content, constitutes a covered work. This License acknowledges your -rights of fair use or other equivalent, as provided by copyright law. - - You may make, run and propagate covered works that you do not -convey, without conditions so long as your license otherwise remains -in force. You may convey covered works to others for the sole purpose -of having them make modifications exclusively for you, or provide you -with facilities for running those works, provided that you comply with -the terms of this License in conveying all material for which you do -not control copyright. Those thus making or running the covered works -for you must do so exclusively on your behalf, under your direction -and control, on terms that prohibit them from making any copies of -your copyrighted material outside their relationship with you. - - Conveying under any other circumstances is permitted solely under -the conditions stated below. Sublicensing is not allowed; section 10 -makes it unnecessary. - - 3. Protecting Users' Legal Rights From Anti-Circumvention Law. 
- - No covered work shall be deemed part of an effective technological -measure under any applicable law fulfilling obligations under article -11 of the WIPO copyright treaty adopted on 20 December 1996, or -similar laws prohibiting or restricting circumvention of such -measures. - - When you convey a covered work, you waive any legal power to forbid -circumvention of technological measures to the extent such circumvention -is effected by exercising rights under this License with respect to -the covered work, and you disclaim any intention to limit operation or -modification of the work as a means of enforcing, against the work's -users, your or third parties' legal rights to forbid circumvention of -technological measures. - - 4. Conveying Verbatim Copies. - - You may convey verbatim copies of the Program's source code as you -receive it, in any medium, provided that you conspicuously and -appropriately publish on each copy an appropriate copyright notice; -keep intact all notices stating that this License and any -non-permissive terms added in accord with section 7 apply to the code; -keep intact all notices of the absence of any warranty; and give all -recipients a copy of this License along with the Program. - - You may charge any price or no price for each copy that you convey, -and you may offer support or warranty protection for a fee. - - 5. Conveying Modified Source Versions. - - You may convey a work based on the Program, or the modifications to -produce it from the Program, in the form of source code under the -terms of section 4, provided that you also meet all of these conditions: - - a) The work must carry prominent notices stating that you modified - it, and giving a relevant date. - - b) The work must carry prominent notices stating that it is - released under this License and any conditions added under section - 7. This requirement modifies the requirement in section 4 to - "keep intact all notices". - - c) You must license the entire work, as a whole, under this - License to anyone who comes into possession of a copy. This - License will therefore apply, along with any applicable section 7 - additional terms, to the whole of the work, and all its parts, - regardless of how they are packaged. This License gives no - permission to license the work in any other way, but it does not - invalidate such permission if you have separately received it. - - d) If the work has interactive user interfaces, each must display - Appropriate Legal Notices; however, if the Program has interactive - interfaces that do not display Appropriate Legal Notices, your - work need not make them do so. - - A compilation of a covered work with other separate and independent -works, which are not by their nature extensions of the covered work, -and which are not combined with it such as to form a larger program, -in or on a volume of a storage or distribution medium, is called an -"aggregate" if the compilation and its resulting copyright are not -used to limit the access or legal rights of the compilation's users -beyond what the individual works permit. Inclusion of a covered work -in an aggregate does not cause this License to apply to the other -parts of the aggregate. - - 6. Conveying Non-Source Forms. 
- - You may convey a covered work in object code form under the terms -of sections 4 and 5, provided that you also convey the -machine-readable Corresponding Source under the terms of this License, -in one of these ways: - - a) Convey the object code in, or embodied in, a physical product - (including a physical distribution medium), accompanied by the - Corresponding Source fixed on a durable physical medium - customarily used for software interchange. - - b) Convey the object code in, or embodied in, a physical product - (including a physical distribution medium), accompanied by a - written offer, valid for at least three years and valid for as - long as you offer spare parts or customer support for that product - model, to give anyone who possesses the object code either (1) a - copy of the Corresponding Source for all the software in the - product that is covered by this License, on a durable physical - medium customarily used for software interchange, for a price no - more than your reasonable cost of physically performing this - conveying of source, or (2) access to copy the - Corresponding Source from a network server at no charge. - - c) Convey individual copies of the object code with a copy of the - written offer to provide the Corresponding Source. This - alternative is allowed only occasionally and noncommercially, and - only if you received the object code with such an offer, in accord - with subsection 6b. - - d) Convey the object code by offering access from a designated - place (gratis or for a charge), and offer equivalent access to the - Corresponding Source in the same way through the same place at no - further charge. You need not require recipients to copy the - Corresponding Source along with the object code. If the place to - copy the object code is a network server, the Corresponding Source - may be on a different server (operated by you or a third party) - that supports equivalent copying facilities, provided you maintain - clear directions next to the object code saying where to find the - Corresponding Source. Regardless of what server hosts the - Corresponding Source, you remain obligated to ensure that it is - available for as long as needed to satisfy these requirements. - - e) Convey the object code using peer-to-peer transmission, provided - you inform other peers where the object code and Corresponding - Source of the work are being offered to the general public at no - charge under subsection 6d. - - A separable portion of the object code, whose source code is excluded -from the Corresponding Source as a System Library, need not be -included in conveying the object code work. - - A "User Product" is either (1) a "consumer product", which means any -tangible personal property which is normally used for personal, family, -or household purposes, or (2) anything designed or sold for incorporation -into a dwelling. In determining whether a product is a consumer product, -doubtful cases shall be resolved in favor of coverage. For a particular -product received by a particular user, "normally used" refers to a -typical or common use of that class of product, regardless of the status -of the particular user or of the way in which the particular user -actually uses, or expects or is expected to use, the product. A product -is a consumer product regardless of whether the product has substantial -commercial, industrial or non-consumer uses, unless such uses represent -the only significant mode of use of the product. 
- - "Installation Information" for a User Product means any methods, -procedures, authorization keys, or other information required to install -and execute modified versions of a covered work in that User Product from -a modified version of its Corresponding Source. The information must -suffice to ensure that the continued functioning of the modified object -code is in no case prevented or interfered with solely because -modification has been made. - - If you convey an object code work under this section in, or with, or -specifically for use in, a User Product, and the conveying occurs as -part of a transaction in which the right of possession and use of the -User Product is transferred to the recipient in perpetuity or for a -fixed term (regardless of how the transaction is characterized), the -Corresponding Source conveyed under this section must be accompanied -by the Installation Information. But this requirement does not apply -if neither you nor any third party retains the ability to install -modified object code on the User Product (for example, the work has -been installed in ROM). - - The requirement to provide Installation Information does not include a -requirement to continue to provide support service, warranty, or updates -for a work that has been modified or installed by the recipient, or for -the User Product in which it has been modified or installed. Access to a -network may be denied when the modification itself materially and -adversely affects the operation of the network or violates the rules and -protocols for communication across the network. - - Corresponding Source conveyed, and Installation Information provided, -in accord with this section must be in a format that is publicly -documented (and with an implementation available to the public in -source code form), and must require no special password or key for -unpacking, reading or copying. - - 7. Additional Terms. - - "Additional permissions" are terms that supplement the terms of this -License by making exceptions from one or more of its conditions. -Additional permissions that are applicable to the entire Program shall -be treated as though they were included in this License, to the extent -that they are valid under applicable law. If additional permissions -apply only to part of the Program, that part may be used separately -under those permissions, but the entire Program remains governed by -this License without regard to the additional permissions. - - When you convey a copy of a covered work, you may at your option -remove any additional permissions from that copy, or from any part of -it. (Additional permissions may be written to require their own -removal in certain cases when you modify the work.) You may place -additional permissions on material, added by you to a covered work, -for which you have or can give appropriate copyright permission. 
- - Notwithstanding any other provision of this License, for material you -add to a covered work, you may (if authorized by the copyright holders of -that material) supplement the terms of this License with terms: - - a) Disclaiming warranty or limiting liability differently from the - terms of sections 15 and 16 of this License; or - - b) Requiring preservation of specified reasonable legal notices or - author attributions in that material or in the Appropriate Legal - Notices displayed by works containing it; or - - c) Prohibiting misrepresentation of the origin of that material, or - requiring that modified versions of such material be marked in - reasonable ways as different from the original version; or - - d) Limiting the use for publicity purposes of names of licensors or - authors of the material; or - - e) Declining to grant rights under trademark law for use of some - trade names, trademarks, or service marks; or - - f) Requiring indemnification of licensors and authors of that - material by anyone who conveys the material (or modified versions of - it) with contractual assumptions of liability to the recipient, for - any liability that these contractual assumptions directly impose on - those licensors and authors. - - All other non-permissive additional terms are considered "further -restrictions" within the meaning of section 10. If the Program as you -received it, or any part of it, contains a notice stating that it is -governed by this License along with a term that is a further -restriction, you may remove that term. If a license document contains -a further restriction but permits relicensing or conveying under this -License, you may add to a covered work material governed by the terms -of that license document, provided that the further restriction does -not survive such relicensing or conveying. - - If you add terms to a covered work in accord with this section, you -must place, in the relevant source files, a statement of the -additional terms that apply to those files, or a notice indicating -where to find the applicable terms. - - Additional terms, permissive or non-permissive, may be stated in the -form of a separately written license, or stated as exceptions; -the above requirements apply either way. - - 8. Termination. - - You may not propagate or modify a covered work except as expressly -provided under this License. Any attempt otherwise to propagate or -modify it is void, and will automatically terminate your rights under -this License (including any patent licenses granted under the third -paragraph of section 11). - - However, if you cease all violation of this License, then your -license from a particular copyright holder is reinstated (a) -provisionally, unless and until the copyright holder explicitly and -finally terminates your license, and (b) permanently, if the copyright -holder fails to notify you of the violation by some reasonable means -prior to 60 days after the cessation. - - Moreover, your license from a particular copyright holder is -reinstated permanently if the copyright holder notifies you of the -violation by some reasonable means, this is the first time you have -received notice of violation of this License (for any work) from that -copyright holder, and you cure the violation prior to 30 days after -your receipt of the notice. - - Termination of your rights under this section does not terminate the -licenses of parties who have received copies or rights from you under -this License. 
If your rights have been terminated and not permanently -reinstated, you do not qualify to receive new licenses for the same -material under section 10. - - 9. Acceptance Not Required for Having Copies. - - You are not required to accept this License in order to receive or -run a copy of the Program. Ancillary propagation of a covered work -occurring solely as a consequence of using peer-to-peer transmission -to receive a copy likewise does not require acceptance. However, -nothing other than this License grants you permission to propagate or -modify any covered work. These actions infringe copyright if you do -not accept this License. Therefore, by modifying or propagating a -covered work, you indicate your acceptance of this License to do so. - - 10. Automatic Licensing of Downstream Recipients. - - Each time you convey a covered work, the recipient automatically -receives a license from the original licensors, to run, modify and -propagate that work, subject to this License. You are not responsible -for enforcing compliance by third parties with this License. - - An "entity transaction" is a transaction transferring control of an -organization, or substantially all assets of one, or subdividing an -organization, or merging organizations. If propagation of a covered -work results from an entity transaction, each party to that -transaction who receives a copy of the work also receives whatever -licenses to the work the party's predecessor in interest had or could -give under the previous paragraph, plus a right to possession of the -Corresponding Source of the work from the predecessor in interest, if -the predecessor has it or can get it with reasonable efforts. - - You may not impose any further restrictions on the exercise of the -rights granted or affirmed under this License. For example, you may -not impose a license fee, royalty, or other charge for exercise of -rights granted under this License, and you may not initiate litigation -(including a cross-claim or counterclaim in a lawsuit) alleging that -any patent claim is infringed by making, using, selling, offering for -sale, or importing the Program or any portion of it. - - 11. Patents. - - A "contributor" is a copyright holder who authorizes use under this -License of the Program or a work on which the Program is based. The -work thus licensed is called the contributor's "contributor version". - - A contributor's "essential patent claims" are all patent claims -owned or controlled by the contributor, whether already acquired or -hereafter acquired, that would be infringed by some manner, permitted -by this License, of making, using, or selling its contributor version, -but do not include claims that would be infringed only as a -consequence of further modification of the contributor version. For -purposes of this definition, "control" includes the right to grant -patent sublicenses in a manner consistent with the requirements of -this License. - - Each contributor grants you a non-exclusive, worldwide, royalty-free -patent license under the contributor's essential patent claims, to -make, use, sell, offer for sale, import and otherwise run, modify and -propagate the contents of its contributor version. - - In the following three paragraphs, a "patent license" is any express -agreement or commitment, however denominated, not to enforce a patent -(such as an express permission to practice a patent or covenant not to -sue for patent infringement). 
To "grant" such a patent license to a -party means to make such an agreement or commitment not to enforce a -patent against the party. - - If you convey a covered work, knowingly relying on a patent license, -and the Corresponding Source of the work is not available for anyone -to copy, free of charge and under the terms of this License, through a -publicly available network server or other readily accessible means, -then you must either (1) cause the Corresponding Source to be so -available, or (2) arrange to deprive yourself of the benefit of the -patent license for this particular work, or (3) arrange, in a manner -consistent with the requirements of this License, to extend the patent -license to downstream recipients. "Knowingly relying" means you have -actual knowledge that, but for the patent license, your conveying the -covered work in a country, or your recipient's use of the covered work -in a country, would infringe one or more identifiable patents in that -country that you have reason to believe are valid. - - If, pursuant to or in connection with a single transaction or -arrangement, you convey, or propagate by procuring conveyance of, a -covered work, and grant a patent license to some of the parties -receiving the covered work authorizing them to use, propagate, modify -or convey a specific copy of the covered work, then the patent license -you grant is automatically extended to all recipients of the covered -work and works based on it. - - A patent license is "discriminatory" if it does not include within -the scope of its coverage, prohibits the exercise of, or is -conditioned on the non-exercise of one or more of the rights that are -specifically granted under this License. You may not convey a covered -work if you are a party to an arrangement with a third party that is -in the business of distributing software, under which you make payment -to the third party based on the extent of your activity of conveying -the work, and under which the third party grants, to any of the -parties who would receive the covered work from you, a discriminatory -patent license (a) in connection with copies of the covered work -conveyed by you (or copies made from those copies), or (b) primarily -for and in connection with specific products or compilations that -contain the covered work, unless you entered into that arrangement, -or that patent license was granted, prior to 28 March 2007. - - Nothing in this License shall be construed as excluding or limiting -any implied license or other defenses to infringement that may -otherwise be available to you under applicable patent law. - - 12. No Surrender of Others' Freedom. - - If conditions are imposed on you (whether by court order, agreement or -otherwise) that contradict the conditions of this License, they do not -excuse you from the conditions of this License. If you cannot convey a -covered work so as to satisfy simultaneously your obligations under this -License and any other pertinent obligations, then as a consequence you may -not convey it at all. For example, if you agree to terms that obligate you -to collect a royalty for further conveying from those to whom you convey -the Program, the only way you could satisfy both those terms and this -License would be to refrain entirely from conveying the Program. - - 13. Remote Network Interaction; Use with the GNU General Public License. 
- - Notwithstanding any other provision of this License, if you modify the -Program, your modified version must prominently offer all users -interacting with it remotely through a computer network (if your version -supports such interaction) an opportunity to receive the Corresponding -Source of your version by providing access to the Corresponding Source -from a network server at no charge, through some standard or customary -means of facilitating copying of software. This Corresponding Source -shall include the Corresponding Source for any work covered by version 3 -of the GNU General Public License that is incorporated pursuant to the -following paragraph. - - Notwithstanding any other provision of this License, you have -permission to link or combine any covered work with a work licensed -under version 3 of the GNU General Public License into a single -combined work, and to convey the resulting work. The terms of this -License will continue to apply to the part which is the covered work, -but the work with which it is combined will remain governed by version -3 of the GNU General Public License. - - 14. Revised Versions of this License. - - The Free Software Foundation may publish revised and/or new versions of -the GNU Affero General Public License from time to time. Such new versions -will be similar in spirit to the present version, but may differ in detail to -address new problems or concerns. - - Each version is given a distinguishing version number. If the -Program specifies that a certain numbered version of the GNU Affero General -Public License "or any later version" applies to it, you have the -option of following the terms and conditions either of that numbered -version or of any later version published by the Free Software -Foundation. If the Program does not specify a version number of the -GNU Affero General Public License, you may choose any version ever published -by the Free Software Foundation. - - If the Program specifies that a proxy can decide which future -versions of the GNU Affero General Public License can be used, that proxy's -public statement of acceptance of a version permanently authorizes you -to choose that version for the Program. - - Later license versions may give you additional or different -permissions. However, no additional obligations are imposed on any -author or copyright holder as a result of your choosing to follow a -later version. - - 15. Disclaimer of Warranty. - - THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY -APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT -HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY -OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, -THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM -IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF -ALL NECESSARY SERVICING, REPAIR OR CORRECTION. - - 16. Limitation of Liability. 
- - IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING -WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS -THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY -GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE -USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF -DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD -PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), -EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF -SUCH DAMAGES. - - 17. Interpretation of Sections 15 and 16. - - If the disclaimer of warranty and limitation of liability provided -above cannot be given local legal effect according to their terms, -reviewing courts shall apply local law that most closely approximates -an absolute waiver of all civil liability in connection with the -Program, unless a warranty or assumption of liability accompanies a -copy of the Program in return for a fee. - - END OF TERMS AND CONDITIONS - - How to Apply These Terms to Your New Programs - - If you develop a new program, and you want it to be of the greatest -possible use to the public, the best way to achieve this is to make it -free software which everyone can redistribute and change under these terms. - - To do so, attach the following notices to the program. It is safest -to attach them to the start of each source file to most effectively -state the exclusion of warranty; and each file should have at least -the "copyright" line and a pointer to where the full notice is found. - - - Copyright (C) - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see . - -Also add information on how to contact you by electronic and paper mail. - - If your software can interact with users remotely through a computer -network, you should also make sure that it provides a way for users to -get its source. For example, if your program is a web application, its -interface could display a "Source" link that leads users to an archive -of the code. There are many ways you could offer source, and different -solutions will be better for different programs; see section 13 for the -specific requirements. - - You should also get your employer (if you work as a programmer) or school, -if any, to sign a "copyright disclaimer" for the program, if necessary. -For more information on this, and how to apply and follow the GNU AGPL, see -. \ No newline at end of file diff --git a/tutorials/mct_model_garden/models_pytorch/yolox/__init__.py b/tutorials/mct_model_garden/models_pytorch/yolox/__init__.py deleted file mode 100644 index e11a7cc60..000000000 --- a/tutorials/mct_model_garden/models_pytorch/yolox/__init__.py +++ /dev/null @@ -1,14 +0,0 @@ -# Copyright 2024 Sony Semiconductor Israel, Inc. All rights reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== diff --git a/tutorials/mct_model_garden/models_pytorch/yolox/darknet.py b/tutorials/mct_model_garden/models_pytorch/yolox/darknet.py deleted file mode 100644 index 083d49bcd..000000000 --- a/tutorials/mct_model_garden/models_pytorch/yolox/darknet.py +++ /dev/null @@ -1,387 +0,0 @@ -# ------------------------------------------------------------------------------ -# This file contains code from the https://github.com/Megvii-BaseDetection/YOLOX repository. -# Copyright (c) 2021-2022 Megvii Inc. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ------------------------------------------------------------------------------ - - -import torch -from torch import nn - - -def get_activation(name="silu", inplace=True): - if name == "silu": - module = nn.SiLU(inplace=inplace) - elif name == "relu": - module = nn.ReLU(inplace=inplace) - elif name == "lrelu": - module = nn.LeakyReLU(0.1, inplace=inplace) - else: - raise AttributeError("Unsupported act type: {}".format(name)) - return module - - -class BaseConv(nn.Module): - """A Conv2d -> Batchnorm -> silu/leaky relu block""" - - def __init__( - self, in_channels, out_channels, ksize, stride, groups=1, bias=False, act="silu" - ): - super().__init__() - # same padding - pad = (ksize - 1) // 2 - self.conv = nn.Conv2d( - in_channels, - out_channels, - kernel_size=ksize, - stride=stride, - padding=pad, - groups=groups, - bias=bias, - ) - self.bn = nn.BatchNorm2d(out_channels) - self.act = get_activation(act, inplace=True) - - def forward(self, x): - return self.act(self.bn(self.conv(x))) - - def fuseforward(self, x): - return self.act(self.conv(x)) - - -class DWConv(nn.Module): - """Depthwise Conv + Conv""" - - def __init__(self, in_channels, out_channels, ksize, stride=1, act="silu"): - super().__init__() - self.dconv = BaseConv( - in_channels, - in_channels, - ksize=ksize, - stride=stride, - groups=in_channels, - act=act, - ) - self.pconv = BaseConv( - in_channels, out_channels, ksize=1, stride=1, groups=1, act=act - ) - - def forward(self, x): - x = self.dconv(x) - return self.pconv(x) - - -class Bottleneck(nn.Module): - # Standard bottleneck - def __init__( - self, - in_channels, - out_channels, - shortcut=True, - expansion=0.5, - depthwise=False, - act="silu", - ): - super().__init__() - hidden_channels = int(out_channels * expansion) - Conv = DWConv if depthwise else BaseConv - self.conv1 = BaseConv(in_channels, 
hidden_channels, 1, stride=1, act=act) - self.conv2 = Conv(hidden_channels, out_channels, 3, stride=1, act=act) - self.use_add = shortcut and in_channels == out_channels - - def forward(self, x): - y = self.conv2(self.conv1(x)) - if self.use_add: - y = y + x - return y - - -class ResLayer(nn.Module): - "Residual layer with `in_channels` inputs." - - def __init__(self, in_channels: int): - super().__init__() - mid_channels = in_channels // 2 - self.layer1 = BaseConv( - in_channels, mid_channels, ksize=1, stride=1, act="lrelu" - ) - self.layer2 = BaseConv( - mid_channels, in_channels, ksize=3, stride=1, act="lrelu" - ) - - def forward(self, x): - out = self.layer2(self.layer1(x)) - return x + out - - -class SPPBottleneck(nn.Module): - """Spatial pyramid pooling layer used in YOLOv3-SPP""" - - def __init__( - self, in_channels, out_channels, kernel_sizes=(5, 9, 13), activation="silu" - ): - super().__init__() - hidden_channels = in_channels // 2 - self.conv1 = BaseConv(in_channels, hidden_channels, 1, stride=1, act=activation) - self.m = nn.ModuleList( - [ - nn.MaxPool2d(kernel_size=ks, stride=1, padding=ks // 2) - for ks in kernel_sizes - ] - ) - conv2_channels = hidden_channels * (len(kernel_sizes) + 1) - self.conv2 = BaseConv(conv2_channels, out_channels, 1, stride=1, act=activation) - - def forward(self, x): - x = self.conv1(x) - x = torch.cat([x] + [m(x) for m in self.m], dim=1) - x = self.conv2(x) - return x - - -class CSPLayer(nn.Module): - """C3 in yolov5, CSP Bottleneck with 3 convolutions""" - - def __init__( - self, - in_channels, - out_channels, - n=1, - shortcut=True, - expansion=0.5, - depthwise=False, - act="silu", - ): - """ - Args: - in_channels (int): input channels. - out_channels (int): output channels. - n (int): number of Bottlenecks. Default value: 1. - """ - # ch_in, ch_out, number, shortcut, groups, expansion - super().__init__() - hidden_channels = int(out_channels * expansion) # hidden channels - self.conv1 = BaseConv(in_channels, hidden_channels, 1, stride=1, act=act) - self.conv2 = BaseConv(in_channels, hidden_channels, 1, stride=1, act=act) - self.conv3 = BaseConv(2 * hidden_channels, out_channels, 1, stride=1, act=act) - module_list = [ - Bottleneck( - hidden_channels, hidden_channels, shortcut, 1.0, depthwise, act=act - ) - for _ in range(n) - ] - self.m = nn.Sequential(*module_list) - - def forward(self, x): - x_1 = self.conv1(x) - x_2 = self.conv2(x) - x_1 = self.m(x_1) - x = torch.cat((x_1, x_2), dim=1) - return self.conv3(x) - - -class Focus(nn.Module): - """Focus width and height information into channel space.""" - - def __init__(self, in_channels, out_channels, ksize=1, stride=1, act="silu"): - super().__init__() - self.conv = BaseConv(in_channels * 4, out_channels, ksize, stride, act=act) - - def forward(self, x): - # shape of x (b,c,w,h) -> y(b,4c,w/2,h/2) - patch_top_left = x[..., ::2, ::2] - patch_top_right = x[..., ::2, 1::2] - patch_bot_left = x[..., 1::2, ::2] - patch_bot_right = x[..., 1::2, 1::2] - x = torch.cat( - ( - patch_top_left, - patch_bot_left, - patch_top_right, - patch_bot_right, - ), - dim=1, - ) - return self.conv(x) - - -class Darknet(nn.Module): - # number of blocks from dark2 to dark5. - depth2blocks = {21: [1, 2, 2, 1], 53: [2, 8, 8, 4]} - - def __init__( - self, - depth, - in_channels=3, - stem_out_channels=32, - out_features=("dark3", "dark4", "dark5"), - ): - """ - Args: - depth (int): depth of darknet used in model, usually use [21, 53] for this param. 
- in_channels (int): number of input channels, for example, use 3 for RGB image. - stem_out_channels (int): number of output channels of darknet stem. - It decides channels of darknet layer2 to layer5. - out_features (Tuple[str]): desired output layer name. - """ - super().__init__() - assert out_features, "please provide output features of Darknet" - self.out_features = out_features - self.stem = nn.Sequential( - BaseConv(in_channels, stem_out_channels, ksize=3, stride=1, act="lrelu"), - *self.make_group_layer(stem_out_channels, num_blocks=1, stride=2), - ) - in_channels = stem_out_channels * 2 # 64 - - num_blocks = Darknet.depth2blocks[depth] - # create darknet with `stem_out_channels` and `num_blocks` layers. - # to make model structure more clear, we don't use `for` statement in python. - self.dark2 = nn.Sequential( - *self.make_group_layer(in_channels, num_blocks[0], stride=2) - ) - in_channels *= 2 # 128 - self.dark3 = nn.Sequential( - *self.make_group_layer(in_channels, num_blocks[1], stride=2) - ) - in_channels *= 2 # 256 - self.dark4 = nn.Sequential( - *self.make_group_layer(in_channels, num_blocks[2], stride=2) - ) - in_channels *= 2 # 512 - - self.dark5 = nn.Sequential( - *self.make_group_layer(in_channels, num_blocks[3], stride=2), - *self.make_spp_block([in_channels, in_channels * 2], in_channels * 2), - ) - - def make_group_layer(self, in_channels: int, num_blocks: int, stride: int = 1): - "starts with conv layer then has `num_blocks` `ResLayer`" - return [ - BaseConv(in_channels, in_channels * 2, ksize=3, stride=stride, act="lrelu"), - *[(ResLayer(in_channels * 2)) for _ in range(num_blocks)], - ] - - def make_spp_block(self, filters_list, in_filters): - m = nn.Sequential( - *[ - BaseConv(in_filters, filters_list[0], 1, stride=1, act="lrelu"), - BaseConv(filters_list[0], filters_list[1], 3, stride=1, act="lrelu"), - SPPBottleneck( - in_channels=filters_list[1], - out_channels=filters_list[0], - activation="lrelu", - ), - BaseConv(filters_list[0], filters_list[1], 3, stride=1, act="lrelu"), - BaseConv(filters_list[1], filters_list[0], 1, stride=1, act="lrelu"), - ] - ) - return m - - def forward(self, x): - outputs = {} - x = self.stem(x) - outputs["stem"] = x - x = self.dark2(x) - outputs["dark2"] = x - x = self.dark3(x) - outputs["dark3"] = x - x = self.dark4(x) - outputs["dark4"] = x - x = self.dark5(x) - outputs["dark5"] = x - return {k: v for k, v in outputs.items() if k in self.out_features} - - -class CSPDarknet(nn.Module): - def __init__( - self, - dep_mul, - wid_mul, - out_features=("dark3", "dark4", "dark5"), - depthwise=False, - act="silu", - ): - super().__init__() - assert out_features, "please provide output features of Darknet" - self.out_features = out_features - Conv = DWConv if depthwise else BaseConv - - base_channels = int(wid_mul * 64) # 64 - base_depth = max(round(dep_mul * 3), 1) # 3 - - # stem - self.stem = Focus(3, base_channels, ksize=3, act=act) - - # dark2 - self.dark2 = nn.Sequential( - Conv(base_channels, base_channels * 2, 3, 2, act=act), - CSPLayer( - base_channels * 2, - base_channels * 2, - n=base_depth, - depthwise=depthwise, - act=act, - ), - ) - - # dark3 - self.dark3 = nn.Sequential( - Conv(base_channels * 2, base_channels * 4, 3, 2, act=act), - CSPLayer( - base_channels * 4, - base_channels * 4, - n=base_depth * 3, - depthwise=depthwise, - act=act, - ), - ) - - # dark4 - self.dark4 = nn.Sequential( - Conv(base_channels * 4, base_channels * 8, 3, 2, act=act), - CSPLayer( - base_channels * 8, - base_channels * 8, - n=base_depth * 3, - 
depthwise=depthwise, - act=act, - ), - ) - - # dark5 - self.dark5 = nn.Sequential( - Conv(base_channels * 8, base_channels * 16, 3, 2, act=act), - SPPBottleneck(base_channels * 16, base_channels * 16, activation=act), - CSPLayer( - base_channels * 16, - base_channels * 16, - n=base_depth, - shortcut=False, - depthwise=depthwise, - act=act, - ), - ) - - def forward(self, x): - outputs = {} - x = self.stem(x) - outputs["stem"] = x - x = self.dark2(x) - outputs["dark2"] = x - x = self.dark3(x) - outputs["dark3"] = x - x = self.dark4(x) - outputs["dark4"] = x - x = self.dark5(x) - outputs["dark5"] = x - return {k: v for k, v in outputs.items() if k in self.out_features} diff --git a/tutorials/mct_model_garden/models_pytorch/yolox/yolox.py b/tutorials/mct_model_garden/models_pytorch/yolox/yolox.py deleted file mode 100644 index 2bd9903ed..000000000 --- a/tutorials/mct_model_garden/models_pytorch/yolox/yolox.py +++ /dev/null @@ -1,447 +0,0 @@ -# ------------------------------------------------------------------------------ -# This file contains code from the https://github.com/Megvii-BaseDetection/YOLOX repository. -# Copyright (c) 2021-2022 Megvii Inc. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ------------------------------------------------------------------------------ - -# The following code was mostly duplicated from https://github.com/Megvii-BaseDetection/YOLOX -# and changed to generate an equivalent PyTorch model suitable for quantization. -# Main changes: -# * Modify layers to make them more suitable for quantization. -# * Integrate box decoding and NMS into the model -# ============================================================================== -""" - -This code contains a PyTorch implementation of Yolox object detection model. -This implementation includes a slightly modified version of Yolox -detection-head (mainly the box decoding part) which is optimized for model quantization. -For more details on Yolox, refer to the original repository: -https://github.com/Megvii-BaseDetection/YOLOX - -""" -from typing import Tuple, List -import torch -import torch.nn as nn -import numpy as np -from model_compression_toolkit.core.pytorch.pytorch_device_config import get_working_device -from .darknet import CSPDarknet, CSPLayer, BaseConv, DWConv -from sony_custom_layers.pytorch import multiclass_nms, FasterRCNNBoxDecode - - -class YOLOPAFPN(nn.Module): - """ - Darknet 53 is the default backbone of this model. 
- """ - - def __init__( - self, - depth=1.0, - width=1.0, - in_features=("dark3", "dark4", "dark5"), - in_channels=[256, 512, 1024], - depthwise=False, - act="silu", - ): - super().__init__() - self.backbone = CSPDarknet(depth, width, depthwise=depthwise, act=act) - self.in_features = in_features - self.in_channels = in_channels - Conv = DWConv if depthwise else BaseConv - - self.upsample = nn.Upsample(scale_factor=2, mode="nearest") - self.lateral_conv0 = BaseConv( - int(in_channels[2] * width), int(in_channels[1] * width), 1, 1, act=act - ) - self.C3_p4 = CSPLayer( - int(2 * in_channels[1] * width), - int(in_channels[1] * width), - round(3 * depth), - False, - depthwise=depthwise, - act=act, - ) # cat - - self.reduce_conv1 = BaseConv( - int(in_channels[1] * width), int(in_channels[0] * width), 1, 1, act=act - ) - self.C3_p3 = CSPLayer( - int(2 * in_channels[0] * width), - int(in_channels[0] * width), - round(3 * depth), - False, - depthwise=depthwise, - act=act, - ) - - # bottom-up conv - self.bu_conv2 = Conv( - int(in_channels[0] * width), int(in_channels[0] * width), 3, 2, act=act - ) - self.C3_n3 = CSPLayer( - int(2 * in_channels[0] * width), - int(in_channels[1] * width), - round(3 * depth), - False, - depthwise=depthwise, - act=act, - ) - - # bottom-up conv - self.bu_conv1 = Conv( - int(in_channels[1] * width), int(in_channels[1] * width), 3, 2, act=act - ) - self.C3_n4 = CSPLayer( - int(2 * in_channels[1] * width), - int(in_channels[2] * width), - round(3 * depth), - False, - depthwise=depthwise, - act=act, - ) - - def forward(self, inputs): - """ - Args: - inputs: input images. - - Returns: - Tuple[Tensor]: FPN feature. - """ - - # backbone - out_features = self.backbone(inputs) - features = [out_features[f] for f in self.in_features] - [x2, x1, x0] = features - - fpn_out0 = self.lateral_conv0(x0) # 1024->512/32 - f_out0 = self.upsample(fpn_out0) # 512/16 - f_out0 = torch.cat([f_out0, x1], 1) # 512->1024/16 - f_out0 = self.C3_p4(f_out0) # 1024->512/16 - - fpn_out1 = self.reduce_conv1(f_out0) # 512->256/16 - f_out1 = self.upsample(fpn_out1) # 256/8 - f_out1 = torch.cat([f_out1, x2], 1) # 256->512/8 - pan_out2 = self.C3_p3(f_out1) # 512->256/8 - - p_out1 = self.bu_conv2(pan_out2) # 256->256/16 - p_out1 = torch.cat([p_out1, fpn_out1], 1) # 256->512/16 - pan_out1 = self.C3_n3(p_out1) # 512->512/16 - - p_out0 = self.bu_conv1(pan_out1) # 512->512/32 - p_out0 = torch.cat([p_out0, fpn_out0], 1) # 512->1024/32 - pan_out0 = self.C3_n4(p_out0) # 1024->1024/32 - - outputs = (pan_out2, pan_out1, pan_out0) - return outputs - - -class YOLOXHead(nn.Module): - def __init__( - self, - num_classes, - width=1.0, - strides=[8, 16, 32], - in_channels=[256, 512, 1024], - act="silu", - depthwise=False, - ): - """ - Args: - act (str): activation type of conv. Defalut value: "silu". - depthwise (bool): whether apply depthwise conv in conv branch. Defalut value: False. 
- """ - super().__init__() - self.device = get_working_device() - self.num_classes = num_classes - self.strides = strides - self.cls_convs = nn.ModuleList() - self.reg_convs = nn.ModuleList() - self.cls_preds = nn.ModuleList() - self.reg_preds = nn.ModuleList() - self.obj_preds = nn.ModuleList() - self.stems = nn.ModuleList() - Conv = DWConv if depthwise else BaseConv - - for i in range(len(in_channels)): - self.stems.append( - BaseConv( - in_channels=int(in_channels[i] * width), - out_channels=int(256 * width), - ksize=1, - stride=1, - act=act, - ) - ) - self.cls_convs.append( - nn.Sequential( - *[ - Conv( - in_channels=int(256 * width), - out_channels=int(256 * width), - ksize=3, - stride=1, - act=act, - ), - Conv( - in_channels=int(256 * width), - out_channels=int(256 * width), - ksize=3, - stride=1, - act=act, - ), - ] - ) - ) - self.reg_convs.append( - nn.Sequential( - *[ - Conv( - in_channels=int(256 * width), - out_channels=int(256 * width), - ksize=3, - stride=1, - act=act, - ), - Conv( - in_channels=int(256 * width), - out_channels=int(256 * width), - ksize=3, - stride=1, - act=act, - ), - ] - ) - ) - self.cls_preds.append( - nn.Conv2d( - in_channels=int(256 * width), - out_channels=self.num_classes, - kernel_size=1, - stride=1, - padding=0, - ) - ) - self.reg_preds.append( - nn.Conv2d( - in_channels=int(256 * width), - out_channels=4, - kernel_size=1, - stride=1, - padding=0, - ) - ) - self.obj_preds.append( - nn.Conv2d( - in_channels=int(256 * width), - out_channels=1, - kernel_size=1, - stride=1, - padding=0, - ) - ) - - def forward(self, xin): - outputs = [] - - for k, (cls_conv, reg_conv, stride_this_level, x) in enumerate( - zip(self.cls_convs, self.reg_convs, self.strides, xin)): - x = self.stems[k](x) - cls_x = x - reg_x = x - - cls_feat = cls_conv(cls_x) - cls_output = self.cls_preds[k](cls_feat) - - reg_feat = reg_conv(reg_x) - reg_output = self.reg_preds[k](reg_feat) - obj_output = self.obj_preds[k](reg_feat) - output = torch.cat([reg_output, obj_output.sigmoid(), cls_output.sigmoid()], 1) - outputs.append(output) - - # [batch, n_anchors_all, 85] - outputs = torch.cat([x.flatten(start_dim=2) for x in outputs], dim=2).permute(0, 2, 1) - return outputs - - -class YOLOX(nn.Module): - """ - YOLOX model for object detection. - - Args: - cfg (dict): Model configuration in the form of a dictionary. - """ - - def __init__(self, cfg): - super(YOLOX, self).__init__() - self.device = get_working_device() - self.cfg = cfg - self.depth = cfg.get("depth") - self.width = cfg.get("width") - self.img_size = cfg.get("img_size") - self.num_classes = cfg.get("num_classes") - self.act = cfg.get("act") - self.depthwise = cfg.get("depthwise") - self.backbone = YOLOPAFPN( - self.depth, self.width, - act=self.act, depthwise=self.depthwise, - ) - self.head = YOLOXHead( - self.num_classes, self.width, - act=self.act, depthwise=self.depthwise, - ) - self.init_weights() - - def load_weights(self, path): - """ - Load weights to model. 
- Args: - path (str): weight's file path - """ - sd = torch.load(path, map_location=self.device, weights_only=True)['model'] - self.load_state_dict(sd) - - def init_weights(self): - """ - Init batchnorm eps and momentum - """ - for m in self.modules(): - if isinstance(m, nn.BatchNorm2d): - m.eps = 1e-3 - m.momentum = 0.03 - self.eval().to(self.device) - - def forward(self, x: torch.Tensor) -> Tuple[torch.tensor]: - """ - Inference - Args: - x (tensor): input tensor - - Returns: - tuple containing tensors of boxes and scores - """ - fpn_outs = self.backbone(x) - outputs = self.head(fpn_outs) - boxes = outputs[...,:4] - # Convert from (xc,yc,w,h) to (yc,xc,h,w) - xc, yc, w, h = boxes[..., 0], boxes[..., 1], boxes[..., 2], boxes[..., 3] - boxes = torch.stack([yc, xc, h, w], dim=-1) - scores = outputs[..., 5:] * outputs[..., 4:5] # classes * scores - return boxes, scores - - -class YOLOXPostProcess(nn.Module): - """ - Wrapping YoloX with post process functionality: box decoding and multiclass_nms layer from sony_custom_layers. - - Args: - model (nn.Module): Model instance. - img_size: (tuple): Image size input of the model. - score_threshold (float): Score threshold for non-maximum suppression. - iou_threshold (float): Intersection over union threshold for non-maximum suppression. - max_detections (int): The number of detections to return. - """ - def __init__(self, - model: nn.Module, - img_size: tuple = (416,416), - score_threshold: float = 0.001, - iou_threshold: float = 0.65, - max_detections: int = 200): - super(YOLOXPostProcess, self).__init__() - self.device = get_working_device() - self.model = model - self.box_decoder = FasterRCNNBoxDecode(anchors=self.create_anchors(img_size), - scale_factors=[1,1,1,1], - clip_window=[0,0,*img_size]) - self.score_threshold = score_threshold - self.iou_threshold = iou_threshold - self.max_detections = max_detections - - def create_anchors(self, img_size: Tuple, strides: List = [8, 16, 32]) -> torch.tensor: - """ - Create anchors for box decoding operation. - Args: - img_size: (tuple): Image size input of the model. - strides (list): strides to bed used in anchors. - - Returns: - outputs: tesnor of anchors. - """ - device = get_working_device() - fmap_grids = [] - fmap_strides = [] - hsizes = [img_size[0] // stride for stride in strides] - wsizes = [img_size[1] // stride for stride in strides] - for hsize, wsize, stride in zip(hsizes, wsizes, strides): - yv, xv = torch.meshgrid([torch.arange(hsize), torch.arange(wsize)]) - grid = torch.stack((xv, yv), 2).view(1, -1, 2) - fmap_grids.append(grid) - shape = grid.shape[:2] - fmap_strides.append(torch.full((*shape, 1), stride)) - - s = torch.cat(fmap_strides, dim=1).to(device) - offsets = s * torch.cat(fmap_grids, dim=1).to(device) - xc, yc = offsets[..., 0:1], offsets[..., 1:2] - anchors = torch.concat([(2 * yc - s) / 2, (2 * xc - s) / 2, (2 * yc + s) / 2, (2 * xc + s) / 2], dim=-1) - anchors = anchors.squeeze(0) - return anchors - - def forward(self, images: torch.tensor) -> Tuple: - """ - Perform inference on the given images. - Args: - images (np.ndarray): Input data to perform inference on. 
- - Returns: - predictions consit of boxes, scores, labels - """ - # Inference - boxes, scores = self.model(images) - # Box decoder - boxes = self.box_decoder(boxes) - # NMS - nms_out = multiclass_nms(boxes=boxes, - scores=scores, - score_threshold=self.score_threshold, - iou_threshold=self.iou_threshold, - max_detections=self.max_detections) - return nms_out.boxes, nms_out.scores, nms_out.labels - - -def model_predict(model: nn.Module, - inputs: np.ndarray) -> Tuple[torch.tensor]: - """ - Perform inference using the provided model on the given inputs. - - This function handles moving the inputs to the appropriate torch device and data type, - detaches and moves the outputs to the CPU. - - Args: - model (Any): The PyTorch model used for inference. - inputs (np.ndarray): Input data to perform inference on. - - Returns: - outputs: tuple containing tensors of predictions. - """ - device = get_working_device() - inputs = torch.from_numpy(inputs).to(device=device, dtype=torch.float) - - # Run Pytorch inference on the batch - outputs = model(inputs) - - # Detach outputs and move to cpu - outputs = [output.detach().cpu() for output in outputs] - boxes = outputs[0] - scores = outputs[1] - labels = outputs[2] - - return boxes, scores, labels diff --git a/tutorials/mct_model_garden/models_pytorch/yolox/yolox.yaml b/tutorials/mct_model_garden/models_pytorch/yolox/yolox.yaml deleted file mode 100644 index b20cba9cb..000000000 --- a/tutorials/mct_model_garden/models_pytorch/yolox/yolox.yaml +++ /dev/null @@ -1,62 +0,0 @@ -# Copyright 2024 Sony Semiconductor Israel, Inc. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -nano: - depth: 0.33 - width: 0.25 - img_size: [416, 416] - num_classes: 80 - act: "silu" - depthwise: True - -tiny: - depth: 0.33 - width: 0.375 - img_size: [416, 416] - num_classes: 80 - act: "silu" - depthwise: False - -small: - depth: 0.33 - width: 0.50 - img_size: [640, 640] - num_classes: 80 - act: "silu" - depthwise: False - -medium: - depth: 0.67 - width: 0.75 - img_size: [640, 640] - num_classes: 80 - act: "silu" - depthwise: False - -large: - depth: 1.0 - width: 1.0 - img_size: [640, 640] - num_classes: 80 - act: "silu" - depthwise: False - -xlarge: - depth: 1.33 - width: 1.25 - img_size: [640, 640] - num_classes: 80 - act: "silu" - depthwise: False \ No newline at end of file diff --git a/tutorials/mct_model_garden/models_pytorch/yolox/yolox_preprocess.py b/tutorials/mct_model_garden/models_pytorch/yolox/yolox_preprocess.py deleted file mode 100644 index 279abff0f..000000000 --- a/tutorials/mct_model_garden/models_pytorch/yolox/yolox_preprocess.py +++ /dev/null @@ -1,35 +0,0 @@ -# =========================================================================================== -# The following code was adopted from https://github.com/Megvii-BaseDetection/YOLOX -# =========================================================================================== - -import numpy as np -from typing import Tuple -import cv2 - - -def yolox_preprocess_chw_transpose(img: np.ndarray, - pad_values: int = 114, - size: Tuple[int, int] = (416, 416)) -> np.ndarray: - """ - Preprocess an input image for YOLOX model with reshape and CHW transpose (for PyTorch implementation) - - Args: - img (np.ndarray): Input image as a NumPy array. - pad_values (int): Value used for padding. Default is 114. - size (Tuple[int, int]): Desired output size (height, width). Default is (416, 416). - - Returns: - np.ndarray: Preprocessed image as a NumPy array. - """ - padded_img = np.ones((size[0], size[1], 3), dtype=np.uint8) * pad_values - r = min(size[0] / img.shape[0], size[1] / img.shape[1]) - resized_img = cv2.resize( - img, - (int(img.shape[1] * r), int(img.shape[0] * r)), - interpolation=cv2.INTER_LINEAR, - ).astype(np.uint8) - padded_img[: int(img.shape[0] * r), : int(img.shape[1] * r)] = resized_img - - padded_img = padded_img.transpose((2, 0, 1)) - padded_img = np.ascontiguousarray(padded_img, dtype=np.float32) - return padded_img diff --git a/tutorials/notebooks/__init__.py b/tutorials/notebooks/__init__.py deleted file mode 100644 index e11a7cc60..000000000 --- a/tutorials/notebooks/__init__.py +++ /dev/null @@ -1,14 +0,0 @@ -# Copyright 2024 Sony Semiconductor Israel, Inc. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
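# ---------------------------------------------------------------------------
# Editor's note: the files removed above (yolox.py, yolox.yaml and
# yolox_preprocess.py) were designed to be used together. The following is a
# minimal usage sketch, not part of the original sources: the checkpoint and
# image paths are placeholders, and the import paths assume the deleted
# module layout.
# ---------------------------------------------------------------------------
import cv2
import yaml

from tutorials.mct_model_garden.models_pytorch.yolox.yolox import (
    YOLOX, YOLOXPostProcess, model_predict)
from tutorials.mct_model_garden.models_pytorch.yolox.yolox_preprocess import (
    yolox_preprocess_chw_transpose)

# Pick one of the variants defined in yolox.yaml (e.g. "nano").
with open('tutorials/mct_model_garden/models_pytorch/yolox/yolox.yaml') as f:
    cfg = yaml.safe_load(f)['nano']

model = YOLOX(cfg)                                    # YOLOPAFPN backbone + YOLOXHead
model.load_weights('yolox_nano.pth')                  # placeholder checkpoint path
model = YOLOXPostProcess(model, img_size=tuple(cfg['img_size']))  # box decoding + NMS

img = cv2.imread('example.jpg')                       # placeholder input image (H, W, 3)
batch = yolox_preprocess_chw_transpose(img, size=tuple(cfg['img_size']))[None, ...]
boxes, scores, labels = model_predict(model, batch)   # numpy batch in, detections out
# ---------------------------------------------------------------------------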
-# ==============================================================================
diff --git a/tutorials/notebooks/imx500_notebooks/README.md b/tutorials/notebooks/imx500_notebooks/README.md
deleted file mode 100644
index 239ae31f3..000000000
--- a/tutorials/notebooks/imx500_notebooks/README.md
+++ /dev/null
@@ -1,172 +0,0 @@
-# Sony-IMX500 Notebooks
-
-Here we provide examples of quantizing pre-trained models for deployment on the Sony-IMX500 processing platform.
-We cover various tasks and demonstrate the steps necessary to achieve efficient quantization for optimal
-deployment performance.
-
-*[1]* Jupyter notebook explaining how to generate an IMX500-compatible model.
-
-*[2]* Floating-point model with the adjustments necessary for MCT compatibility. If none, the source model is compatible with MCT as-is.
-
-*[3]* Expected model accuracy on IMX500.
-
-| Task | Model Name | Notebook [1] | Source Repository | Adjusted Model [2] | Dataset Name | Float Model Accuracy | Compressed Model Accuracy [3] |
-|------|------------|--------------|-------------------|--------------------|--------------|----------------------|-------------------------------|
-| Classification | MobilenetV2 | ipynb (Keras) | Keras Applications | | ImageNet | 71.85 | 71.67 |
-| | MobileVit | ipynb (PyTorch) | Timm | mct-model-garden | ImageNet | 74.64 | 72.56 |
-| | regnety_002.pycls_in1k | ipynb (PyTorch) | Timm | | ImageNet | 70.28 | 69.9 |
-| | regnetx_002.pycls_in1k | | Timm | | ImageNet | 68.752 | 68.652 |
-| | regnety_004.pycls_in1k | | Timm | | ImageNet | 74.026 | 73.72 |
-| | mnasnet1_0 | ipynb (PyTorch) | torchvision | | ImageNet | 73.47 | 73.16 |
-| | mobilenet_v2 | | torchvision | | ImageNet | 72.01 | 71.25 |
-| | regnet_y_400mf | | torchvision | | ImageNet | 74.03 | 73.69 |
-| | shufflenet_v2_x1_5 | | torchvision | | ImageNet | 69.34 | 69.04 |
-| Object Detection | YOLOv8n | ipynb (Keras) | Ultralytics | mct-model-garden | COCO | 37.3 | 35.1 |
-| | YOLOv8n | ipynb (PyTorch) | Ultralytics | mct-model-garden | COCO | 37.3 | 35.1 |
-| | NanoDet-Plus-m-416 | ipynb (Keras) | Nanodet | mct-model-garden | COCO | 34.1 | 32.2 |
-| | EfficientDet-lite0 | ipynb (Keras) | efficientdet-pytorch | mct-model-garden | COCO | 27.0 | 25.2 |
-| Semantic Segmentation | Deeplabv3plus | ipynb (Keras) | bonlime | mct-model-garden | PASCAL VOC | 76.935 | 76.778 |
-| Instance Segmentation | YOLOv8n-seg | ipynb (PyTorch) | Ultralytics | mct-model-garden | COCO | 30.5 | 29.5 |
-| Pose Estimation | YOLOv8n-pose | ipynb (PyTorch) | Ultralytics | mct-model-garden | COCO | 50.4 | 47.1 |
- diff --git a/tutorials/notebooks/imx500_notebooks/__init__.py b/tutorials/notebooks/imx500_notebooks/__init__.py deleted file mode 100644 index d3f5a12fa..000000000 --- a/tutorials/notebooks/imx500_notebooks/__init__.py +++ /dev/null @@ -1 +0,0 @@ - diff --git a/tutorials/notebooks/imx500_notebooks/keras/__init__.py b/tutorials/notebooks/imx500_notebooks/keras/__init__.py deleted file mode 100644 index d3f5a12fa..000000000 --- a/tutorials/notebooks/imx500_notebooks/keras/__init__.py +++ /dev/null @@ -1 +0,0 @@ - diff --git a/tutorials/notebooks/imx500_notebooks/keras/example_keras_effdet_lite0_for_imx500.ipynb b/tutorials/notebooks/imx500_notebooks/keras/example_keras_effdet_lite0_for_imx500.ipynb deleted file mode 100644 index d1c7db813..000000000 --- a/tutorials/notebooks/imx500_notebooks/keras/example_keras_effdet_lite0_for_imx500.ipynb +++ /dev/null @@ -1,533 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "source": [ - "# Post Training Quantization an EfficientDet Object Detection Model\n", - "\n", - "[Run this tutorial in Google Colab](https://colab.research.google.com/github/sony/model_optimization/blob/main/tutorials/notebooks/imx500_notebooks/keras/example_keras_effdet_lite0_for_imx500.ipynb)\n", - "\n", - "## Overview \n", - "\n", - "In this notebook, we'll demonstrate the post-training quantization using MCT for a pre-trained object detection model in Keras. In addition, we'll integrate a post-processing custom layer from [sony-custom-layers](https://github.com/sony/custom_layers) into the model. This custom layer is supported by the imx500 target platform capabilities.\n", - "\n", - "In this example we will use an existing pre-trained EfficientDet model taken from [efficientdet-pytorch](https://github.com/rwightman/efficientdet-pytorch). We will convert the model to a Keras functional model that includes the custom [PostProcess Layer](https://github.com/sony/custom_layers/blob/main/sony_custom_layers/keras/object_detection/ssd_post_process.py). Further, we will quantize the model using MCT post training quantization and evaluate the performance of the floating point model and the quantized model on the COCO dataset.\n", - "\n", - "We'll use the [timm](https://github.com/huggingface/pytorch-image-models)'s data loader and evaluation capabilities used for the original PyTorch pretrained model. The conversion to the Keras model will not be covered. You can go over the conversion [here](https://github.com/sony/model_optimization/tree/main/tutorials/mct_model_garden/models_keras/efficientdet).\n", - "\n", - "Steps:\n", - "* **Setup the environment**: install relevant packages, import them\n", - "* **Initialize the dataset**: Download the COCO evaluation set and prepare the evaluation code\n", - "* **Keras float model**: Create the Keras model, assign the pretrained weights and evaluate it\n", - "* **Quantize Keras mode**: Quantize the model and evaluate it\n", - "\n", - "**Note**: The following code should be run on a GPU." 
- ], - "metadata": { - "collapsed": false - }, - "id": "c9e7b10d2bfe67d4" - }, - { - "cell_type": "markdown", - "source": [ - "## Setup\n", - "\n", - "install and import relevant packages" - ], - "metadata": { - "collapsed": false - }, - "id": "d0e81b09e6d30873" - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "!pip install -q tensorflow==2.15.*\n", - "!pip install -q torchvision\n", - "!pip install -q timm==0.9.16\n", - "!pip install -q effdet\n", - "!pip install -q sony-custom-layers\n", - "!pip install -q torch" - ], - "metadata": { - "collapsed": false - }, - "id": "6695a3ec84402e29" - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "from typing import Dict, Optional\n", - "from time import time\n", - "import torch\n", - "import tensorflow as tf\n", - "from timm.utils import AverageMeter\n", - "from effdet.config import get_efficientdet_config\n", - "from effdet import create_dataset, create_loader, create_evaluator\n", - "from effdet.data import resolve_input_config" - ], - "metadata": { - "collapsed": false - }, - "id": "735aee910cf92d42" - }, - { - "cell_type": "markdown", - "source": [ - "**Install model_compression_toolkit (MCT)**\n", - "Here we install the model compression toolkit (if it's not already installed). Additionally, in order to convert the PyTorch model, we'll need to use the conversion code in the [MCT tutorials folder](https://github.com/sony/model_optimization/tree/main/tutorials). We copy this folder and add it to the python path. \n" - ], - "metadata": { - "collapsed": false - }, - "id": "eda6ab0d8f0b6b56" - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "import sys\n", - "import importlib\n", - "import os\n", - "\n", - "if not importlib.util.find_spec('model_compression_toolkit'):\n", - " !pip install model_compression_toolkit\n", - "!git clone https://github.com/sony/model_optimization.git temp_mct && mv temp_mct/tutorials . && \\rm -rf temp_mct\n", - "sys.path.insert(0,\"tutorials\")" - ], - "metadata": { - "collapsed": false - }, - "id": "38e460c939d89482" - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "from tutorials.mct_model_garden.models_keras.efficientdet import EfficientDetKeras" - ], - "metadata": { - "collapsed": false - }, - "id": "7461504d6590519a" - }, - { - "cell_type": "markdown", - "source": [ - "## Initialize dataset\n", - "\n", - "### Load the COCO evaluation set" - ], - "metadata": { - "collapsed": false - }, - "id": "f75abdac7950c038" - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "if not os.path.isdir('coco'):\n", - " !wget -nc http://images.cocodataset.org/annotations/annotations_trainval2017.zip\n", - " !unzip -q -o annotations_trainval2017.zip -d ./coco\n", - " !echo Done loading annotations\n", - " !wget -nc http://images.cocodataset.org/zips/val2017.zip\n", - " !unzip -q -o val2017.zip -d ./coco\n", - " !echo Done loading val2017 images" - ], - "metadata": { - "collapsed": false - }, - "id": "1bf50c7706331ba8" - }, - { - "cell_type": "markdown", - "source": [ - "### Initialize the data loader and evaluation functions\n", - "\n", - "These functions were adapted from the [efficientdet-pytorch](https://github.com/rwightman/efficientdet-pytorch) repository." 
- ], - "metadata": { - "collapsed": false - }, - "id": "6010ecf194d4a6a1" - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "class TorchWrapper(torch.nn.Module):\n", - " \"\"\"\n", - " A class to wrap the EfficientDet Keras model in a torch.nn.Module\n", - " so it can be evaluated with timm's evaluation code\n", - " \"\"\"\n", - " def __init__(self, keras_model: tf.keras.Model):\n", - " super(TorchWrapper, self).__init__()\n", - " self.model = keras_model\n", - "\n", - " @property\n", - " def config(self):\n", - " # a property used by the evaluation code\n", - " return self.model.config\n", - "\n", - " def forward(self, x: torch.Tensor,\n", - " img_info: Optional[Dict[str, torch.Tensor]] = None):\n", - " \"\"\"\n", - " mimics the forward inputs of the EfficientDet PyTorch model.\n", - " Args:\n", - " x: inputs images\n", - " img_info: input image info for scaling the outputs\n", - "\n", - " Returns:\n", - " A torch.Tensor of shape [Batch, Boxes, 6], the same as\n", - " the PyTorch model\n", - "\n", - " \"\"\"\n", - " device = x.device\n", - " keras_input = x.detach().cpu().numpy().transpose((0, 2, 3, 1))\n", - " outputs = self.model(keras_input)\n", - "\n", - " outs = [torch.Tensor(o.numpy()).to(device) for o in outputs]\n", - " # reorder boxes (y, x, y2, x2) to (x, y, x2, y2)\n", - " outs[0] = outs[0][:, :, [1, 0, 3, 2]]\n", - " # scale boxes to original image size\n", - " outs[0] = outs[0] * img_info['img_scale'].view((-1, 1, 1))\n", - " return torch.cat([outs[0], outs[1].unsqueeze(2),\n", - " outs[2].unsqueeze(2) + 1], 2)\n", - "\n", - "\n", - "def get_coco_dataloader(batch_size=16, split='val', config=None):\n", - " \"\"\"\n", - " Get the torch data-loader and evaluation object\n", - " Args:\n", - " batch_size: batch size for data loader\n", - " split: dataset split\n", - " config: model config\n", - "\n", - " Returns:\n", - " The DataLoader and evaluation object for calculating accuracy\n", - "\n", - " \"\"\"\n", - " root = './coco'\n", - "\n", - " args = dict(interpolation='bilinear', mean=None,\n", - " std=None, fill_color=None)\n", - " dataset = create_dataset('coco', root, split)\n", - " input_config = resolve_input_config(args, config)\n", - " loader = create_loader(\n", - " dataset,\n", - " input_size=input_config['input_size'],\n", - " batch_size=batch_size,\n", - " use_prefetcher=True,\n", - " interpolation=input_config['interpolation'],\n", - " fill_color=input_config['fill_color'],\n", - " mean=input_config['mean'],\n", - " std=input_config['std'],\n", - " num_workers=0,\n", - " pin_mem=False,\n", - " )\n", - " evaluator = create_evaluator('coco', dataset, pred_yxyx=False)\n", - "\n", - " return loader, evaluator\n", - "\n", - "\n", - "def acc_eval(_model: tf.keras.Model, batch_size=16, config=None):\n", - " \"\"\"\n", - " This function takes a Keras model, wraps it in a Torch model and runs evaluation\n", - " Args:\n", - " _model: Keras model\n", - " batch_size: batch size of the data loader\n", - " config: model config\n", - "\n", - " Returns:\n", - "\n", - " \"\"\"\n", - " # wrap Keras model in a Torch model so it can run in timm's evaluation code\n", - " _model = TorchWrapper(_model)\n", - " # EValuate input model\n", - " val_loader, evaluator = get_coco_dataloader(batch_size=batch_size, config=config)\n", - "\n", - " batch_time = AverageMeter()\n", - " end = time()\n", - " last_idx = len(val_loader) - 1\n", - " with torch.no_grad():\n", - " for i, (input, target) in enumerate(val_loader):\n", - " output = _model(input, 
img_info=target)\n", - "\n", - " evaluator.add_predictions(output, target)\n", - "\n", - " # measure elapsed time\n", - " batch_time.update(time() - end)\n", - " end = time()\n", - " if i % 10 == 0 or i == last_idx:\n", - " print(\n", - " f'Test: [{i:>4d}/{len(val_loader)}] '\n", - " f'Time: {batch_time.val:.3f}s ({batch_time.avg:.3f}s, {input.size(0) / batch_time.avg:>7.2f}/s) '\n", - " )\n", - "\n", - " return evaluator.evaluate()" - ], - "metadata": { - "collapsed": false - }, - "id": "5833c805a1ca77aa" - }, - { - "cell_type": "markdown", - "source": [ - "## Keras model\n", - "\n", - "Create the Keras model and copy weights from pretrained PyTorch weights file. Saved as \"model.keras\"." - ], - "metadata": { - "collapsed": false - }, - "id": "7e589b01c6a45a9e" - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "model_name = 'tf_efficientdet_lite0'\n", - "config = get_efficientdet_config(model_name)\n", - "\n", - "model = EfficientDetKeras(config, pretrained_backbone=False).get_model([*config.image_size] + [3])" - ], - "metadata": { - "collapsed": false - }, - "id": "6f1dacee7a949928" - }, - { - "cell_type": "markdown", - "source": [ - "### Evaluate Keras model\n", - "\n", - "We evaluate the model to verify the conversion to a Keras model succeeded. The result will be compared to the quantized model evaluation." - ], - "metadata": { - "collapsed": false - }, - "id": "ef6b474a69358e03" - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "float_map = acc_eval(model, batch_size=64, config=config)" - ], - "metadata": { - "collapsed": false - }, - "id": "dc2c87ab3460f395" - }, - { - "cell_type": "markdown", - "source": [ - "## Quantize Keras model\n", - "\n", - "In this section, the Keras model will be quantized by the MCT, with the following parameters:\n", - "- **Target Platform**: IMX500-v1\n", - "- **Mixed-Precision** weights compression so the model will fit the IMX500 memory size\n", - "\n", - "The quantized model is saved as \"quant_model.keras\"." 
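The next cell pins the weights-memory budget to a fixed byte count chosen for the IMX500. As a rough sketch (not part of the original flow), such a budget can instead be derived from the float model's measured weights memory, similar to what the YOLOv8 tutorial in this set does. The `0.8` fraction below is an arbitrary example value, and the snippet borrows the `get_representative_dataset` generator defined in the next cell.

```python
import model_compression_toolkit as mct

# Illustrative alternative to a hard-coded byte budget: measure the float model's
# weights memory under the IMX500 TPC and keep a fraction of it. The 0.8 fraction
# is an arbitrary example value, not the budget used by this tutorial.
tpc = mct.get_target_platform_capabilities("tensorflow", 'imx500', target_platform_version='v1')
mp_config = mct.core.MixedPrecisionQuantizationConfig(use_hessian_based_scores=False)
core_config = mct.core.CoreConfig(mixed_precision_config=mp_config)

ru_data = mct.core.keras_resource_utilization_data(
    model,                           # float EfficientDet Keras model created above
    get_representative_dataset(20),  # representative dataset generator from the next cell
    core_config,
    target_platform_capabilities=tpc)

resource_utilization = mct.core.ResourceUtilization(weights_memory=int(ru_data.weights_memory * 0.8))
```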
- ], - "metadata": { - "collapsed": false - }, - "id": "aca80a0fc370eef3" - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "import model_compression_toolkit as mct\n", - "\n", - "loader, _ = get_coco_dataloader(split='val', config=config)\n", - "\n", - "\n", - "def get_representative_dataset(n_iter):\n", - " \"\"\"\n", - " This function creates a representative dataset generator\n", - " Args:\n", - " n_iter: number of iterations for MCT to calibrate on\n", - "\n", - " Returns:\n", - " A representative dataset generator\n", - "\n", - " \"\"\"\n", - "\n", - " def representative_dataset():\n", - " \"\"\"\n", - " Creates a representative dataset generator from a PyTorch data loader, The generator yields numpy\n", - " arrays of batches of shape: [Batch, H, W ,C]\n", - " Returns:\n", - " A representative dataset generator\n", - "\n", - " \"\"\"\n", - " ds_iter = iter(loader)\n", - " for _ in range(n_iter):\n", - " t = next(ds_iter)[0]\n", - " # Convert the Torch tensor from the data loader to a numpy array and transpose to the\n", - " # right shape: [B, C, H, W] -> [B, H, W, C]\n", - " tf_shaped_tensor = t.detach().cpu().numpy().transpose((0, 2, 3, 1))\n", - " yield [tf_shaped_tensor]\n", - "\n", - " return representative_dataset\n", - "\n", - "\n", - "# Set IMX500-v1 TPC\n", - "tpc = mct.get_target_platform_capabilities(\"tensorflow\", 'imx500', target_platform_version='v1')\n", - "# set weights memory size, so the quantized model will fit the IMX500 memory\n", - "resource_utilization = mct.core.ResourceUtilization(weights_memory=2674291)\n", - "# set MixedPrecision configuration for compressing the weights\n", - "mp_config = mct.core.MixedPrecisionQuantizationConfig(use_hessian_based_scores=False)\n", - "core_config = mct.core.CoreConfig(mixed_precision_config=mp_config)\n", - "quant_model, _ = mct.ptq.keras_post_training_quantization(\n", - " model,\n", - " get_representative_dataset(20),\n", - " target_resource_utilization=resource_utilization,\n", - " core_config=core_config,\n", - " target_platform_capabilities=tpc)" - ], - "metadata": { - "collapsed": false - }, - "id": "6f1fa147c5a16df" - }, - { - "cell_type": "markdown", - "source": [ - "### Evaluate quantized Keras model\n", - "\n", - "Quantized Keras model evaluation applied the same as the original model." - ], - "metadata": { - "collapsed": false - }, - "id": "79ae299b0b019953" - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "quant_map = acc_eval(quant_model, batch_size=64, config=config)\n", - "\n", - "print(f' ===>> Float model mAP = {100*float_map:2.3f}, Quantized model mAP = {100*quant_map:2.3f}')" - ], - "metadata": { - "collapsed": false - }, - "id": "6f93b9b932fb39cc" - }, - { - "cell_type": "markdown", - "source": [ - "## Export and Load the quantized model\n", - "Lastly, we will demonstrate how to export the quantized model into a file and then load it.\n", - "\n", - "We will use `keras_export_model` function to save the quantized model with the integrated custom quantizers into a \".keras\" file format." 
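In addition to the `.keras` export shown in the next cell, the same exporter can emit a fake-quantized TFLite file, as the MobileNetV2 tutorial in this set does. A minimal sketch (whether this path handles the integrated `SSDPostProcess` custom layer is not verified here):

```python
# Sketch only: export the same quantized model as a fake-quantized TFLite file,
# as the MobileNetV2 tutorial in this set does. Handling of the integrated
# SSDPostProcess custom layer on this path is not verified here.
mct.exporter.keras_export_model(
    model=quant_model,
    save_model_path='./quant_model.tflite',
    serialization_format=mct.exporter.KerasExportSerializationFormat.TFLITE,
    quantization_format=mct.exporter.QuantizationFormat.FAKELY_QUANT)
```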
- ], - "metadata": { - "collapsed": false - }, - "id": "ee1b78821510df89" - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "# Export a keras model with mctq custom quantizers into a file\n", - "mct.exporter.keras_export_model(model=quant_model,\n", - " save_model_path='./quant_model.keras')" - ], - "metadata": { - "collapsed": false - }, - "id": "6012dc5634e36841" - }, - { - "cell_type": "markdown", - "source": [ - "Then, we can load the saved model using `keras_load_quantized_model` function. For this specific case, we'll have to supply the load function with an extra custom layer integrated into the model, namely `SSDPostProcess`." - ], - "metadata": { - "collapsed": false - }, - "id": "4a311376344a903" - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "from sony_custom_layers.keras.object_detection.ssd_post_process import SSDPostProcess\n", - "\n", - "custom_objects = {SSDPostProcess.__name__: SSDPostProcess} # An extra custom layer integrated in the model \n", - "quant_model_from_file = mct.keras_load_quantized_model('./quant_model.keras', custom_objects=custom_objects)" - ], - "metadata": { - "collapsed": false - }, - "id": "3c7de31aa90bc002" - }, - { - "cell_type": "markdown", - "source": [ - "\\\n", - "Copyright 2023 Sony Semiconductor Israel, Inc. All rights reserved.\n", - "\n", - "Licensed under the Apache License, Version 2.0 (the \"License\");\n", - "you may not use this file except in compliance with the License.\n", - "You may obtain a copy of the License at\n", - "\n", - " http://www.apache.org/licenses/LICENSE-2.0\n", - "\n", - "Unless required by applicable law or agreed to in writing, software\n", - "distributed under the License is distributed on an \"AS IS\" BASIS,\n", - "WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", - "See the License for the specific language governing permissions and\n", - "limitations under the License." 
- ], - "metadata": { - "collapsed": false - }, - "id": "d36d177779d29347" - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 2 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.6" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/tutorials/notebooks/imx500_notebooks/keras/example_keras_mobilenetv2_for_imx500.ipynb b/tutorials/notebooks/imx500_notebooks/keras/example_keras_mobilenetv2_for_imx500.ipynb deleted file mode 100644 index 57c76e472..000000000 --- a/tutorials/notebooks/imx500_notebooks/keras/example_keras_mobilenetv2_for_imx500.ipynb +++ /dev/null @@ -1,606 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "f8194007-6ea7-4e00-8931-a37ca2d0dd20", - "metadata": { - "id": "f8194007-6ea7-4e00-8931-a37ca2d0dd20" - }, - "source": [ - "# Post-Training Quantization Example of MobileNetV2 Keras Model" - ] - }, - { - "cell_type": "markdown", - "id": "9be59ea8-e208-4b64-aede-1dd6270b3540", - "metadata": { - "id": "9be59ea8-e208-4b64-aede-1dd6270b3540" - }, - "source": [ - "[Run this tutorial in Google Colab](https://colab.research.google.com/github/sony/model_optimization/blob/main/tutorials/notebooks/imx500_notebooks/keras/example_keras_mobilenetv2_for_imx500.ipynb)" - ] - }, - { - "cell_type": "markdown", - "id": "930e6d6d-4980-4d66-beed-9ff5a494acf9", - "metadata": { - "id": "930e6d6d-4980-4d66-beed-9ff5a494acf9" - }, - "source": [ - "## Overview" - ] - }, - { - "cell_type": "markdown", - "id": "699be4fd-d382-4eec-9d3f-e2e85cfb1762", - "metadata": { - "id": "699be4fd-d382-4eec-9d3f-e2e85cfb1762" - }, - "source": [ - "This tutorial demonstrates a pre-trained model quantization using the **Model Compression Toolkit (MCT)**. \n", - "\n", - "It is done using the MCT's **Post-Training Quantization** tool. \n", - "\n", - "As we will see, post-training quantization is a low complexity yet effective quantization scheme. \n", - "\n", - "In this example, we quantize the model and evaluate the accuracy before and after quantization." - ] - }, - { - "cell_type": "markdown", - "id": "85199e25-c587-41b1-aaf5-e1d23ce97ca1", - "metadata": { - "id": "85199e25-c587-41b1-aaf5-e1d23ce97ca1" - }, - "source": [ - "## Summary" - ] - }, - { - "cell_type": "markdown", - "id": "9c0e9543-d356-412f-acf1-c2ecad553e06", - "metadata": { - "id": "9c0e9543-d356-412f-acf1-c2ecad553e06" - }, - "source": [ - "In this tutorial we cover the following subjects:\n", - "\n", - "1. Post-Training Quantization using MCT.\n", - "2. Loading and preprocessing ImageNet's validation dataset.\n", - "3. Constructing an unlabeled representative dataset.\n", - "4. Accuracy evaluation of the floating-point and the quantized models." 
- ] - }, - { - "cell_type": "markdown", - "id": "04228b7c-00f1-4ded-bead-722e2a4e89a0", - "metadata": { - "tags": [], - "id": "04228b7c-00f1-4ded-bead-722e2a4e89a0" - }, - "source": [ - "## Setup" - ] - }, - { - "cell_type": "markdown", - "id": "2657cf1a-654d-45a6-b877-8bf42fc26d0d", - "metadata": { - "id": "2657cf1a-654d-45a6-b877-8bf42fc26d0d" - }, - "source": [ - "Install and import the relevant packages:\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "324685b9-5dcc-4d22-80f4-dec9a93d3324", - "metadata": { - "tags": [], - "id": "324685b9-5dcc-4d22-80f4-dec9a93d3324" - }, - "outputs": [], - "source": [ - "TF_VER = '2.14.0'\n", - "\n", - "!pip install -q tensorflow=={TF_VER}\n", - "\n", - "import importlib\n", - "if not importlib.util.find_spec('model_compression_toolkit'):\n", - " !pip install model_compression_toolkit" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b3f0acc8-281c-4bca-b0b9-3d7677105f19", - "metadata": { - "id": "b3f0acc8-281c-4bca-b0b9-3d7677105f19" - }, - "outputs": [], - "source": [ - "import tensorflow as tf\n", - "import keras\n", - "import model_compression_toolkit as mct\n", - "import os" - ] - }, - { - "cell_type": "markdown", - "id": "0c7fed0d-cfc8-41ee-adf1-22a98110397b", - "metadata": { - "id": "0c7fed0d-cfc8-41ee-adf1-22a98110397b" - }, - "source": [ - "## Dataset preparation" - ] - }, - { - "cell_type": "markdown", - "source": [ - "Download ImageNet dataset with only the validation split.\n", - "\n", - "**Note** that for demonstration purposes we use the validation set for the model quantization routines. Usually, a subset of the training dataset is used, but loading it is a heavy procedure that is unnecessary for the sake of this demonstration.\n", - "\n", - "This step may take several minutes..." - ], - "metadata": { - "collapsed": false - }, - "id": "aecde59e4c37b1da" - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "if not os.path.isdir('imagenet'):\n", - " !mkdir imagenet\n", - " !wget https://image-net.org/data/ILSVRC/2012/ILSVRC2012_devkit_t12.tar.gz\n", - " !mv ILSVRC2012_devkit_t12.tar.gz imagenet/\n", - " !wget https://image-net.org/data/ILSVRC/2012/ILSVRC2012_img_val.tar\n", - " !mv ILSVRC2012_img_val.tar imagenet/" - ], - "metadata": { - "collapsed": false - }, - "id": "5c18b26e293b085e" - }, - { - "cell_type": "markdown", - "source": [ - "Extract ImageNet validation dataset using using 'prepare_imagenet.sh' script" - ], - "metadata": { - "collapsed": false - }, - "id": "e48ff22f70ce997e" - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "if not os.path.isdir('imagenet/val'):\n", - " import subprocess\n", - " !git clone https://github.com/sony/model_optimization.git temp_mct && mv temp_mct/tutorials . 
&& \\rm -rf temp_mct\n", - " !chmod +x tutorials/resources/scripts/prepare_imagenet.sh\n", - " subprocess.run(['tutorials/resources/scripts/prepare_imagenet.sh'])" - ], - "metadata": { - "collapsed": false - }, - "id": "5a6fb997c54aa3fb" - }, - { - "cell_type": "markdown", - "id": "028112db-3143-4fcb-96ae-e639e6476c31", - "metadata": { - "id": "028112db-3143-4fcb-96ae-e639e6476c31" - }, - "source": [ - "Define the required preprocessing method for the pretrained model" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ed56f505-97ff-4acb-8ad8-ef09c53e9d57", - "metadata": { - "id": "ed56f505-97ff-4acb-8ad8-ef09c53e9d57" - }, - "outputs": [], - "source": [ - "def imagenet_preprocess_input(images, labels):\n", - " return tf.keras.applications.mobilenet_v2.preprocess_input(images), labels" - ] - }, - { - "cell_type": "markdown", - "source": [ - "### Representative dataset construction\n", - "We show how to create a generator for the representative dataset, which is required for post-training quantization.\n", - "\n", - "The representative dataset is used for collecting statistics on the inference outputs of all layers in the model.\n", - " \n", - "In order to decide on the size of the representative dataset, we configure the batch size and the number of calibration iterations.\n", - "This gives us the total number of samples that will be used during PTQ (batch_size x n_iter).\n", - "In this example we set `batch_size = 50` and `n_iter = 10`, resulting in a total of 500 representative images.\n", - "\n", - "Please ensure that the dataset path has been set correctly." - ], - "metadata": { - "collapsed": false - }, - "id": "fcbb3eecae5346a9" - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0408f624-ab68-4989-95f8-f9d327882840", - "metadata": { - "id": "0408f624-ab68-4989-95f8-f9d327882840" - }, - "outputs": [], - "source": [ - "from typing import Generator\n", - "\n", - "REPRESENTATIVE_DATASET_FOLDER = './imagenet/val'\n", - "BATCH_SIZE = 50\n", - "n_iter=10\n", - "\n", - "# Create representative dataset generator\n", - "def get_representative_dataset() -> Generator:\n", - " \"\"\"A function that loads the dataset and returns a representative dataset generator.\n", - "\n", - " Returns:\n", - " Generator: A generator yielding batches of preprocessed images.\n", - " \"\"\"\n", - "\n", - " # Load the dataset from folder\n", - " print('loading dataset, this may take few minutes ...') \n", - " dataset = tf.keras.utils.image_dataset_from_directory(\n", - " directory=REPRESENTATIVE_DATASET_FOLDER,\n", - " batch_size=BATCH_SIZE,\n", - " image_size=[224, 224],\n", - " shuffle=True,\n", - " crop_to_aspect_ratio=True,\n", - " interpolation='bilinear') \n", - " # Preprocess the data\n", - " dataset = dataset.map(lambda x, y: (imagenet_preprocess_input(x, y)))\n", - "\n", - " def representative_dataset() -> Generator:\n", - " \"\"\"A generator function that yields batch of preprocessed images.\n", - "\n", - " Yields:\n", - " A batch of preprocessed images.\n", - " \"\"\"\n", - " for _ in range(n_iter):\n", - " yield dataset.take(1).get_single_element()[0].numpy()\n", - "\n", - " return representative_dataset\n", - "\n", - "# Create a representative dataset generator\n", - "representative_dataset_gen = get_representative_dataset()" - ] - }, - { - "cell_type": "markdown", - "id": "4a1e9ba6-2954-4506-ad5c-0da273701ba5", - "metadata": { - "id": "4a1e9ba6-2954-4506-ad5c-0da273701ba5" - }, - "source": [ - "## Model Post-Training quantization using MCT" - ] - }, - { - 
"cell_type": "markdown", - "id": "55edbb99-ab2f-4dde-aa74-4ddee61b2615", - "metadata": { - "id": "55edbb99-ab2f-4dde-aa74-4ddee61b2615" - }, - "source": [ - "This is the main part in which we quantize our model.\n", - "\n", - "First, we load a pre-trained MobileNetV2 model from Keras, in 32-bits floating-point precision format." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "80cac59f-ec5e-41ca-b673-96220924a47c", - "metadata": { - "id": "80cac59f-ec5e-41ca-b673-96220924a47c" - }, - "outputs": [], - "source": [ - "from keras.applications.mobilenet_v2 import MobileNetV2\n", - "float_model = MobileNetV2()" - ] - }, - { - "cell_type": "markdown", - "id": "8a8b486a-ca39-45d9-8699-f7116b0414c9", - "metadata": { - "id": "8a8b486a-ca39-45d9-8699-f7116b0414c9" - }, - "source": [ - "Next, we need to define a `TargetPlatformCapability` object, representing the HW specifications on which we wish to eventually deploy our quantized model.\n", - "\n", - "In addition, we need to define the Quantization Configuration for our PTQ routine.\n", - "\n", - "Here, we demonstrate how to define a quantization configuration with several key argument that can be controlled by the user.\n", - "**Note** that you can skip this part if you prefer to use the default quantization settings." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "from model_compression_toolkit.core import QuantizationErrorMethod\n", - "\n", - "# Specify the IMX500-v1 target platform capability (TPC) \n", - "tpc = mct.get_target_platform_capabilities(\"tensorflow\", 'imx500', target_platform_version='v1')\n", - "\n", - "# Set the following quantization configurations:\n", - "# Choose the desired QuantizationErrorMethod for the quantization parameters search.\n", - "# Enable weights bias correction induced by quantization.\n", - "# Enable shift negative corrections for improving 'signed' non-linear functions quantization (such as swish, prelu, etc.) \n", - "# Set the threshold to filter outliers with z_score of 16. \n", - "q_config = mct.core.QuantizationConfig(activation_error_method=QuantizationErrorMethod.MSE,\n", - " weights_error_method=QuantizationErrorMethod.MSE,\n", - " weights_bias_correction=True,\n", - " shift_negative_activation_correction=True,\n", - " z_threshold=16)\n", - "\n", - "ptq_config = mct.core.CoreConfig(quantization_config=q_config)" - ], - "metadata": { - "collapsed": false - }, - "id": "2edacb5b7779e4d8" - }, - { - "cell_type": "markdown", - "source": [ - "### Run model Post-Training Quantization\n", - "Lastly, we quantize our model using MCT's post-training quantization API." - ], - "metadata": { - "collapsed": false - }, - "id": "6162dd6dd1fce7ab" - }, - { - "cell_type": "code", - "execution_count": null, - "id": "33f8373a-82a5-4b97-9a10-25ee2341d148", - "metadata": { - "id": "33f8373a-82a5-4b97-9a10-25ee2341d148" - }, - "outputs": [], - "source": [ - "quantized_model, quantization_info = mct.ptq.keras_post_training_quantization(\n", - " in_model=float_model, \n", - " representative_data_gen=representative_dataset_gen, \n", - " core_config=ptq_config, \n", - " target_platform_capabilities=tpc)" - ] - }, - { - "cell_type": "markdown", - "id": "7382ada6-d001-4564-907d-767fa4e9ec56", - "metadata": { - "id": "7382ada6-d001-4564-907d-767fa4e9ec56" - }, - "source": [ - "That's it! Our model is now quantized." 
- ] - }, - { - "cell_type": "markdown", - "id": "5a7a5150-3b92-49b5-abb2-06e6c5c91d6b", - "metadata": { - "id": "5a7a5150-3b92-49b5-abb2-06e6c5c91d6b" - }, - "source": [ - "## Models evaluation" - ] - }, - { - "cell_type": "markdown", - "id": "0ce4fc61-e13c-48be-9f7c-d441ad76a386", - "metadata": { - "id": "0ce4fc61-e13c-48be-9f7c-d441ad76a386" - }, - "source": [ - "In order to evaluate our models, we first need to load the validation dataset. As before, please ensure that the dataset path has been set correctly." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "eef7c875-c4fc-4819-97e5-721805cba546", - "metadata": { - "tags": [], - "id": "eef7c875-c4fc-4819-97e5-721805cba546" - }, - "outputs": [], - "source": [ - "TEST_DATASET_FOLDER = './imagenet/val'\n", - "def get_validation_dataset() -> tf.data.Dataset:\n", - " \"\"\"Load the validation dataset for evaluation.\n", - "\n", - " Returns:\n", - " tf.data.Dataset: The validation dataset.\n", - " \"\"\"\n", - " dataset = tf.keras.utils.image_dataset_from_directory(\n", - " directory=TEST_DATASET_FOLDER,\n", - " batch_size=BATCH_SIZE,\n", - " image_size=[224, 224],\n", - " shuffle=False,\n", - " crop_to_aspect_ratio=True,\n", - " interpolation='bilinear')\n", - " dataset = dataset.map(lambda x, y: (imagenet_preprocess_input(x, y)))\n", - " return dataset\n", - "\n", - "evaluation_dataset = get_validation_dataset()" - ] - }, - { - "cell_type": "markdown", - "id": "9889d217-90a6-4615-8569-38dc9cdd5999", - "metadata": { - "id": "9889d217-90a6-4615-8569-38dc9cdd5999" - }, - "source": [ - "Let's start with the floating-point model evaluation.\n", - "\n", - "We need to compile the model before evaluation and set the loss and the evaluation metric:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1d3a0ae9-beaa-4af8-8481-49d4917c2209", - "metadata": { - "id": "1d3a0ae9-beaa-4af8-8481-49d4917c2209" - }, - "outputs": [], - "source": [ - "float_model.compile(loss=keras.losses.SparseCategoricalCrossentropy(), metrics=[\"accuracy\"])\n", - "results = float_model.evaluate(evaluation_dataset)" - ] - }, - { - "cell_type": "markdown", - "id": "ead4a6f3-86a0-4e6c-8229-a2ff514f7b8c", - "metadata": { - "id": "ead4a6f3-86a0-4e6c-8229-a2ff514f7b8c" - }, - "source": [ - "Finally, let's evaluate the quantized model:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1bc377ee-39b4-4ced-95db-f7d51ab60848", - "metadata": { - "id": "1bc377ee-39b4-4ced-95db-f7d51ab60848" - }, - "outputs": [], - "source": [ - "quantized_model.compile(loss=keras.losses.SparseCategoricalCrossentropy(), metrics=[\"accuracy\"])\n", - "results = quantized_model.evaluate(evaluation_dataset)" - ] - }, - { - "cell_type": "markdown", - "id": "ebfbb4de-5b6e-4732-83d3-a21e96cdd866", - "metadata": { - "id": "ebfbb4de-5b6e-4732-83d3-a21e96cdd866" - }, - "source": [ - "You can see that we got a very small degradation with a compression rate of x4 !" - ] - }, - { - "cell_type": "markdown", - "source": [ - "Now, we can export the model to Keras and TFLite. Please ensure that the `save_model_path` has been set correctly." 
- ], - "metadata": { - "id": "6YjIdiRRjgkL" - }, - "id": "6YjIdiRRjgkL" - }, - { - "cell_type": "code", - "source": [ - "mct.exporter.keras_export_model(model=quantized_model, save_model_path='./qmodel.tflite',\n", - " serialization_format=mct.exporter.KerasExportSerializationFormat.TFLITE, quantization_format=mct.exporter.QuantizationFormat.FAKELY_QUANT)\n", - "\n", - "mct.exporter.keras_export_model(model=quantized_model, save_model_path='./qmodel.keras')" - ], - "metadata": { - "id": "z3CA16-ojoFL" - }, - "id": "z3CA16-ojoFL", - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "id": "14877777", - "metadata": { - "id": "14877777" - }, - "source": [ - "## Conclusion" - ] - }, - { - "cell_type": "markdown", - "id": "bb7e1572", - "metadata": { - "id": "bb7e1572" - }, - "source": [ - "In this tutorial, we demonstrated how to quantize a pre-trained model using MCT with a few lines of code. We saw that we can achieve an x4 compression ratio with minimal performance degradation.\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "id": "01c1645e-205c-4d9a-8af3-e497b3addec1", - "metadata": { - "id": "01c1645e-205c-4d9a-8af3-e497b3addec1" - }, - "source": [ - "\n", - "\n", - "Copyright 2022 Sony Semiconductor Israel, Inc. All rights reserved.\n", - "\n", - "Licensed under the Apache License, Version 2.0 (the \"License\");\n", - "you may not use this file except in compliance with the License.\n", - "You may obtain a copy of the License at\n", - "\n", - " http://www.apache.org/licenses/LICENSE-2.0\n", - "\n", - "Unless required by applicable law or agreed to in writing, software\n", - "distributed under the License is distributed on an \"AS IS\" BASIS,\n", - "WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", - "See the License for the specific language governing permissions and\n", - "limitations under the License.\n" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.7" - }, - "colab": { - "provenance": [] - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/tutorials/notebooks/imx500_notebooks/keras/example_keras_nanodet_plus_for_imx500.ipynb b/tutorials/notebooks/imx500_notebooks/keras/example_keras_nanodet_plus_for_imx500.ipynb deleted file mode 100644 index 28380f01b..000000000 --- a/tutorials/notebooks/imx500_notebooks/keras/example_keras_nanodet_plus_for_imx500.ipynb +++ /dev/null @@ -1,392 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "4c261298-309f-41e8-9338-a5e205f09b05", - "metadata": {}, - "source": [ - "# Post Training Quantization a Nanodet-Plus Object Detection Model\n", - "\n", - "[Run this tutorial in Google Colab](https://colab.research.google.com/github/sony/model_optimization/blob/main/tutorials/notebooks/imx500_notebooks/keras/example_keras_nanodet_plus_for_imx500.ipynb)\n", - "\n", - "## Overview\n", - "\n", - "\n", - "In this tutorial, we'll demonstrate the post-training quantization using MCT for a pre-trained object detection model in Keras. Specifically, we'll integrate post-processing, including the non-maximum suppression (NMS) layer, into the model. 
This integration aligns with the imx500 target platform capabilities.\n", - "\n", - "In this example we will use an existing pre-trained Nanodet-Plus model taken from [https://github.com/RangiLyu/nanodet](https://github.com/RangiLyu/nanodet). We will convert the model to a Tensorflow model that includes box decoding and NMS layer. Further, we will quantize the model using MCT post training quantization and evaluate the performance of the floating point model and the quantized model on COCO dataset.\n", - "\n", - "\n", - "## Summary\n", - "\n", - "In this tutorial we will cover:\n", - "\n", - "1. Post-Training Quantization using MCT of Keras object detection model including the post-processing.\n", - "2. Data preparation - loading and preprocessing validation and representative datasets from COCO.\n", - "3. Accuracy evaluation of the floating-point and the quantized models." - ] - }, - { - "cell_type": "markdown", - "source": [ - "## Setup\n", - "Install the relevant packages." - ], - "metadata": { - "collapsed": false - }, - "id": "d74f9c855ec54081" - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "TF_VER = '2.14.0'\n", - "\n", - "!pip install -q tensorflow=={TF_VER}\n", - "!pip install -q pycocotools\n", - "!pip install 'huggingface-hub<=0.21.4'" - ], - "metadata": { - "collapsed": false - }, - "id": "7c7fa04c9903736f" - }, - { - "cell_type": "markdown", - "source": [ - "Install MCT (if it's not already installed). Additionally, in order to use all the necessary utility functions for this tutorial, we also copy [MCT tutorials folder](https://github.com/sony/model_optimization/tree/main/tutorials) and add it to the system path.\n" - ], - "metadata": { - "collapsed": false - }, - "id": "32eedce88a1e52bd" - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "import sys\n", - "import os\n", - "import importlib\n", - "\n", - "if not importlib.util.find_spec('model_compression_toolkit'):\n", - " !pip install model_compression_toolkit\n", - "!git clone https://github.com/sony/model_optimization.git temp_mct && mv temp_mct/tutorials . && \\rm -rf temp_mct\n", - "sys.path.insert(0,\"tutorials\")" - ], - "metadata": { - "collapsed": false - }, - "id": "342eb1e5639e0cb7" - }, - { - "cell_type": "markdown", - "source": [ - "Finally, load COCO evaluation set" - ], - "metadata": { - "collapsed": false - }, - "id": "625cd9bfff9aa210" - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "if not os.path.isdir('coco'):\n", - " !wget -nc http://images.cocodataset.org/annotations/annotations_trainval2017.zip\n", - " !unzip -q -o annotations_trainval2017.zip -d ./coco\n", - " !echo Done loading annotations\n", - " !wget -nc http://images.cocodataset.org/zips/val2017.zip\n", - " !unzip -q -o val2017.zip -d ./coco\n", - " !echo Done loading val2017 images" - ], - "metadata": { - "collapsed": false - }, - "id": "ab47e0b3bbfa4bd9" - }, - { - "cell_type": "markdown", - "id": "084c2b8b-3175-4d46-a18a-7c4d8b6fcb38", - "metadata": {}, - "source": [ - "## Floating Point Model\n", - "\n", - "### Load the pre-trained weights of Nanodet-Plus\n", - "We begin by loading a pre-trained [Nanodet-Plus](https://huggingface.co/SSI-DNN/keras_nanodet_plus_x1.5_416x416) model. This implementation is based on [nanodet](https://github.com/RangiLyu/nanodet). 
For further insights into the model's implementation details, please refer to [mct_model_garden](https://github.com/sony/model_optimization/tree/main/tutorials/mct_model_garden/models_keras/nanodet). " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e8395b28-4732-4d18-b081-5d3bdf508691", - "metadata": {}, - "outputs": [], - "source": [ - "from huggingface_hub import from_pretrained_keras\n", - "\n", - "model = from_pretrained_keras('SSI-DNN/keras_nanodet_plus_x1.5_416x416')" - ] - }, - { - "cell_type": "markdown", - "source": [ - "### Generate Nanoedet-Plus Keras model\n", - "In the following steps, we integrate the post-processing components, which include box decoding layers following by tensorflow [tf.image.combined_non_max_suppression](https://www.tensorflow.org/api_docs/python/tf/image/combined_non_max_suppression) layer.\n" - ], - "metadata": { - "collapsed": false - }, - "id": "7f148e78b769f1dc" - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "import tensorflow as tf\n", - "from keras.models import Model\n", - "from tutorials.mct_model_garden.models_keras.nanodet.nanodet_keras_model import nanodet_box_decoding\n", - "\n", - "# Parameters of nanodet-plus-m-1.5x_416\n", - "INPUT_RESOLUTION = 416\n", - "INPUT_SHAPE = (INPUT_RESOLUTION, INPUT_RESOLUTION, 3)\n", - "SCALE_FACTOR = 1.5\n", - "BOTTLENECK_RATIO = 0.5\n", - "FEATURE_CHANNELS = 128\n", - "\n", - "# Add Nanodet Box decoding layer (decode the model outputs to bounding box coordinates)\n", - "scores, boxes = nanodet_box_decoding(model.output, res=INPUT_RESOLUTION)\n", - "\n", - "# Add Tensorflow NMS layer\n", - "outputs = tf.image.combined_non_max_suppression(\n", - " boxes,\n", - " scores,\n", - " max_output_size_per_class=300,\n", - " max_total_size=300,\n", - " iou_threshold=0.65,\n", - " score_threshold=0.001,\n", - " pad_per_class=False,\n", - " clip_boxes=False\n", - " )\n", - "\n", - "model = Model(model.input, outputs, name='Nanodet_plus_m_1.5x_416')\n", - "\n", - "print('Model is ready for evaluation')" - ], - "metadata": { - "collapsed": false - }, - "id": "698ce1d40f2cdf1f" - }, - { - "cell_type": "markdown", - "id": "3cde2f8e-0642-4374-a1f4-df2775fe7767", - "metadata": {}, - "source": [ - "#### Evaluate the floating point model\n", - "Next, we evaluate the floating point model by using `cocoeval` library alongside additional dataset utilities. We can verify the mAP accuracy aligns with that of the original model. \n", - "Note that we set the \"batch_size\" to 5 and the preprocessing according to [Nanodet](https://github.com/RangiLyu/nanodet/tree/main).\n", - "Please ensure that the dataset path has been set correctly before running this code cell." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "56393342-cecf-4f64-b9ca-2f515c765942", - "metadata": {}, - "outputs": [], - "source": [ - "import cv2\n", - "from tutorials.mct_model_garden.evaluation_metrics.coco_evaluation import coco_dataset_generator, CocoEval\n", - "\n", - "EVAL_DATASET_FOLDER = './coco/val2017'\n", - "EVAL_DATASET_ANNOTATION_FILE = './coco/annotations/instances_val2017.json'\n", - "\n", - "BATCH_SIZE = 5\n", - "\n", - "def nanodet_preprocess(x):\n", - " img_mean = [103.53, 116.28, 123.675]\n", - " img_std = [57.375, 57.12, 58.395]\n", - " x = cv2.resize(x, (416, 416))\n", - " x = (x - img_mean) / img_std\n", - " return x\n", - "\n", - "# Load COCO evaluation set\n", - "val_dataset = coco_dataset_generator(dataset_folder=EVAL_DATASET_FOLDER,\n", - " annotation_file=EVAL_DATASET_ANNOTATION_FILE,\n", - " preprocess=nanodet_preprocess,\n", - " batch_size=BATCH_SIZE)\n", - "\n", - "# Initialize the evaluation metric object\n", - "coco_metric = CocoEval(EVAL_DATASET_ANNOTATION_FILE)\n", - "\n", - "# Iterate and the evaluation set\n", - "for batch_idx, (images, targets) in enumerate(val_dataset):\n", - " \n", - " # Run inference on the batch\n", - " outputs = model(images)\n", - "\n", - " # Add the model outputs to metric object (a dictionary of outputs after postprocess: boxes, scores & classes)\n", - " coco_metric.add_batch_detections(outputs, targets)\n", - " if (batch_idx + 1) % 100 == 0:\n", - " print(f'processed {(batch_idx + 1) * BATCH_SIZE} images')\n", - "\n", - "# Print float model mAP results\n", - "print(\"Float model mAP: {:.4f}\".format(coco_metric.result()[0]))" - ] - }, - { - "cell_type": "markdown", - "id": "015e760b-6555-45b4-aaf9-500e974c1d86", - "metadata": {}, - "source": [ - "## Quantize Model\n", - "\n", - "### Post training quantization using Model Compression Toolkit \n", - "\n", - "Now we are ready to use MCT's post training quantization! We will define a representative dataset and proceed with the model quantization. Please note that, for the sake of demonstration, we'll use the evaluation dataset as our representative dataset (and skip the download of the training dataset). We will use 100 representative images for calibration (20 iterations of \"batch_size\" images each).\n", - "Same as the above section, please ensure that the dataset path has been set correctly." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "01e90967-594b-480f-b2e6-45e2c9ce9cee", - "metadata": {}, - "outputs": [], - "source": [ - "import model_compression_toolkit as mct\n", - "from typing import Iterator, Tuple, List\n", - "\n", - "REPRESENTATIVE_DATASET_FOLDER = './coco/val2017'\n", - "REPRESENTATIVE_DATASET_ANNOTATION_FILE = './coco/annotations/instances_val2017.json'\n", - "n_iters = 20\n", - "\n", - "# Load representative dataset\n", - "representative_dataset = coco_dataset_generator(dataset_folder=REPRESENTATIVE_DATASET_FOLDER,\n", - " annotation_file=REPRESENTATIVE_DATASET_ANNOTATION_FILE,\n", - " preprocess=nanodet_preprocess,\n", - " batch_size=BATCH_SIZE)\n", - "\n", - "# Define representative dataset generator\n", - "def get_representative_dataset(n_iter: int, dataset_loader: Iterator[Tuple]):\n", - " \"\"\"\n", - " This function creates a representative dataset generator.\n", - " \n", - " Args:\n", - " n_iter: number of iterations for MCT to calibrate on\n", - " Returns:\n", - " A representative dataset generator\n", - " \"\"\" \n", - " def representative_dataset() -> Iterator[List]:\n", - " \"\"\"\n", - " Creates a representative dataset generator from a PyTorch data loader, The generator yields numpy\n", - " arrays of batches of shape: [Batch, H, W ,C].\n", - " \n", - " Returns:\n", - " A representative dataset generator\n", - " \"\"\"\n", - " ds_iter = iter(dataset_loader)\n", - " for _ in range(n_iter):\n", - " yield [next(ds_iter)[0]]\n", - "\n", - " return representative_dataset\n", - "\n", - "# Preform post training quantization \n", - "quant_model, _ = mct.ptq.keras_post_training_quantization(model,\n", - " get_representative_dataset(n_iters, representative_dataset))\n", - "\n", - "print('Quantized model is ready')" - ] - }, - { - "cell_type": "markdown", - "id": "4fb6bffc-23d1-4852-8ec5-9007361c8eeb", - "metadata": {}, - "source": [ - "### Evaluate quantized model\n", - "Lastly, we can evaluate the performance of the quantized model. There is a slight decrease in performance that can be further mitigated by either expanding the representative dataset or employing MCT's advanced quantization methods, such as EPTQ (Enhanced Post Training Quantization)." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8dc7b87c-a9f4-4568-885a-fe009c8f4e8f", - "metadata": {}, - "outputs": [], - "source": [ - "# Re-load COCO evaluation set\n", - "val_dataset = coco_dataset_generator(dataset_folder=EVAL_DATASET_FOLDER,\n", - " annotation_file=EVAL_DATASET_ANNOTATION_FILE,\n", - " preprocess=nanodet_preprocess,\n", - " batch_size=BATCH_SIZE)\n", - "\n", - "# Initialize the evaluation metric object\n", - "coco_metric = CocoEval(EVAL_DATASET_ANNOTATION_FILE)\n", - "\n", - "# Iterate and the evaluation set\n", - "for batch_idx, (images, targets) in enumerate(val_dataset):\n", - " # Run inference on the batch\n", - " outputs = quant_model(images)\n", - "\n", - " # Add the model outputs to metric object (a dictionary of outputs after postprocess: boxes, scores & classes)\n", - " coco_metric.add_batch_detections(outputs, targets)\n", - " if (batch_idx + 1) % 100 == 0:\n", - " print(f'processed {(batch_idx + 1) * BATCH_SIZE} images')\n", - "\n", - "# Print quantized model mAP results\n", - "print(\"Quantized model mAP: {:.4f}\".format(coco_metric.result()[0]))" - ] - }, - { - "cell_type": "markdown", - "source": [ - "\\\n", - "Copyright 2024 Sony Semiconductor Israel, Inc. 
All rights reserved.\n", - "\n", - "Licensed under the Apache License, Version 2.0 (the \"License\");\n", - "you may not use this file except in compliance with the License.\n", - "You may obtain a copy of the License at\n", - "\n", - " http://www.apache.org/licenses/LICENSE-2.0\n", - "\n", - "Unless required by applicable law or agreed to in writing, software\n", - "distributed under the License is distributed on an \"AS IS\" BASIS,\n", - "WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", - "See the License for the specific language governing permissions and\n", - "limitations under the License." - ], - "metadata": { - "collapsed": false - }, - "id": "764aee7ef2258942" - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.13" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/tutorials/notebooks/imx500_notebooks/keras/keras_deeplabv3plus_for_imx500.ipynb b/tutorials/notebooks/imx500_notebooks/keras/keras_deeplabv3plus_for_imx500.ipynb deleted file mode 100644 index 0657e4c99..000000000 --- a/tutorials/notebooks/imx500_notebooks/keras/keras_deeplabv3plus_for_imx500.ipynb +++ /dev/null @@ -1,356 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "4c261298-309f-41e8-9338-a5e205f09b05", - "metadata": {}, - "source": [ - "# DeeplabV3+ Semantic Segmentation Keras Model - Quantization for IMX500\n", - "\n", - "[Run this tutorial in Google Colab](https://colab.research.google.com/github/sony/model_optimization/blob/main/tutorials/notebooks/imx500_notebooks/keras/keras_deeplabv3plus_for_imx500.ipynb)\n", - "\n", - "## Overview\n", - "\n", - "In this tutorial, we'll quantize the DeeplabV3+ model for semantic segmentation with MCT's post-training quantization techniques. The pretrained model was created with [bonlime's repo](https://github.com/bonlime/keras-deeplab-v3-plus), after making the following changes:\n", - "\n", - "1. Input image size set to 320x320.\n", - "2. `expand_dims` in `Lambda` layer replaced with a simple `Reshape` layer.\n", - "3. `tf.compat.v1.image.resize` in `Lambda` layer replaced with a `Resizing` layer.\n", - "4. Added `argmax` at model output to calculate the class id." - ] - }, - { - "cell_type": "markdown", - "source": [ - "## Setup\n", - "### Install the relevant packages" - ], - "metadata": { - "collapsed": false - }, - "id": "d74f9c855ec54081" - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "TF_VER = '2.14.0'\n", - "\n", - "!pip install -q tensorflow=={TF_VER}\n", - "!pip install 'huggingface-hub<=0.21.4'\n", - "\n", - "import importlib\n", - "\n", - "if not importlib.util.find_spec('model_compression_toolkit'):\n", - " !pip install model_compression_toolkit" - ], - "metadata": { - "collapsed": false - }, - "id": "7c7fa04c9903736f" - }, - { - "cell_type": "markdown", - "source": [ - "### Download Pascel VOC 2012 dataset\n", - "\n", - "Download the Pascal dataset to the local folder." 
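Once the download cell below has finished extracting the archive, the `PascalDataset` class defined later in this notebook reads from the paths listed here; a small, optional sanity check:

```python
import os

# Optional sanity check: these are the paths the PascalDataset class defined
# later in this notebook reads from, so they should exist after extraction.
expected_paths = [
    'VOCdevkit/VOC2012/ImageSets/Segmentation/val.txt',
    'VOCdevkit/VOC2012/JPEGImages',
    'VOCdevkit/VOC2012/SegmentationClass',
]
for p in expected_paths:
    print(p, '->', 'found' if os.path.exists(p) else 'missing')
```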
- ], - "metadata": { - "collapsed": false - }, - "id": "7a1038b9fd98bba2" - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "import os\n", - "\n", - "if not os.path.isdir('VOCdevkit'):\n", - " !wget -nc http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar\n", - " !tar -xf VOCtrainval_11-May-2012.tar\n", - " !echo Done loading PascelVOC 2012" - ], - "metadata": { - "collapsed": false - }, - "id": "8bea492d71b4060f" - }, - { - "cell_type": "markdown", - "id": "084c2b8b-3175-4d46-a18a-7c4d8b6fcb38", - "metadata": {}, - "source": [ - "## Model Quantization\n", - "\n", - "### Download a Pre-Trained Model \n", - "\n", - "We begin by loading a pre-trained [DeeplabV3+](https://huggingface.co/SSI-DNN/keras_deeplabv3_plus_320) model. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e8395b28-4732-4d18-b081-5d3bdf508691", - "metadata": {}, - "outputs": [], - "source": [ - "from huggingface_hub import from_pretrained_keras\n", - "\n", - "model = from_pretrained_keras('SSI-DNN/keras_deeplabv3_plus_320')" - ] - }, - { - "cell_type": "markdown", - "source": [ - "### Create dataset object" - ], - "metadata": { - "collapsed": false - }, - "id": "8e684d59a80f273e" - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "import os\n", - "import numpy as np\n", - "from PIL import Image\n", - "import cv2\n", - "\n", - "\n", - "class PascalDataset:\n", - " def __init__(self, img_size=320, batch_size=16):\n", - " base_path = 'VOCdevkit/VOC2012'\n", - " val_file = os.path.join(base_path, 'ImageSets', 'Segmentation', 'val.txt')\n", - " with open(val_file) as f:\n", - " self.images_names = [os.path.join(base_path, 'JPEGImages', fname.split('\\n')[0] + '.jpg')\n", - " for fname in f.readlines()]\n", - " self.annotations_dict = {}\n", - " with open(val_file) as f:\n", - " for fname in f.readlines():\n", - " full_path_label_file = os.path.join(base_path, 'SegmentationClass', fname.split('\\n')[0] + '.png')\n", - " self.annotations_dict.update({os.path.basename(full_path_label_file.replace('.png', '.jpg')): full_path_label_file})\n", - "\n", - " self.inds = list(range(len(self.images_names)))\n", - " self.img_size = img_size\n", - " self.batch_size = batch_size\n", - "\n", - " def shuffle(self):\n", - " self.inds = np.random.permutation(self.inds)\n", - "\n", - " def __len__(self):\n", - " return int(np.ceil(len(self.images_names) / self.batch_size))\n", - "\n", - " def __iter__(self):\n", - " img_batch, ann_batch = [], []\n", - " for b, i in enumerate(self.inds):\n", - " img_name = self.images_names[i]\n", - " _name = img_name.split('/')[-1]\n", - " img = np.array(Image.open(img_name))\n", - " img = cv2.resize(img, (self.img_size, self.img_size))\n", - " img = (img - 127.5) / 127.5\n", - " ann = np.array(Image.open(self.annotations_dict[_name]))\n", - " ann = cv2.resize(ann, (self.img_size, self.img_size), interpolation=cv2.INTER_NEAREST)\n", - "\n", - " img_batch.append(img)\n", - " ann_batch.append(ann)\n", - " if len(img_batch) == self.batch_size:\n", - " yield [np.stack(img_batch), np.stack(ann_batch)]\n", - " img_batch, ann_batch = [], []\n", - "\n", - " yield [np.stack(img_batch), np.stack(ann_batch)]" - ], - "metadata": { - "collapsed": false - }, - "id": "14a6e0ec4235701d" - }, - { - "cell_type": "markdown", - "id": "3cde2f8e-0642-4374-a1f4-df2775fe7767", - "metadata": {}, - "source": [ - "### Post training quantization using Model Compression Toolkit \n", - "\n", - "Now, we're all set to 
use MCT's post-training quantization. To begin, we'll define a representative dataset and proceed with the model quantization. Please note that, for demonstration purposes, we'll use the evaluation dataset as our representative dataset. We'll calibrate the model using 320 images, divided into 20 iterations of 'batch_size' images each. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "import model_compression_toolkit as mct\n", - "from typing import Iterator, List\n", - "\n", - "\n", - "n_iters = 20\n", - "\n", - "\n", - "# Define representative dataset generator\n", - "def get_representative_dataset():\n", - " \"\"\"\n", - " This function creates a representative dataset generator. The generator yields numpy\n", - " arrays of batches of shape: [Batch, H, W ,C].\n", - " \n", - " Returns:\n", - " A representative dataset generator\n", - " \"\"\" \n", - "\n", - " representative_dataset = PascalDataset()\n", - " representative_dataset.shuffle()\n", - " \n", - " \n", - " def _representative_dataset() -> Iterator[List]:\n", - " ds_iter = iter(representative_dataset)\n", - " for _ in range(n_iters):\n", - " yield [next(ds_iter)[0]]\n", - "\n", - " return _representative_dataset\n", - "\n", - "\n", - "# Set IMX500-v1 TPC\n", - "tpc = mct.get_target_platform_capabilities(\"tensorflow\", 'imx500', target_platform_version='v1')\n", - "\n", - "# Perform post training quantization\n", - "quant_model, _ = mct.ptq.keras_post_training_quantization(model,\n", - " get_representative_dataset(),\n", - " target_platform_capabilities=tpc)\n", - "print('Quantized model is ready')" - ], - "metadata": { - "collapsed": false - }, - "id": "56393342-cecf-4f64-b9ca-2f515c765942" - }, - { - "cell_type": "markdown", - "source": [ - "### Model Export\n", - "\n", - "Now, we can export the quantized model, ready for deployment, into a `.keras` format file. Please ensure that the `save_model_path` has been set correctly. " - ], - "metadata": { - "collapsed": false - }, - "id": "3be2016acdc9da60" - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "mct.exporter.keras_export_model(model=quant_model, save_model_path='qmodel.keras')" - ], - "metadata": { - "collapsed": false - }, - "id": "72dd885c7b92fa93" - }, - { - "cell_type": "markdown", - "id": "015e760b-6555-45b4-aaf9-500e974c1d86", - "metadata": {}, - "source": [ - "## Evaluation on Pascal dataset\n", - "\n", - "### Floating point model evaluation\n", - "\n", - "Evaluate the floating point model on PascalVoc using tensorflow MeanIoU metric." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "01e90967-594b-480f-b2e6-45e2c9ce9cee", - "metadata": {}, - "outputs": [], - "source": [ - "import tensorflow as tf\n", - "from tqdm import tqdm\n", - "\n", - "metric = tf.keras.metrics.MeanIoU(21, ignore_class=255)\n", - "for imgs, labels in tqdm(PascalDataset()):\n", - " out = model(imgs)\n", - " metric.update_state(labels, out)\n", - "print(f'\\nFloat model MeanIOU = {metric.result().numpy()*100:2.3f}')" - ] - }, - { - "cell_type": "markdown", - "id": "4fb6bffc-23d1-4852-8ec5-9007361c8eeb", - "metadata": {}, - "source": [ - "### Quantized model evaluation\n", - "Lastly, we can evaluate the performance of the quantized model. There is a slight decrease in performance that can be further mitigated by either expanding the representative dataset or employing MCT's advanced quantization methods, such as GPTQ (Gradient-Based/Enhanced Post Training Quantization)." 
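For reference, a sketch of that GPTQ option, mirroring the YOLOv8 tutorial in this set (not executed here; the epoch count is an arbitrary example, and GPTQ is compute-heavy and best run on a GPU):

```python
# Sketch of the GPTQ option mentioned above (not executed in this tutorial),
# mirroring the YOLOv8 tutorial in this set. GPTQ is compute-heavy and is best
# run on a GPU; the epoch count is an arbitrary example value.
gptq_config = mct.gptq.get_keras_gptq_config(n_epochs=1000, use_hessian_based_weights=False)

gptq_model, _ = mct.gptq.keras_gradient_post_training_quantization(
    model,                         # the float DeeplabV3+ model loaded above
    get_representative_dataset(),  # the representative dataset generator defined above
    gptq_config=gptq_config,
    target_platform_capabilities=tpc)
```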
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8dc7b87c-a9f4-4568-885a-fe009c8f4e8f", - "metadata": {}, - "outputs": [], - "source": [ - "metric = tf.keras.metrics.MeanIoU(21, ignore_class=255)\n", - "for imgs, labels in tqdm(PascalDataset()):\n", - " out = quant_model(imgs)\n", - " metric.update_state(labels, out)\n", - "print(f'\\nQuantized model MeanIOU = {metric.result().numpy()*100:2.3f}')" - ] - }, - { - "cell_type": "markdown", - "source": [ - "\\\n", - "Copyright 2024 Sony Semiconductor Israel, Inc. All rights reserved.\n", - "\n", - "Licensed under the Apache License, Version 2.0 (the \"License\");\n", - "you may not use this file except in compliance with the License.\n", - "You may obtain a copy of the License at\n", - "\n", - " http://www.apache.org/licenses/LICENSE-2.0\n", - "\n", - "Unless required by applicable law or agreed to in writing, software\n", - "distributed under the License is distributed on an \"AS IS\" BASIS,\n", - "WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", - "See the License for the specific language governing permissions and\n", - "limitations under the License." - ], - "metadata": { - "collapsed": false - }, - "id": "6d93352843a27433" - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.7" - }, - "colab": { - "provenance": [] - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/tutorials/notebooks/imx500_notebooks/keras/keras_yolov8n_for_imx500.ipynb b/tutorials/notebooks/imx500_notebooks/keras/keras_yolov8n_for_imx500.ipynb deleted file mode 100644 index 4150d2153..000000000 --- a/tutorials/notebooks/imx500_notebooks/keras/keras_yolov8n_for_imx500.ipynb +++ /dev/null @@ -1,449 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "4c261298-309f-41e8-9338-a5e205f09b05", - "metadata": {}, - "source": [ - "# YOLOv8n Object Detection Keras Model - Quantization for IMX500\n", - "\n", - "[Run this tutorial in Google Colab](https://colab.research.google.com/github/sony/model_optimization/blob/main/tutorials/notebooks/imx500_notebooks/keras/keras_yolov8n_for_imx500.ipynb)\n", - "\n", - "## Overview\n", - "\n", - "In this tutorial, we will illustrate a basic and quick process of preparing a pre-trained model for deployment using MCT. Specifically, we will demonstrate how to download a pre-trained YOLOv8n model from the MCT Models Library, compress it, and make it deployment-ready using MCT's post-training quantization techniques. Additionally, we will demonstrate an optional optimization step using MCT's Gradient-Based Post Training Quantization.\n", - "\n", - "We will use an existing pre-trained YOLOv8n model based on [Ultralytics](https://github.com/ultralytics/ultralytics). The model was slightly adjusted with integrated NMS layer. We will quantize the model using MCT post training quantization and evaluate the performance of the floating point model and the quantized model on COCO dataset.\n", - "\n", - "\n", - "## Summary\n", - "\n", - "In this tutorial we will cover:\n", - "\n", - "1. Post-Training Quantization using MCT of Keras object detection model.\n", - "2. An optional optimization step of Gradient-Based Post Training Quantization. \n", - "3. 
Data preparation - loading and preprocessing validation and representative datasets from COCO.\n", - "4. Accuracy evaluation of the floating-point and the quantized models." - ] - }, - { - "cell_type": "markdown", - "id": "d74f9c855ec54081", - "metadata": { - "collapsed": false - }, - "source": [ - "## Setup\n", - "### Install the relevant packages" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7c7fa04c9903736f", - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "TF_VER = '2.14.0'\n", - "\n", - "!pip install -q tensorflow=={TF_VER}\n", - "!pip install -q pycocotools\n", - "!pip install 'huggingface-hub<=0.21.4'" - ] - }, - { - "cell_type": "markdown", - "id": "57717bc8f59a0d85", - "metadata": { - "collapsed": false - }, - "source": [ - "Install MCT (if it’s not already installed). Additionally, in order to use all the necessary utility functions for this tutorial, we also copy [MCT tutorials folder](https://github.com/sony/model_optimization/tree/main/tutorials) and add it to the system path." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9728247bc20d0600", - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "import sys\n", - "import os\n", - "import importlib\n", - "\n", - "if not importlib.util.find_spec('model_compression_toolkit'):\n", - " !pip install model_compression_toolkit\n", - "!git clone https://github.com/sony/model_optimization.git temp_mct && mv temp_mct/tutorials . && \\rm -rf temp_mct\n", - "sys.path.insert(0,\"tutorials\")" - ] - }, - { - "cell_type": "markdown", - "id": "7a1038b9fd98bba2", - "metadata": { - "collapsed": false - }, - "source": [ - "### Download COCO evaluation set" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8bea492d71b4060f", - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "if not os.path.isdir('coco'):\n", - " !wget -nc http://images.cocodataset.org/annotations/annotations_trainval2017.zip\n", - " !unzip -q -o annotations_trainval2017.zip -d ./coco\n", - " !echo Done loading annotations\n", - " !wget -nc http://images.cocodataset.org/zips/val2017.zip\n", - " !unzip -q -o val2017.zip -d ./coco\n", - " !echo Done loading val2017 images" - ] - }, - { - "cell_type": "markdown", - "id": "084c2b8b-3175-4d46-a18a-7c4d8b6fcb38", - "metadata": {}, - "source": [ - "## Model Quantization\n", - "\n", - "### Download a Pre-Trained Model \n", - "\n", - "We begin by loading a pre-trained [YOLOv8n](https://huggingface.co/SSI-DNN/test_keras_yolov8n_640x640) model. This implementation is based on [Ultralytics](https://github.com/ultralytics/ultralytics) and includes a slightly modified version of yolov8 detection-head (mainly the box decoding part) that was adapted for model quantization. For further insights into the model's implementation details, please refer to [MCT Models Library - yolov8](https://github.com/sony/model_optimization/tree/main/tutorials/mct_model_garden/models_keras/yolov8). 
" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e8395b28-4732-4d18-b081-5d3bdf508691", - "metadata": {}, - "outputs": [], - "source": [ - "from huggingface_hub import from_pretrained_keras\n", - "\n", - "model = from_pretrained_keras('SSI-DNN/keras_yolov8n_640x640_pp')" - ] - }, - { - "cell_type": "markdown", - "id": "3cde2f8e-0642-4374-a1f4-df2775fe7767", - "metadata": {}, - "source": [ - "### Post training quantization using Model Compression Toolkit \n", - "\n", - "Now, we're all set to use MCT's post-training quantization. To begin, we'll define a representative dataset and proceed with the model quantization. Please note that, for demonstration purposes, we'll use the evaluation dataset as our representative dataset. We'll calibrate the model using 100 representative images, divided into 20 iterations of 'batch_size' images each. \n", - "\n", - "Additionally, to further compress the model's memory footprint, we will employ the mixed-precision quantization technique. This method allows each layer to be quantized with different precision options: 2, 4, and 8 bits, aligning with the imx500 target platform capabilities. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "56393342-cecf-4f64-b9ca-2f515c765942", - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "import model_compression_toolkit as mct\n", - "from tutorials.mct_model_garden.evaluation_metrics.coco_evaluation import CocoDataset, DataLoader\n", - "from tutorials.mct_model_garden.models_keras.yolov8.yolov8_preprocess import yolov8_preprocess\n", - "from typing import Iterator, Tuple, List\n", - "\n", - "REPRESENTATIVE_DATASET_FOLDER = './coco/val2017/'\n", - "REPRESENTATIVE_DATASET_ANNOTATION_FILE = './coco/annotations/instances_val2017.json'\n", - "BATCH_SIZE = 5\n", - "n_iters = 20\n", - "\n", - "# Load representative dataset\n", - "representative_dataset = CocoDataset(dataset_folder=REPRESENTATIVE_DATASET_FOLDER,\n", - " annotation_file=REPRESENTATIVE_DATASET_ANNOTATION_FILE,\n", - " preprocess=yolov8_preprocess)\n", - "\n", - "rep_data_loader = DataLoader(representative_dataset, BATCH_SIZE, shuffle=True)\n", - "\n", - "# Define representative dataset generator\n", - "def get_representative_dataset(n_iter: int, dataset_loader: DataLoader):\n", - " \"\"\"\n", - " This function creates a representative dataset generator. The generator yields numpy\n", - " arrays of batches of shape: [Batch, H, W ,C].\n", - " Args:\n", - " n_iter: number of iterations for MCT to calibrate on\n", - " dataset_loader: an iterable DataLoader \n", - " Returns:\n", - " A representative dataset generator\n", - " \"\"\"\n", - " def representative_dataset():\n", - " ds_iter = iter(dataset_loader)\n", - " for _ in range(n_iter):\n", - " yield [next(ds_iter)[0]]\n", - "\n", - " return representative_dataset\n", - "\n", - "# Get representative dataset generator\n", - "representative_dataset_gen = get_representative_dataset(n_iters, rep_data_loader)\n", - "\n", - "# Set IMX500-v1 TPC\n", - "tpc = mct.get_target_platform_capabilities(\"tensorflow\", 'imx500', target_platform_version='v1')\n", - "\n", - "# Specify the necessary configuration for mixed precision quantization. \n", - "mp_config = mct.core.MixedPrecisionQuantizationConfig(num_of_images=5, use_hessian_based_scores=False)\n", - "\n", - "# Specify the core configuration. 
Enable \"shift negative\" correction for better non-linear activation quantization.\n", - "config = mct.core.CoreConfig(mixed_precision_config=mp_config,\n", - " quantization_config=mct.core.QuantizationConfig(shift_negative_activation_correction=True))\n", - "\n", - "# Define target Resource Utilization for mixed precision weights quantization (76% of 'standard' 8bits quantization)\n", - "resource_utilization_data = mct.core.keras_resource_utilization_data(model,\n", - " representative_dataset_gen,\n", - " config,\n", - " target_platform_capabilities=tpc)\n", - "resource_utilization = mct.core.ResourceUtilization(resource_utilization_data.weights_memory * 0.76)\n", - "\n", - "# Perform post training quantization\n", - "quant_model, _ = mct.ptq.keras_post_training_quantization(model,\n", - " representative_dataset_gen,\n", - " target_resource_utilization=resource_utilization,\n", - " core_config=config,\n", - " target_platform_capabilities=tpc)\n", - "print('Quantized model is ready')" - ] - }, - { - "cell_type": "markdown", - "id": "3be2016acdc9da60", - "metadata": { - "collapsed": false - }, - "source": [ - "### Model Export\n", - "\n", - "Now, we can export the quantized model, ready for deployment, into a `.keras` format file. Please ensure that the `save_model_path` has been set correctly. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "72dd885c7b92fa93", - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "mct.exporter.keras_export_model(model=quant_model, save_model_path='./qmodel.keras')" - ] - }, - { - "cell_type": "markdown", - "id": "ba1ade49894e4e22", - "metadata": { - "collapsed": false - }, - "source": [ - "\n", - "### Gradient-Based Post Training Quantization using Model Compression Toolkit\n", - "Here we demonstrate how to further optimize the quantized model performance using gradient-based PTQ technique.\n", - "**Please note that this section is computationally heavy, and it's recommended to run it on a GPU. For fast deployment, you may choose to skip this step.** \n", - "\n", - "We will start by loading the COCO training set, and re-define the representative dataset accordingly. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5276ec7291d28603", - "metadata": { - "collapsed": false, - "tags": [ - "long_run" - ] - }, - "outputs": [], - "source": [ - "if not os.path.isdir('coco/train2017'):\n", - " !wget -nc http://images.cocodataset.org/zips/train2017.zip\n", - " !unzip -q -o train2017.zip -d ./coco\n", - " !echo Done loading train2017 images\n", - "\n", - "REPRESENTATIVE_DATASET_FOLDER = './coco/train2017/'\n", - "REPRESENTATIVE_DATASET_ANNOTATION_FILE = './coco/annotations/instances_train2017.json'\n", - "BATCH_SIZE = 5\n", - "n_iters = 20\n", - "\n", - "# Load representative dataset\n", - "representative_dataset = CocoDataset(dataset_folder=REPRESENTATIVE_DATASET_FOLDER,\n", - " annotation_file=REPRESENTATIVE_DATASET_ANNOTATION_FILE,\n", - " preprocess=yolov8_preprocess)\n", - "\n", - "rep_data_loader = DataLoader(representative_dataset, BATCH_SIZE, shuffle=True)\n", - "\n", - "# Get representative dataset generator\n", - "representative_dataset_gen = get_representative_dataset(n_iters, rep_data_loader)" - ] - }, - { - "cell_type": "markdown", - "id": "fce524abd2f1e750", - "metadata": { - "collapsed": false - }, - "source": [ - "Next, we'll set up the Gradient-Based PTQ configuration and execute the necessary MCT command. 
Keep in mind that this step can be time-consuming, depending on your runtime." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "30f0a0c1c497ba2", - "metadata": { - "collapsed": false, - "tags": [ - "long_run" - ] - }, - "outputs": [], - "source": [ - "# Specify the necessary configuration for Gradient-Based PTQ.\n", - "n_gptq_epochs = 1000\n", - "gptq_config = mct.gptq.get_keras_gptq_config(n_epochs=n_gptq_epochs, use_hessian_based_weights=False)\n", - "\n", - "# Perform Gradient-Based Post Training Quantization\n", - "quant_model, _ = mct.gptq.keras_gradient_post_training_quantization(\n", - " model,\n", - " representative_dataset_gen,\n", - " target_resource_utilization=resource_utilization,\n", - " gptq_config=gptq_config,\n", - " core_config=config,\n", - " target_platform_capabilities=tpc)\n", - "\n", - "print('Quantized model is ready')" - ] - }, - { - "cell_type": "markdown", - "id": "015e760b-6555-45b4-aaf9-500e974c1d86", - "metadata": {}, - "source": [ - "## Evaluation on COCO dataset\n", - "\n", - "### Floating point model evaluation\n", - "Next, we evaluate the floating point model by using `cocoeval` library alongside additional dataset utilities. We can verify the mAP accuracy aligns with that of the original model. \n", - "Note that we set the \"batch_size\" to 5 and the preprocessing according to [Ultralytics](https://github.com/ultralytics/ultralytics).\n", - "Please ensure that the dataset path has been set correctly before running this code cell." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "01e90967-594b-480f-b2e6-45e2c9ce9cee", - "metadata": {}, - "outputs": [], - "source": [ - "from tutorials.mct_model_garden.evaluation_metrics.coco_evaluation import coco_evaluate\n", - "\n", - "EVAL_DATASET_FOLDER = './coco/val2017'\n", - "EVAL_DATASET_ANNOTATION_FILE = './coco/annotations/instances_val2017.json'\n", - "INPUT_RESOLUTION = 640\n", - "\n", - "# Define resizing information to map between the model's output and the original image dimensions\n", - "output_resize = {'shape': (INPUT_RESOLUTION, INPUT_RESOLUTION), 'aspect_ratio_preservation': True}\n", - "\n", - "# Evaluate the model on coco\n", - "eval_results = coco_evaluate(model=model,\n", - " dataset_folder=EVAL_DATASET_FOLDER,\n", - " annotation_file=EVAL_DATASET_ANNOTATION_FILE,\n", - " preprocess=yolov8_preprocess,\n", - " output_resize=output_resize,\n", - " batch_size=BATCH_SIZE)\n", - "\n", - "# Print float model mAP results\n", - "print(\"Float model mAP: {:.4f}\".format(eval_results[0]))" - ] - }, - { - "cell_type": "markdown", - "id": "4fb6bffc-23d1-4852-8ec5-9007361c8eeb", - "metadata": {}, - "source": [ - "### Quantized model evaluation\n", - "Lastly, we can evaluate the performance of the quantized model. There is a slight decrease in performance that can be further mitigated by either expanding the representative dataset or employing MCT's advanced quantization methods, such as GPTQ (Gradient-Based/Enhanced Post Training Quantization)." 
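If the evaluation is run in a fresh session, the quantized model that was exported earlier can be reloaded from disk. This is only an illustrative aside; it assumes `./qmodel.keras` was written by the export cell above and that the installed MCT version exposes `mct.keras_load_quantized_model`:

```python
import model_compression_toolkit as mct

# Reload a previously exported quantized model (illustrative; assumes './qmodel.keras'
# exists and that this MCT version provides keras_load_quantized_model).
reloaded_model = mct.keras_load_quantized_model('./qmodel.keras')
reloaded_model.summary()
```

Loading through MCT rather than plain `keras.models.load_model` should keep the custom quantizer layers registered correctly.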
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8dc7b87c-a9f4-4568-885a-fe009c8f4e8f", - "metadata": {}, - "outputs": [], - "source": [ - "# Evaluate the model on coco\n", - "eval_results = coco_evaluate(model=quant_model,\n", - " dataset_folder=EVAL_DATASET_FOLDER,\n", - " annotation_file=EVAL_DATASET_ANNOTATION_FILE,\n", - " preprocess=yolov8_preprocess,\n", - " output_resize=output_resize,\n", - " batch_size=BATCH_SIZE)\n", - "\n", - "# Print quantized model mAP results\n", - "print(\"Quantized model mAP: {:.4f}\".format(eval_results[0]))" - ] - }, - { - "cell_type": "markdown", - "id": "6d93352843a27433", - "metadata": { - "collapsed": false - }, - "source": [ - "\\\n", - "Copyright 2024 Sony Semiconductor Israel, Inc. All rights reserved.\n", - "\n", - "Licensed under the Apache License, Version 2.0 (the \"License\");\n", - "you may not use this file except in compliance with the License.\n", - "You may obtain a copy of the License at\n", - "\n", - " http://www.apache.org/licenses/LICENSE-2.0\n", - "\n", - "Unless required by applicable law or agreed to in writing, software\n", - "distributed under the License is distributed on an \"AS IS\" BASIS,\n", - "WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", - "See the License for the specific language governing permissions and\n", - "limitations under the License." - ] - } - ], - "metadata": { - "colab": { - "provenance": [] - }, - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.7" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/tutorials/notebooks/imx500_notebooks/pytorch/__init__.py b/tutorials/notebooks/imx500_notebooks/pytorch/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/tutorials/notebooks/imx500_notebooks/pytorch/pytorch_fastdepth_for_imx500.ipynb b/tutorials/notebooks/imx500_notebooks/pytorch/pytorch_fastdepth_for_imx500.ipynb deleted file mode 100644 index 0f3e42bfe..000000000 --- a/tutorials/notebooks/imx500_notebooks/pytorch/pytorch_fastdepth_for_imx500.ipynb +++ /dev/null @@ -1,398 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "fab9d9939dc74da4", - "metadata": { - "collapsed": false - }, - "source": [ - "# Fast-Depth Estimation - Quantization for IMX500\n", - "\n", - "[Run this tutorial in Google Colab](https://colab.research.google.com/github/sony/model_optimization/blob/main/tutorials/notebooks/imx500_notebooks/pytorch/pytorch_fastdepth_for_imx500.ipynb)\n", - "\n", - "## Overview\n", - "\n", - "In this tutorial, we will illustrate a basic and quick process of preparing a pre-trained model for deployment using MCT. Specifically, we will demonstrate how to download a pre-trained pytorch fast-depth model, compress it, and make it deployment-ready using MCT's post-training quantization techniques.\n", - "\n", - "We will use an existing pre-trained Fast-Depth model based on [Fast-Depth](https://github.com/dwofk/fast-depth). 
We will quantize the model using MCT post training quantization technique and visualize some samples of the floating point model and the quantized model.\n" - ] - }, - { - "cell_type": "markdown", - "id": "d74f9c855ec54081", - "metadata": { - "collapsed": false - }, - "source": [ - "## Setup\n", - "### Install the relevant packages" - ] - }, - { - "cell_type": "code", - "id": "7c7fa04c9903736f", - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2024-10-10T14:08:38.389433Z", - "start_time": "2024-10-10T14:08:22.709164Z" - } - }, - "source": [ - "import torch\n", - "!pip install -q torch\n", - "!pip install onnx\n", - "!pip install datasets\n", - "!pip install matplotlib\n", - "!pip install 'huggingface-hub>=0.21.0'" - ], - "outputs": [], - "execution_count": 1 - }, - { - "cell_type": "markdown", - "id": "57717bc8f59a0d85", - "metadata": { - "collapsed": false - }, - "source": "Install MCT (if it’s not already installed). Additionally, in order to use all the necessary utility functions for this tutorial, we also copy [MCT tutorials folder](https://github.com/sony/model_optimization/tree/main/tutorials) and add it to the system path." - }, - { - "cell_type": "code", - "id": "9728247bc20d0600", - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2024-10-10T14:08:44.433155Z", - "start_time": "2024-10-10T14:08:38.390775Z" - } - }, - "source": [ - "import importlib\n", - "import sys\n", - "\n", - "if not importlib.util.find_spec('model_compression_toolkit'):\n", - " !pip install model_compression_toolkit\n", - "!git clone https://github.com/sony/model_optimization.git temp_mct && mv temp_mct/tutorials . && \\rm -rf temp_mct\n", - "sys.path.insert(0,\"tutorials\")" - ], - "outputs": [], - "execution_count": 2 - }, - { - "cell_type": "markdown", - "id": "7a1038b9fd98bba2", - "metadata": { - "collapsed": false - }, - "source": "" - }, - { - "cell_type": "markdown", - "id": "084c2b8b-3175-4d46-a18a-7c4d8b6fcb38", - "metadata": {}, - "source": [ - "## Download a Pre-Trained Model \n", - "\n", - "We begin by downloading a pre-trained Fast-Depth model. This implemetation is based on [Pytorch Fast-Depth](https://github.com/dwofk/fast-depth). " - ] - }, - { - "cell_type": "code", - "id": "e8395b28-4732-4d18-b081-5d3bdf508691", - "metadata": { - "ExecuteTime": { - "end_time": "2024-10-10T14:09:17.693158Z", - "start_time": "2024-10-10T14:08:44.434067Z" - } - }, - "source": [ - "from tutorials.mct_model_garden.models_pytorch.fastdepth.fastdepth import FastDepth\n", - "from model_compression_toolkit.core.pytorch.utils import get_working_device\n", - "model = FastDepth.from_pretrained(\"SSI-DNN/pytorch_fastdepth_224x224\")\n", - "model.eval()\n", - "\n", - "# Move to device\n", - "device = get_working_device()\n", - "model.to(device)" - ], - "outputs": [], - "execution_count": 3 - }, - { - "cell_type": "markdown", - "id": "3cde2f8e-0642-4374-a1f4-df2775fe7767", - "metadata": {}, - "source": [ - "## Quantization\n", - "\n", - "### Post training quantization (PTQ) using Model Compression Toolkit (MCT)\n", - "\n", - "Now, we are all set to use MCT's post-training quantization. To begin, we'll use a representative dataset of lsun-bedrooms and proceed with the model quantization. We'll calibrate the model using 80 representative images, divided into 20 iterations of 'batch_size' images each. 
\n", - "\n", - "### Representitive Dataset" - ] - }, - { - "cell_type": "code", - "id": "56393342-cecf-4f64-b9ca-2f515c765942", - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2024-10-10T14:09:22.290843Z", - "start_time": "2024-10-10T14:09:17.701423Z" - } - }, - "source": [ - "from torch.utils.data import DataLoader, Dataset\n", - "from torchvision import transforms\n", - "from datasets import load_dataset\n", - "from typing import Iterator, Tuple, List\n", - "\n", - "BATCH_SIZE = 4\n", - "n_iters = 20\n", - "\n", - "class ValDataset(Dataset):\n", - " def __init__(self, dataset):\n", - " super(ValDataset, self).__init__()\n", - " self.dataset = dataset\n", - " self.val_transform = transforms.Compose([\n", - " transforms.Resize((224, 224)),\n", - " transforms.ToTensor()])\n", - "\n", - " def __len__(self):\n", - " return len(self.dataset)\n", - "\n", - " def __getitem__(self, index):\n", - " img = self.dataset[index]['image']\n", - " tensor = self.val_transform(img)\n", - " return tensor\n", - "\n", - "dataset = load_dataset(\"pcuenq/lsun-bedrooms\",split=\"test\")\n", - "val_dataset = ValDataset(dataset)\n", - "val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)\n", - "\n", - "# Define representative dataset generator\n", - "def get_representative_dataset(n_iter: int, dataset_loader: Iterator[Tuple]):\n", - " \"\"\"\n", - " This function creates a representative dataset generator. The generator yields numpy\n", - " arrays of batches of shape: [Batch, H, W ,C].\n", - " Args:\n", - " n_iter: number of iterations for MCT to calibrate on\n", - " dataset_loader: iterator object of dataset loader\n", - " Returns:\n", - " A representative dataset generator\n", - " \"\"\" \n", - " def representative_dataset() -> Iterator[List]:\n", - " ds_iter = iter(dataset_loader)\n", - " for _ in range(n_iter):\n", - " yield [next(ds_iter)]\n", - "\n", - " return representative_dataset\n", - "\n", - "# Get representative dataset generator\n", - "representative_dataset_gen = get_representative_dataset(n_iter=n_iters, dataset_loader=val_loader)\n" - ], - "outputs": [], - "execution_count": 4 - }, - { - "metadata": {}, - "cell_type": "markdown", - "source": "### Post-Training Quantization (PTQ)", - "id": "bb1bfcca03ce55c" - }, - { - "metadata": { - "ExecuteTime": { - "end_time": "2024-10-10T14:09:25.749346Z", - "start_time": "2024-10-10T14:09:22.291787Z" - } - }, - "cell_type": "code", - "source": [ - "import model_compression_toolkit as mct\n", - "\n", - "# Set IMX500 TPC\n", - "tpc = mct.get_target_platform_capabilities(fw_name=\"pytorch\",\n", - " target_platform_name='imx500',\n", - " target_platform_version='v3')\n", - "\n", - "# Perform post training quantization\n", - "quant_model, _ = mct.ptq.pytorch_post_training_quantization(in_module=model,\n", - " representative_data_gen=representative_dataset_gen,\n", - " target_platform_capabilities=tpc)\n", - "\n", - "\n", - "print('Quantized model is ready!')" - ], - "id": "55177376aca838c0", - "outputs": [], - "execution_count": 5 - }, - { - "cell_type": "markdown", - "id": "3be2016acdc9da60", - "metadata": { - "collapsed": false - }, - "source": [ - "### Export\n", - "\n", - "Now, we can export the quantized model, ready for deployment om IMX500, into a `.onnx` format file. Please ensure that the `save_model_path` has been set correctly. 
" - ] - }, - { - "cell_type": "code", - "id": "72dd885c7b92fa93", - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2024-10-10T14:09:25.753622Z", - "start_time": "2024-10-10T14:09:25.751902Z" - } - }, - "source": [ - "mct.exporter.pytorch_export_model(model=quant_model,\n", - " save_model_path='./model.onnx',\n", - " repr_dataset=representative_dataset_gen)" - ], - "outputs": [], - "execution_count": null - }, - { - "cell_type": "markdown", - "id": "43a8a6d11d696b09", - "metadata": { - "collapsed": false - }, - "source": [ - "## Visualize samples from lsun-bedrooms\n", - "Next, we visualize a sample of RGB image along with its depth image from the floating point and the quantized model." - ] - }, - { - "cell_type": "code", - "id": "01e90967-594b-480f-b2e6-45e2c9ce9cee", - "metadata": { - "ExecuteTime": { - "end_time": "2024-10-10T14:10:55.776013Z", - "start_time": "2024-10-10T14:10:55.017073Z" - } - }, - "source": [ - "import torch\n", - "import numpy as np\n", - "import matplotlib.pyplot as plt\n", - "from PIL import Image\n", - "\n", - "cmap = plt.cm.viridis\n", - "\n", - "def colored_depthmap(depth: np.ndarray, d_min: float = None, d_max: float = None) -> np.ndarray:\n", - " \"\"\"\n", - " This function create depth map for visualization.\n", - " Args:\n", - " depth: depth image\n", - " d_min: minimum depth\n", - " d_max: maximum depth\n", - " Returns:\n", - " A depth map\n", - " \"\"\" \n", - " if d_min is None:\n", - " d_min = np.min(depth)\n", - " if d_max is None:\n", - " d_max = np.max(depth)\n", - " depth_relative = (depth - d_min) / (d_max - d_min)\n", - " return 255 * cmap(depth_relative)[:,:,:3] # H, W, C\n", - "\n", - "def merge_into_row(img: torch.tensor, depth_float: torch.tensor, depth_quant: torch.tensor) -> torch.tensor:\n", - " \"\"\"\n", - " This function that merge output of 2 depth estimation result together for visualization.\n", - " Args:\n", - " img: RGB image\n", - " depth_float: Depth image of floating-point model\n", - " depth_quant: Depth image of quantized model\n", - " Returns:\n", - " A merged image\n", - " \"\"\" \n", - " rgb = 255 * np.transpose(np.squeeze(img.detach().cpu().numpy()), (1,2,0)) # H, W, C\n", - " depth_float = np.squeeze(depth_float.detach().cpu().numpy())\n", - " depth_quant = np.squeeze(depth_quant.detach().cpu().numpy())\n", - "\n", - " d_min = min(np.min(depth_float), np.min(depth_quant))\n", - " d_max = max(np.max(depth_float), np.max(depth_quant))\n", - " depth_float_col = colored_depthmap(depth_float, d_min, d_max)\n", - " depth_quant_col = colored_depthmap(depth_quant, d_min, d_max)\n", - " img_merge = np.hstack([rgb, depth_float_col, depth_quant_col])\n", - " \n", - " return img_merge\n", - "\n", - "\n", - "# Take a sample\n", - "SAMPLE_IDX = 0\n", - "img = val_dataset[SAMPLE_IDX]\n", - "img = img.unsqueeze(0).to(device) # adding batch size\n", - "\n", - "# Inference float-point and quantized models\n", - "depth_float = model(img)\n", - "depth_quant = quant_model(img)\n", - "\n", - "# Create and save image for visualization\n", - "merge_img = merge_into_row(img, depth_float, depth_quant)\n", - "merge_img = Image.fromarray(merge_img.astype('uint8'))\n", - "merge_img.save(\"depth.png\")\n", - "print('Depth image is saved!')" - ], - "outputs": [], - "execution_count": 6 - }, - { - "cell_type": "markdown", - "id": "6d93352843a27433", - "metadata": { - "collapsed": false - }, - "source": [ - "\n", - "Copyright 2024 Sony Semiconductor Israel, Inc. 
All rights reserved.\n", - "\n", - "Licensed under the Apache License, Version 2.0 (the \"License\");\n", - "you may not use this file except in compliance with the License.\n", - "You may obtain a copy of the License at\n", - "\n", - " http://www.apache.org/licenses/LICENSE-2.0\n", - "\n", - "Unless required by applicable law or agreed to in writing, software\n", - "distributed under the License is distributed on an \"AS IS\" BASIS,\n", - "WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", - "See the License for the specific language governing permissions and\n", - "limitations under the License." - ] - } - ], - "metadata": { - "colab": { - "provenance": [] - }, - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.4" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/tutorials/notebooks/imx500_notebooks/pytorch/pytorch_mobilevit_xs_for_imx500.ipynb b/tutorials/notebooks/imx500_notebooks/pytorch/pytorch_mobilevit_xs_for_imx500.ipynb deleted file mode 100644 index aee3f3885..000000000 --- a/tutorials/notebooks/imx500_notebooks/pytorch/pytorch_mobilevit_xs_for_imx500.ipynb +++ /dev/null @@ -1,367 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "fab9d9939dc74da4", - "metadata": { - "collapsed": false - }, - "source": [ - "# MobileViT-XS PyTorch Model - Quantization for IMX500\n", - "\n", - "[Run this tutorial in Google Colab](https://colab.research.google.com/github/sony/model_optimization/blob/main/tutorials/notebooks/imx500_notebooks/pytorch/pytorch_mobilevit_xs_for_imx500.ipynb)\n", - "\n", - "## Overview\n", - "\n", - "In this tutorial, we will illustrate a basic and quick process of preparing a pre-trained model for deployment using MCT. Specifically, we will demonstrate how to download a pre-trained MobileViT-XS model from the MCT Models Library, compress it, and make it deployment-ready using MCT's post-training quantization techniques.\n", - "\n", - "We will use an existing pre-trained MobileViT-XS model based on [Timm](https://github.com/huggingface/pytorch-image-models). The model was slightly adjusted for model quantization, particularly aimed at preventing folding along the batch axis. We will quantize the model using MCT post training quantization and evaluate the performance of the floating point model and the quantized model on ImageNet dataset.\n", - "\n", - "\n", - "## Summary\n", - "\n", - "In this tutorial we will cover:\n", - "\n", - "1. Post-Training Quantization using MCT of PyTorch classification model.\n", - "2. Data preparation - loading and preprocessing validation and representative datasets from ImageNet.\n", - "3. Accuracy evaluation of the floating-point and the quantized models." 
- ] - }, - { - "cell_type": "markdown", - "id": "d74f9c855ec54081", - "metadata": { - "collapsed": false - }, - "source": [ - "## Setup\n", - "### Install the relevant packages" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7c7fa04c9903736f", - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "!pip install -q torch\n", - "!pip install onnx\n", - "!pip install timm\n", - "!pip install 'huggingface-hub>=0.21.0'" - ] - }, - { - "cell_type": "markdown", - "id": "57717bc8f59a0d85", - "metadata": { - "collapsed": false - }, - "source": [ - "Install MCT (if it’s not already installed). Additionally, in order to use all the necessary utility functions for this tutorial, we also copy [MCT tutorials folder](https://github.com/sony/model_optimization/tree/main/tutorials) and add it to the system path." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9728247bc20d0600", - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "import sys\n", - "import importlib\n", - "\n", - "if not importlib.util.find_spec('model_compression_toolkit'):\n", - " !pip install model_compression_toolkit\n", - "!git clone https://github.com/sony/model_optimization.git temp_mct && mv temp_mct/tutorials . && \\rm -rf temp_mct\n", - "sys.path.insert(0,\"tutorials\")\n" - ] - }, - { - "cell_type": "markdown", - "id": "7a1038b9fd98bba2", - "metadata": { - "collapsed": false - }, - "source": [ - "### Download ImageNet validation set\n", - "Download ImageNet dataset with only the validation split.\n", - "\n", - "Note that for demonstration purposes we use the validation set for the model quantization routines. Usually, a subset of the training dataset is used, but loading it is a heavy procedure that is unnecessary for the sake of this demonstration.\n", - "\n", - "This step may take several minutes..." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8bea492d71b4060f", - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "import os\n", - "if not os.path.isdir('imagenet'):\n", - " !mkdir imagenet\n", - " !wget https://image-net.org/data/ILSVRC/2012/ILSVRC2012_devkit_t12.tar.gz\n", - " !mv ILSVRC2012_devkit_t12.tar.gz imagenet/\n", - " !wget https://image-net.org/data/ILSVRC/2012/ILSVRC2012_img_val.tar\n", - " !mv ILSVRC2012_img_val.tar imagenet/" - ] - }, - { - "cell_type": "markdown", - "id": "084c2b8b-3175-4d46-a18a-7c4d8b6fcb38", - "metadata": {}, - "source": [ - "## Model Quantization\n", - "\n", - "### Download a Pre-Trained Model \n", - "\n", - "We begin by loading a pre-trained [MobileViT-XS](https://huggingface.co/SSI-DNN/pytorch_mobilevit_xs) model. This implementation is based on [Timm](https://github.com/huggingface/pytorch-image-models) and includes a slightly modified version of timm/Attention module that was adapted for model quantization, particularly aimed at preventing folding along the batch axis. For further insights into the model's implementation details, please refer to [MCT Models Garden - yolov8](https://github.com/sony/model_optimization/tree/main/tutorials/mct_model_garden/models_pytorch/mobilevit_xs). 
" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e8395b28-4732-4d18-b081-5d3bdf508691", - "metadata": {}, - "outputs": [], - "source": [ - "from tutorials.mct_model_garden.models_pytorch.mobilevit_xs.mobilevit_xs import MobileViTXSPyTorch\n", - "model = MobileViTXSPyTorch.from_pretrained(\"SSI-DNN/pytorch_mobilevit_xs\")" - ] - }, - { - "cell_type": "markdown", - "id": "3cde2f8e-0642-4374-a1f4-df2775fe7767", - "metadata": {}, - "source": [ - "### Post training quantization using Model Compression Toolkit \n", - "\n", - "Now, we're all set to use MCT's post-training quantization. To begin, we'll define a representative dataset and proceed with the model quantization. Please note that, for demonstration purposes, we'll use the evaluation dataset as our representative dataset. We'll calibrate the model using 80 representative images, divided into 20 iterations of 'batch_size' images each. \n", - "\n", - "Additionally, to further compress the model's memory footprint, we will employ the mixed-precision quantization technique. This method allows each layer to be quantized with different precision options: 2, 4, and 8 bits, aligning with the imx500 target platform capabilities." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "56393342-cecf-4f64-b9ca-2f515c765942", - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "import model_compression_toolkit as mct\n", - "from model_compression_toolkit.core.pytorch.pytorch_device_config import get_working_device\n", - "from timm.data import create_loader, resolve_data_config\n", - "from typing import Iterator, Tuple, List\n", - "import torchvision\n", - "\n", - "\n", - "BATCH_SIZE = 4\n", - "n_iters = 20\n", - "IMG_SIZE = 256\n", - "DATA_ARGS = {'img_size': IMG_SIZE}\n", - "device = get_working_device()\n", - "\n", - "# Load representative dataset\n", - "data_config = resolve_data_config(args=DATA_ARGS,\n", - " model=model)\n", - "\n", - "# Extract ImageNet validation dataset using torchvision \"datasets\" module\n", - "val_dataset = torchvision.datasets.ImageNet(root='./imagenet', split='val')\n", - " \n", - "representative_dataset = create_loader(\n", - " val_dataset,\n", - " input_size=data_config['input_size'],\n", - " batch_size=BATCH_SIZE,\n", - " interpolation=data_config['interpolation'],\n", - " mean=data_config['mean'],\n", - " std=data_config['std'],\n", - " crop_pct=data_config['crop_pct'],\n", - " device=device)\n", - "\n", - "# Define representative dataset generator\n", - "def get_representative_dataset(n_iter: int, dataset_loader: Iterator[Tuple]):\n", - " \"\"\"\n", - " This function creates a representative dataset generator. 
The generator yields numpy\n", - " arrays of batches of shape: [Batch, H, W ,C].\n", - " Args:\n", - " n_iter: number of iterations for MCT to calibrate on\n", - " Returns:\n", - " A representative dataset generator\n", - " \"\"\" \n", - " def representative_dataset() -> Iterator[List]:\n", - " ds_iter = iter(dataset_loader)\n", - " for _ in range(n_iter):\n", - " yield [next(ds_iter)[0]]\n", - "\n", - " return representative_dataset\n", - "\n", - "# Get representative dataset generator\n", - "representative_dataset_gen = get_representative_dataset(n_iter=n_iters,\n", - " dataset_loader=representative_dataset)\n", - "\n", - "# Set IMX500-v1 TPC\n", - "tpc = mct.get_target_platform_capabilities(fw_name=\"pytorch\",\n", - " target_platform_name='imx500',\n", - " target_platform_version='v1')\n", - "\n", - "# We adjusted the quantization configuration to match the model, activated the shift negative activation correction, and set the z-threshold to 7.0. This is a general best practice for transformer architectures.\n", - "ptq_config = mct.core.CoreConfig(quantization_config=mct.core.QuantizationConfig(\n", - " shift_negative_activation_correction=True,\n", - " z_threshold=7.0))\n", - "\n", - "# Perform post training quantization\n", - "quant_model, _ = mct.ptq.pytorch_post_training_quantization(in_module=model,\n", - " representative_data_gen=representative_dataset_gen,\n", - " core_config=ptq_config,\n", - " target_platform_capabilities=tpc)\n", - "print('Quantized model is ready')" - ] - }, - { - "cell_type": "markdown", - "id": "3be2016acdc9da60", - "metadata": { - "collapsed": false - }, - "source": [ - "### Model Export\n", - "\n", - "Now, we can export the quantized model, ready for deployment, into a `.onnx` format file. Please ensure that the `save_model_path` has been set correctly. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "72dd885c7b92fa93", - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "mct.exporter.pytorch_export_model(model=quant_model,\n", - " save_model_path='./qmodel.onnx',\n", - " repr_dataset=representative_dataset_gen, \n", - " onnx_opset_version=17)" - ] - }, - { - "cell_type": "markdown", - "source": [ - "## Evaluation on ImageNet dataset\n", - "\n", - "### Floating point model evaluation\n", - "Please ensure that the dataset path has been set correctly before running this code cell." - ], - "metadata": { - "collapsed": false - }, - "id": "43a8a6d11d696b09" - }, - { - "cell_type": "code", - "execution_count": null, - "id": "01e90967-594b-480f-b2e6-45e2c9ce9cee", - "metadata": {}, - "outputs": [], - "source": [ - "from tutorials.resources.utils.pytorch_tutorial_tools import classification_eval\n", - "\n", - "val_loader = create_loader(\n", - " val_dataset,\n", - " input_size=data_config['input_size'],\n", - " batch_size=BATCH_SIZE,\n", - " interpolation=data_config['interpolation'],\n", - " mean=data_config['mean'],\n", - " std=data_config['std'],\n", - " crop_pct=data_config['crop_pct'],\n", - " device=device)\n", - "\n", - "# Evaluate the model on ImageNet\n", - "eval_results = classification_eval(model, val_loader)\n", - "\n", - "# Print float model Accuracy results\n", - "print(\"Float model Accuracy: {:.4f}\".format(round(100 * eval_results[0], 2)))" - ] - }, - { - "cell_type": "markdown", - "id": "4fb6bffc-23d1-4852-8ec5-9007361c8eeb", - "metadata": {}, - "source": [ - "### Quantized model evaluation\n", - "We can evaluate the performance of the quantized model. 
There is a slight decrease in performance that can be further mitigated by either expanding the representative dataset or employing MCT's advanced quantization methods, such as GPTQ (Gradient-Based/Enhanced Post Training Quantization)." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8dc7b87c-a9f4-4568-885a-fe009c8f4e8f", - "metadata": {}, - "outputs": [], - "source": [ - "# Evaluate the quantized model on ImageNet\n", - "eval_results = classification_eval(quant_model, val_loader)\n", - "\n", - "# Print quantized model Accuracy results\n", - "print(\"Quantized model Accuracy: {:.4f}\".format(round(100 * eval_results[0], 2)))" - ] - }, - { - "cell_type": "markdown", - "id": "6d93352843a27433", - "metadata": { - "collapsed": false - }, - "source": [ - "\\\n", - "Copyright 2024 Sony Semiconductor Israel, Inc. All rights reserved.\n", - "\n", - "Licensed under the Apache License, Version 2.0 (the \"License\");\n", - "you may not use this file except in compliance with the License.\n", - "You may obtain a copy of the License at\n", - "\n", - " http://www.apache.org/licenses/LICENSE-2.0\n", - "\n", - "Unless required by applicable law or agreed to in writing, software\n", - "distributed under the License is distributed on an \"AS IS\" BASIS,\n", - "WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", - "See the License for the specific language governing permissions and\n", - "limitations under the License." - ] - } - ], - "metadata": { - "colab": { - "provenance": [] - }, - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.4" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/tutorials/notebooks/imx500_notebooks/pytorch/pytorch_timm_classification_model_for_imx500.ipynb b/tutorials/notebooks/imx500_notebooks/pytorch/pytorch_timm_classification_model_for_imx500.ipynb deleted file mode 100644 index c7cdc8b8e..000000000 --- a/tutorials/notebooks/imx500_notebooks/pytorch/pytorch_timm_classification_model_for_imx500.ipynb +++ /dev/null @@ -1,345 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "fab9d9939dc74da4", - "metadata": { - "collapsed": false - }, - "source": [ - "# PyTorch Model from Timm - Quantization for IMX500\n", - "\n", - "[Run this tutorial in Google Colab](https://colab.research.google.com/github/sony/model_optimization/blob/main/tutorials/notebooks/imx500_notebooks/pytorch/pytorch_timm_classification_model_for_imx500.ipynb)\n", - "\n", - "## Overview\n", - "\n", - "In this tutorial, we will illustrate a basic and quick process of preparing a pre-trained model for deployment using MCT. \n", - "We will use an existing pre-trained model from [Timm](https://github.com/huggingface/pytorch-image-models). The user can choose any other timm model from this list of compatible model for his requirements. 
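For example, you can browse the pretrained architectures that `timm` ships before picking one; the wildcard filter below is just an illustration:

```python
import timm

# List pretrained timm models matching a wildcard pattern (here: MobileNet variants).
candidates = timm.list_models('mobilenet*', pretrained=True)
print(f'{len(candidates)} matching pretrained models, e.g. {candidates[:5]}')
```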
\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "id": "d74f9c855ec54081", - "metadata": { - "collapsed": false - }, - "source": [ - "## Setup\n", - "### Install the relevant packages" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7c7fa04c9903736f", - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "!pip install -q torch\n", - "!pip install onnx\n", - "!pip install timm" - ] - }, - { - "cell_type": "markdown", - "id": "57717bc8f59a0d85", - "metadata": { - "collapsed": false - }, - "source": [ - "Install MCT (if it’s not already installed). Additionally, in order to use all the necessary utility functions for this tutorial, we also copy [MCT tutorials folder](https://github.com/sony/model_optimization/tree/main/tutorials) and add it to the system path." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9728247bc20d0600", - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "import sys\n", - "import importlib\n", - "\n", - "if not importlib.util.find_spec('model_compression_toolkit'):\n", - " !pip install model_compression_toolkit\n", - "!git clone https://github.com/sony/model_optimization.git temp_mct && mv temp_mct/tutorials . && \\rm -rf temp_mct\n", - "sys.path.insert(0,\"tutorials\")\n" - ] - }, - { - "cell_type": "markdown", - "id": "7a1038b9fd98bba2", - "metadata": { - "collapsed": false - }, - "source": [ - "### Download ImageNet validation set\n", - "Download ImageNet dataset with only the validation split.\n", - "\n", - "Note that for demonstration purposes we use the validation set for the model quantization routines. Usually, a subset of the training dataset is used, but loading it is a heavy procedure that is unnecessary for the sake of this demonstration.\n", - "\n", - "This step may take several minutes..." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8bea492d71b4060f", - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "import os\n", - "if not os.path.isdir('imagenet'):\n", - " !mkdir imagenet\n", - " !wget https://image-net.org/data/ILSVRC/2012/ILSVRC2012_devkit_t12.tar.gz\n", - " !mv ILSVRC2012_devkit_t12.tar.gz imagenet/\n", - " !wget https://image-net.org/data/ILSVRC/2012/ILSVRC2012_img_val.tar\n", - " !mv ILSVRC2012_img_val.tar imagenet/" - ] - }, - { - "cell_type": "markdown", - "id": "084c2b8b-3175-4d46-a18a-7c4d8b6fcb38", - "metadata": {}, - "source": [ - "## Model Quantization\n", - "\n", - "### Download a Pre-Trained Model - Please select a Timm model\n", - "The tutorial is pre-configured to download `mobilenet_v2` model. In case you wish to use a different model - please change the model & weights below, based on [Timm](https://github.com/huggingface/pytorch-image-models)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e8395b28-4732-4d18-b081-5d3bdf508691", - "metadata": {}, - "outputs": [], - "source": [ - "import timm\n", - "\n", - "selected_model = 'mobilenetv2_100.ra_in1k'\n", - "\n", - "model = timm.create_model(selected_model, pretrained=True)\n", - "model.eval()\n" - ] - }, - { - "cell_type": "markdown", - "id": "3cde2f8e-0642-4374-a1f4-df2775fe7767", - "metadata": {}, - "source": [ - "### Post training quantization using Model Compression Toolkit \n", - "\n", - "Now, we're all set to use MCT's post-training quantization. To begin, we'll define a representative dataset and proceed with the model quantization. 
Please note that, for demonstration purposes, we'll use the evaluation dataset as our representative dataset. We'll calibrate the model using 80 representative images, divided into 20 iterations of 'batch_size' images each. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "56393342-cecf-4f64-b9ca-2f515c765942", - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "import model_compression_toolkit as mct\n", - "from model_compression_toolkit.core.pytorch.pytorch_device_config import get_working_device\n", - "from timm.data import create_loader, resolve_data_config\n", - "from typing import Iterator, Tuple, List\n", - "import torchvision\n", - "\n", - "\n", - "BATCH_SIZE = 4\n", - "n_iters = 20\n", - "IMG_SIZE = 256\n", - "DATA_ARGS = {'img_size': IMG_SIZE}\n", - "device = get_working_device()\n", - "\n", - "# Load representative dataset\n", - "data_config = resolve_data_config(args=DATA_ARGS,\n", - " model=model)\n", - "\n", - "# Extract ImageNet validation dataset using torchvision \"datasets\" module\n", - "val_dataset = torchvision.datasets.ImageNet(root='./imagenet', split='val')\n", - " \n", - "representative_dataset = create_loader(\n", - " val_dataset,\n", - " input_size=data_config['input_size'],\n", - " batch_size=BATCH_SIZE,\n", - " interpolation=data_config['interpolation'],\n", - " mean=data_config['mean'],\n", - " std=data_config['std'],\n", - " crop_pct=data_config['crop_pct'],\n", - " device=device)\n", - "\n", - "# Define representative dataset generator\n", - "def get_representative_dataset(n_iter: int, dataset_loader: Iterator[Tuple]):\n", - " \"\"\"\n", - " This function creates a representative dataset generator. The generator yields numpy\n", - " arrays of batches of shape: [Batch, H, W ,C].\n", - " Args:\n", - " n_iter: number of iterations for MCT to calibrate on\n", - " Returns:\n", - " A representative dataset generator\n", - " \"\"\" \n", - " def representative_dataset() -> Iterator[List]:\n", - " ds_iter = iter(dataset_loader)\n", - " for _ in range(n_iter):\n", - " yield [next(ds_iter)[0]]\n", - "\n", - " return representative_dataset\n", - "\n", - "# Get representative dataset generator\n", - "representative_dataset_gen = get_representative_dataset(n_iter=n_iters,\n", - " dataset_loader=representative_dataset)\n", - "\n", - "# Perform post training quantization with the default configuration\n", - "quant_model, _ = mct.ptq.pytorch_post_training_quantization(model, representative_dataset_gen)\n", - "print('Quantized model is ready')" - ] - }, - { - "cell_type": "markdown", - "id": "3be2016acdc9da60", - "metadata": { - "collapsed": false - }, - "source": [ - "### Model Export\n", - "\n", - "Now, we can export the quantized model, ready for deployment, into a `.onnx` format file. Please ensure that the `save_model_path` has been set correctly. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "72dd885c7b92fa93", - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "mct.exporter.pytorch_export_model(model=quant_model,\n", - " save_model_path='./qmodel.onnx',\n", - " repr_dataset=representative_dataset_gen,\n", - " onnx_opset_version=17)" - ] - }, - { - "cell_type": "markdown", - "source": [ - "## Evaluation on ImageNet dataset\n", - "\n", - "### Floating point model evaluation\n", - "Please ensure that the dataset path has been set correctly before running this code cell." 
- ], - "metadata": { - "collapsed": false - }, - "id": "43a8a6d11d696b09" - }, - { - "cell_type": "code", - "execution_count": null, - "id": "01e90967-594b-480f-b2e6-45e2c9ce9cee", - "metadata": {}, - "outputs": [], - "source": [ - "from tutorials.resources.utils.pytorch_tutorial_tools import classification_eval\n", - "\n", - "val_loader = create_loader(\n", - " val_dataset,\n", - " input_size=data_config['input_size'],\n", - " batch_size=BATCH_SIZE,\n", - " interpolation=data_config['interpolation'],\n", - " mean=data_config['mean'],\n", - " std=data_config['std'],\n", - " crop_pct=data_config['crop_pct'],\n", - " device=device)\n", - "\n", - "# Evaluate the model on ImageNet\n", - "eval_results = classification_eval(model, val_loader)\n", - "\n", - "# Print float model Accuracy results\n", - "print(\"Float model Accuracy: {:.4f}\".format(round(100 * eval_results[0], 2)))" - ] - }, - { - "cell_type": "markdown", - "id": "4fb6bffc-23d1-4852-8ec5-9007361c8eeb", - "metadata": {}, - "source": [ - "### Quantized model evaluation\n", - "We can evaluate the performance of the quantized model. There is a slight decrease in performance that can be further mitigated by either expanding the representative dataset or employing MCT's advanced quantization methods, such as GPTQ (Gradient-Based/Enhanced Post Training Quantization)." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8dc7b87c-a9f4-4568-885a-fe009c8f4e8f", - "metadata": {}, - "outputs": [], - "source": [ - "# Evaluate the quantized model on ImageNet\n", - "eval_results = classification_eval(quant_model, val_loader)\n", - "\n", - "# Print quantized model Accuracy results\n", - "print(\"Quantized model Accuracy: {:.4f}\".format(round(100 * eval_results[0], 2)))" - ] - }, - { - "cell_type": "markdown", - "id": "6d93352843a27433", - "metadata": { - "collapsed": false - }, - "source": [ - "\\\n", - "Copyright 2024 Sony Semiconductor Israel, Inc. All rights reserved.\n", - "\n", - "Licensed under the Apache License, Version 2.0 (the \"License\");\n", - "you may not use this file except in compliance with the License.\n", - "You may obtain a copy of the License at\n", - "\n", - " http://www.apache.org/licenses/LICENSE-2.0\n", - "\n", - "Unless required by applicable law or agreed to in writing, software\n", - "distributed under the License is distributed on an \"AS IS\" BASIS,\n", - "WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", - "See the License for the specific language governing permissions and\n", - "limitations under the License." 
- ] - } - ], - "metadata": { - "colab": { - "provenance": [] - }, - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.4" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/tutorials/notebooks/imx500_notebooks/pytorch/pytorch_torchvision_classification_model_for_imx500.ipynb b/tutorials/notebooks/imx500_notebooks/pytorch/pytorch_torchvision_classification_model_for_imx500.ipynb deleted file mode 100644 index 309a42195..000000000 --- a/tutorials/notebooks/imx500_notebooks/pytorch/pytorch_torchvision_classification_model_for_imx500.ipynb +++ /dev/null @@ -1,295 +0,0 @@ -{ - "cells": [ - { - "metadata": {}, - "cell_type": "markdown", - "source": [ - "# PyTorch Model from torchvision - Quantization for IMX500\n", - "\n", - "[Run this tutorial in Google Colab](https://colab.research.google.com/github/sony/model_optimization/blob/main/tutorials/notebooks/imx500_notebooks/pytorch/pytorch_torchvision_classification_model_for_imx500.ipynb)\n", - "\n", - "## Overview\n", - "\n", - "In this tutorial, we will illustrate a basic and quick process of preparing a pre-trained model for deployment using MCT. \n", - "We will use an existing pre-trained model from [torchvision](https://pytorch.org/vision/stable/models.html). The user can choose any torchvision model from this list. " - ], - "id": "ca9e0ba1f92d22fc" - }, - { - "metadata": {}, - "cell_type": "markdown", - "source": [ - "## Setup\n", - "### Install the relevant packages" - ], - "id": "7e737ed9d9b11d2a" - }, - { - "metadata": {}, - "cell_type": "code", - "source": [ - "!pip install -q torch\n", - "!pip install -q torchvision\n", - "!pip install -q onnx" - ], - "id": "5250b07975f15b5f", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "cell_type": "markdown", - "source": [ - "Install MCT (if it’s not already installed). Additionally, in order to use all the necessary utility functions for this tutorial, we also copy [MCT tutorials folder](https://github.com/sony/model_optimization/tree/main/tutorials) and add it to the system path." - ], - "id": "b1a05efedd4dbc77" - }, - { - "metadata": {}, - "cell_type": "code", - "source": [ - "import sys\n", - "import importlib\n", - "\n", - "if not importlib.util.find_spec('model_compression_toolkit'):\n", - " !pip install model_compression_toolkit\n", - "!git clone https://github.com/sony/model_optimization.git temp_mct && mv temp_mct/tutorials . && \\rm -rf temp_mct\n", - "sys.path.insert(0,\"tutorials\")" - ], - "id": "391a9ed0d178002e", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "cell_type": "markdown", - "source": [ - "### Download ImageNet validation set\n", - "Download ImageNet dataset with only the validation split.\n", - "\n", - "Note that for demonstration purposes we use the validation set for the model quantization routines. Usually, a subset of the training dataset is used, but loading it is a heavy procedure that is unnecessary for the sake of this demonstration.\n", - "\n", - "This step may take several minutes..." 
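If a local ImageNet training split is available, a small calibration subset can be carved out of it instead of reusing the validation set. The sketch below is only illustrative: the `./imagenet/train` path, the 80-image sample size, and the transform mirror the rest of this notebook but are assumptions, not part of the original tutorial.

```python
import random
from torch.utils.data import DataLoader, Subset
from torchvision import datasets, transforms

# Assumed local ImageNet-style training folder; adjust the path to your setup.
calib_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])
train_dataset = datasets.ImageFolder(root='./imagenet/train', transform=calib_transform)

# Randomly pick 80 images (20 iterations x batch size 4, matching the calibration used below).
calib_indices = random.sample(range(len(train_dataset)), k=80)
calib_loader = DataLoader(Subset(train_dataset, calib_indices), batch_size=4, shuffle=False)
```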
- ], - "id": "5923827fab97d1de" - }, - { - "metadata": {}, - "cell_type": "code", - "source": [ - "import os\n", - "if not os.path.isdir('imagenet'):\n", - " !mkdir imagenet\n", - " !wget https://image-net.org/data/ILSVRC/2012/ILSVRC2012_devkit_t12.tar.gz\n", - " !mv ILSVRC2012_devkit_t12.tar.gz imagenet/\n", - " !wget https://image-net.org/data/ILSVRC/2012/ILSVRC2012_img_val.tar\n", - " !mv ILSVRC2012_img_val.tar imagenet/" - ], - "id": "ec301c30cd83d535", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "cell_type": "markdown", - "source": [ - "## Model Quantization\n", - "\n", - "### Download a pre-trained model - Please select a Torchvision model\n", - "The tutorial is pre-configured to download `mobilenet_v2` model. In case you wish to use a different model - please change the model & weights below, based on [torchvision](https://pytorch.org/vision/stable/models.html)" - ], - "id": "7059e58ac6efff74" - }, - { - "metadata": {}, - "cell_type": "code", - "source": [ - "from torchvision.models import mobilenet_v2, MobileNet_V2_Weights\n", - "\n", - "model = mobilenet_v2(weights=MobileNet_V2_Weights)\n", - "model.eval()" - ], - "id": "ea84e114c819dde0", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "cell_type": "markdown", - "source": [ - "### Post training quantization using Model Compression Toolkit \n", - "\n", - "Now, we're all set to use MCT's post-training quantization. To begin, we'll define a representative dataset and proceed with the model quantization. Please note that, for demonstration purposes, we'll use the evaluation dataset as our representative dataset. We'll calibrate the model using 80 representative images, divided into 20 iterations of 'batch_size' images each. " - ], - "id": "7d334e3dd747ba68" - }, - { - "metadata": {}, - "cell_type": "code", - "source": [ - "import model_compression_toolkit as mct\n", - "from model_compression_toolkit.core.pytorch.pytorch_device_config import get_working_device\n", - "from typing import Iterator, Tuple, List\n", - "from torch.utils.data import DataLoader\n", - "from torchvision import transforms, datasets\n", - "\n", - "\n", - "BATCH_SIZE = 4\n", - "n_iters = 20\n", - "device = get_working_device()\n", - "\n", - "# Define transformations for the validation set\n", - "val_transform = transforms.Compose([\n", - " transforms.Resize(256),\n", - " transforms.CenterCrop(224),\n", - " transforms.ToTensor(),\n", - " transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])\n", - "\n", - "# Extract ImageNet validation dataset using torchvision \"datasets\" module\n", - "val_dataset = datasets.ImageNet(root='./imagenet', split='val', transform=val_transform)\n", - "val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)\n", - "\n", - "# Define representative dataset generator\n", - "def get_representative_dataset(n_iter: int, dataset_loader: Iterator[Tuple]):\n", - " \"\"\"\n", - " This function creates a representative dataset generator. 
The generator yields numpy\n", - " arrays of batches of shape: [Batch, H, W ,C].\n", - " Args:\n", - " n_iter: number of iterations for MCT to calibrate on\n", - " Returns:\n", - " A representative dataset generator\n", - " \"\"\" \n", - " def representative_dataset() -> Iterator[List]:\n", - " ds_iter = iter(dataset_loader)\n", - " for _ in range(n_iter):\n", - " yield [next(ds_iter)[0]]\n", - "\n", - " return representative_dataset\n", - "\n", - "# Get representative dataset generator\n", - "representative_dataset_gen = get_representative_dataset(n_iter=n_iters, dataset_loader=val_loader)\n", - "\n", - "# Perform post training quantization with the default configuration\n", - "quant_model, _ = mct.ptq.pytorch_post_training_quantization(model, representative_data_gen=representative_dataset_gen)\n", - "print('Quantized model is ready')" - ], - "id": "c231499204b5fe58", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "cell_type": "markdown", - "source": [ - "### Model Export\n", - "\n", - "Now, we can export the quantized model, ready for deployment, into a `.onnx` format file. Please ensure that the `save_model_path` has been set correctly. " - ], - "id": "2659cdc8ec1a3008" - }, - { - "metadata": {}, - "cell_type": "code", - "source": [ - "mct.exporter.pytorch_export_model(model=quant_model,\n", - " save_model_path='./torchvision_qmodel.onnx',\n", - " repr_dataset=representative_dataset_gen,\n", - " onnx_opset_version=17)" - ], - "id": "de8dd4dbc0c8d393", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "cell_type": "markdown", - "source": [ - "## Evaluation on ImageNet dataset\n", - "\n", - "### Floating point model evaluation\n", - "Please ensure that the dataset path has been set correctly before running this code cell." - ], - "id": "d98c5b8acb2e7397" - }, - { - "metadata": {}, - "cell_type": "code", - "source": [ - "from tutorials.resources.utils.pytorch_tutorial_tools import classification_eval\n", - "# Evaluate the model on ImageNet\n", - "eval_results = classification_eval(model, val_loader)\n", - "\n", - "# Print float model Accuracy results\n", - "print(\"Float model Accuracy: {:.4f}\".format(round(100 * eval_results[0], 2)))" - ], - "id": "6cd077108ef55889", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "cell_type": "markdown", - "source": [ - "### Quantized model evaluation\n", - "We can evaluate the performance of the quantized model. There is a slight decrease in performance that can be further mitigated by either expanding the representative dataset or employing MCT's advanced quantization methods, such as GPTQ (Gradient-Based/Enhanced Post Training Quantization)." - ], - "id": "eb86aae997a5210a" - }, - { - "metadata": {}, - "cell_type": "code", - "source": [ - "# Evaluate the quantized model on ImageNet\n", - "eval_results = classification_eval(quant_model, val_loader)\n", - "\n", - "# Print quantized model Accuracy results\n", - "print(\"Quantized model Accuracy: {:.4f}\".format(round(100 * eval_results[0], 2)))" - ], - "id": "b881138870761d89", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "cell_type": "markdown", - "source": [ - "\\\n", - "Copyright 2024 Sony Semiconductor Israel, Inc. 
All rights reserved.\n", - "\n", - "Licensed under the Apache License, Version 2.0 (the \"License\");\n", - "you may not use this file except in compliance with the License.\n", - "You may obtain a copy of the License at\n", - "\n", - " http://www.apache.org/licenses/LICENSE-2.0\n", - "\n", - "Unless required by applicable law or agreed to in writing, software\n", - "distributed under the License is distributed on an \"AS IS\" BASIS,\n", - "WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", - "See the License for the specific language governing permissions and\n", - "limitations under the License" - ], - "id": "a4d344795b87475a" - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 2 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.6" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/tutorials/notebooks/imx500_notebooks/pytorch/pytorch_yolov8n_for_imx500.ipynb b/tutorials/notebooks/imx500_notebooks/pytorch/pytorch_yolov8n_for_imx500.ipynb deleted file mode 100644 index 149efeb82..000000000 --- a/tutorials/notebooks/imx500_notebooks/pytorch/pytorch_yolov8n_for_imx500.ipynb +++ /dev/null @@ -1,587 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "fab9d9939dc74da4", - "metadata": { - "collapsed": false - }, - "source": [ - "# YOLOv8n Object Detection PyTorch Model - Quantization for IMX500\n", - "\n", - "[Run this tutorial in Google Colab](https://colab.research.google.com/github/sony/model_optimization/blob/main/tutorials/notebooks/imx500_notebooks/pytorch/pytorch_yolov8n_for_imx500.ipynb)\n", - "\n", - "## Overview\n", - "\n", - "In this tutorial, we will illustrate a basic and quick process of preparing a pre-trained model for deployment using MCT. Specifically, we will demonstrate how to download a pre-trained YOLOv8n model from the MCT Models Library, compress it, and make it deployment-ready using MCT's post-training quantization techniques.\n", - "\n", - "We will use an existing pre-trained YOLOv8n model based on [Ultralytics](https://github.com/ultralytics/ultralytics). The model was slightly adjusted for model quantization. We will quantize the model using MCT post training quantization and evaluate the performance of the floating point model and the quantized model on COCO dataset.\n", - "\n", - "\n", - "## Summary\n", - "\n", - "In this tutorial we will cover:\n", - "\n", - "1. Post-Training Quantization using MCT of PyTorch object detection model.\n", - "2. Data preparation - loading and preprocessing validation and representative datasets from COCO.\n", - "3. Accuracy evaluation of the floating-point and the quantized models." 
- ] - }, - { - "cell_type": "markdown", - "id": "d74f9c855ec54081", - "metadata": { - "collapsed": false - }, - "source": [ - "## Setup\n", - "### Install the relevant packages" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7c7fa04c9903736f", - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "!pip install -q torch\n", - "!pip install onnx\n", - "!pip install -q pycocotools\n", - "!pip install 'huggingface-hub>=0.21.0'\n", - "!pip install sony-custom-layers" - ] - }, - { - "cell_type": "markdown", - "id": "57717bc8f59a0d85", - "metadata": { - "collapsed": false - }, - "source": [ - "Install MCT (if it’s not already installed). Additionally, in order to use all the necessary utility functions for this tutorial, we also copy [MCT tutorials folder](https://github.com/sony/model_optimization/tree/main/tutorials) and add it to the system path." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "import sys\n", - "import os\n", - "import importlib\n", - "\n", - "if not importlib.util.find_spec('model_compression_toolkit'):\n", - " !pip install mct-nightly\n", - "!git clone https://github.com/sony/model_optimization.git temp_mct && mv temp_mct/tutorials . && \\rm -rf temp_mct\n", - "sys.path.insert(0,\"tutorials\")" - ], - "metadata": { - "collapsed": false - }, - "id": "b6178c86a2df086" - }, - { - "cell_type": "markdown", - "source": [ - "### Download COCO evaluation set" - ], - "metadata": { - "collapsed": false - }, - "id": "2addc3f2e6fbf402" - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "if not os.path.isdir('coco'):\n", - " !wget -nc http://images.cocodataset.org/annotations/annotations_trainval2017.zip\n", - " !unzip -q -o annotations_trainval2017.zip -d ./coco\n", - " !echo Done loading annotations\n", - " !wget -nc http://images.cocodataset.org/zips/val2017.zip\n", - " !unzip -q -o val2017.zip -d ./coco\n", - " !echo Done loading val2017 images" - ], - "metadata": { - "collapsed": false - }, - "id": "4555a00ab957c2eb" - }, - { - "cell_type": "markdown", - "source": [ - "## Model Quantization\n", - "\n", - "### Download a Pre-Trained Model \n", - "\n", - "We begin by loading a pre-trained [YOLOv8n](https://huggingface.co/SSI-DNN/pytorch_yolov8n_640x640_bb_decoding) model. This implementation is based on [Ultralytics](https://github.com/ultralytics/ultralytics) and includes a slightly modified version of yolov8 detection-head (mainly the box decoding part) that was adapted for model quantization. For further insights into the model's implementation details, please refer to [MCT Models Garden - yolov8](https://github.com/sony/model_optimization/tree/main/tutorials/mct_model_garden/models_pytorch/yolov8). 
" - ], - "metadata": { - "collapsed": false - }, - "id": "6a97125e471fae9" - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "from tutorials.mct_model_garden.models_pytorch.yolov8.yolov8 import ModelPyTorch, yaml_load, model_predict\n", - "cfg_dict = yaml_load(\"tutorials/mct_model_garden/models_pytorch/yolov8/yolov8n.yaml\", append_filename=True) # model dict\n", - "model = ModelPyTorch.from_pretrained(\"SSI-DNN/pytorch_yolov8n_detection_640x640\", cfg=cfg_dict)\n", - "model.eval()" - ], - "metadata": { - "collapsed": false - }, - "id": "b7d057120847fc3c" - }, - { - "cell_type": "markdown", - "source": [ - "### Post training quantization using Model Compression Toolkit \n", - "\n", - "Now, we're all set to use MCT's post-training quantization. To begin, we'll define a representative dataset and proceed with the model quantization. Please note that, for demonstration purposes, we'll use the evaluation dataset as our representative dataset. We'll calibrate the model using 80 representative images, divided into 20 iterations of 'batch_size' images each. \n", - "\n", - "Additionally, to further compress the model's memory footprint, we will employ the mixed-precision quantization technique. This method allows each layer to be quantized with different precision options: 2, 4, and 8 bits, aligning with the imx500 target platform capabilities." - ], - "metadata": { - "collapsed": false - }, - "id": "6ecd174ab64a5ff3" - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "from model_compression_toolkit.core.common.network_editors import NodeNameScopeFilter\n", - "from model_compression_toolkit.core import BitWidthConfig\n", - "import model_compression_toolkit as mct\n", - "from tutorials.mct_model_garden.evaluation_metrics.coco_evaluation import coco_dataset_generator\n", - "from tutorials.mct_model_garden.models_pytorch.yolov8.yolov8_preprocess import yolov8_preprocess_chw_transpose\n", - "from typing import Iterator, Tuple, List\n", - "\n", - "REPRESENTATIVE_DATASET_FOLDER = './coco/val2017/'\n", - "REPRESENTATIVE_DATASET_ANNOTATION_FILE = './coco/annotations/instances_val2017.json'\n", - "BATCH_SIZE = 4\n", - "n_iters = 20\n", - "\n", - "# Load representative dataset\n", - "representative_dataset = coco_dataset_generator(dataset_folder=REPRESENTATIVE_DATASET_FOLDER,\n", - " annotation_file=REPRESENTATIVE_DATASET_ANNOTATION_FILE,\n", - " preprocess=yolov8_preprocess_chw_transpose,\n", - " batch_size=BATCH_SIZE)\n", - "\n", - "# Define representative dataset generator\n", - "def get_representative_dataset(n_iter: int, dataset_loader: Iterator[Tuple]):\n", - " \"\"\"\n", - " This function creates a representative dataset generator. 
The generator yields numpy\n", - " arrays of batches of shape: [Batch, H, W ,C].\n", - " Args:\n", - " n_iter: number of iterations for MCT to calibrate on\n", - " Returns:\n", - " A representative dataset generator\n", - " \"\"\" \n", - " def representative_dataset() -> Iterator[List]:\n", - " ds_iter = iter(dataset_loader)\n", - " for _ in range(n_iter):\n", - " yield [next(ds_iter)[0]]\n", - "\n", - " return representative_dataset\n", - "\n", - "# Get representative dataset generator\n", - "representative_dataset_gen = get_representative_dataset(n_iter=n_iters,\n", - " dataset_loader=representative_dataset)\n", - "\n", - "# Set IMX500-v3 TPC (extended support in 16b operations)\n", - "tpc = mct.get_target_platform_capabilities(fw_name=\"pytorch\",\n", - " target_platform_name='imx500',\n", - " target_platform_version='v3')\n", - "\n", - "# Configure MCT manually for specific layers\n", - "manual_bit_cfg = BitWidthConfig()\n", - "manual_bit_cfg.set_manual_activation_bit_width(\n", - " [NodeNameScopeFilter('mul'),\n", - " NodeNameScopeFilter('sub'),\n", - " NodeNameScopeFilter('sub_1'),\n", - " NodeNameScopeFilter('add_6'),\n", - " NodeNameScopeFilter('add_7'),\n", - " NodeNameScopeFilter('stack')], 16)\n", - "\n", - "# Specify the necessary configuration for mixed precision quantization \n", - "config = mct.core.CoreConfig(mixed_precision_config=mct.core.MixedPrecisionQuantizationConfig(num_of_images=10),\n", - " quantization_config=mct.core.QuantizationConfig(concat_threshold_update=True),\n", - " bit_width_config=manual_bit_cfg)\n", - "\n", - "# Define target Resource Utilization for mixed precision weights quantization (76% of 'standard' 8bits quantization).\n", - "# We measure the number of parameters to be 3146176 and calculate the target memory (in Bytes).\n", - "resource_utilization = mct.core.ResourceUtilization(weights_memory=3146176 * 0.76)\n", - "\n", - "# Perform post training quantization\n", - "quant_model, _ = mct.ptq.pytorch_post_training_quantization(in_module=model,\n", - " representative_data_gen=representative_dataset_gen,\n", - " target_resource_utilization=resource_utilization,\n", - " core_config=config,\n", - " target_platform_capabilities=tpc)\n", - "print('Quantized model is ready')" - ], - "metadata": { - "collapsed": false - }, - "id": "cec4035ce185614d" - }, - { - "cell_type": "markdown", - "source": [ - "### Postprocess integration\n", - "Integrate the postprocess to the model using NMS custom layer" - ], - "metadata": { - "collapsed": false - }, - "id": "5fb70430b48edb3" - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "# Wrapped the quantized model with PostProcess NMS.\n", - "from tutorials.mct_model_garden.models_pytorch.yolov8.yolov8 import PostProcessWrapper\n", - "from model_compression_toolkit.core.pytorch.pytorch_device_config import get_working_device\n", - "\n", - "# Define PostProcess params\n", - "score_threshold = 0.001\n", - "iou_threshold = 0.7\n", - "max_detections = 300\n", - "\n", - "# Get working device\n", - "device = get_working_device()\n", - "\n", - "quant_model_pp = PostProcessWrapper(model=quant_model,\n", - " score_threshold=score_threshold,\n", - " iou_threshold=iou_threshold,\n", - " max_detections=max_detections).to(device=device)" - ], - "metadata": { - "collapsed": false - }, - "id": "86fe1368d9b501a9" - }, - { - "cell_type": "markdown", - "id": "3be2016acdc9da60", - "metadata": { - "collapsed": false - }, - "source": [ - "### Model Export\n", - "\n", - "Now, we can export the 
quantized model, ready for deployment, into a `.onnx` format file. Please ensure that the `save_model_path` has been set correctly. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "72dd885c7b92fa93", - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "mct.exporter.pytorch_export_model(model=quant_model_pp,\n", - " save_model_path='./qmodel_pp.onnx',\n", - " repr_dataset=representative_dataset_gen)" - ] - }, - { - "cell_type": "markdown", - "id": "655d764593af0763", - "metadata": { - "collapsed": false - }, - "source": [ - "### Gradient-Based Post Training Quantization using Model Compression Toolkit\n", - "Here we demonstrate how to further optimize the quantized model performance using gradient-based PTQ technique.\n", - "**Please note that this section is computationally heavy, and it's recommended to run it on a GPU. For fast deployment, you may choose to skip this step.** \n", - "\n", - "We will start by loading the COCO training set, and re-define the representative dataset accordingly. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "20fe96b6cc95d38c", - "metadata": { - "collapsed": false, - "tags": [ - "long_run" - ] - }, - "outputs": [], - "source": [ - "!wget -nc http://images.cocodataset.org/zips/train2017.zip\n", - "!unzip -q -o train2017.zip -d ./coco\n", - "!echo Done loading train2017 images\n", - "\n", - "GPTQ_REPRESENTATIVE_DATASET_FOLDER = './coco/train2017/'\n", - "GPTQ_REPRESENTATIVE_DATASET_ANNOTATION_FILE = './coco/annotations/instances_train2017.json'\n", - "BATCH_SIZE = 4\n", - "n_iters = 20\n", - "\n", - "# Load representative dataset\n", - "gptq_representative_dataset = coco_dataset_generator(dataset_folder=GPTQ_REPRESENTATIVE_DATASET_FOLDER,\n", - " annotation_file=GPTQ_REPRESENTATIVE_DATASET_ANNOTATION_FILE,\n", - " preprocess=yolov8_preprocess_chw_transpose,\n", - " batch_size=BATCH_SIZE)\n", - "\n", - "# Get representative dataset generator\n", - "gptq_representative_dataset_gen = get_representative_dataset(n_iter=n_iters,\n", - " dataset_loader=gptq_representative_dataset)" - ] - }, - { - "cell_type": "markdown", - "id": "29d54f733139d114", - "metadata": { - "collapsed": false - }, - "source": [ - "Next, we'll set up the Gradient-Based PTQ configuration and execute the necessary MCT command. Keep in mind that this step can be time-consuming, depending on your runtime." 
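Before launching the (potentially long) GPTQ run, it can be worth a quick check that the PTQ model with the NMS wrapper actually runs end to end. The following is an optional, illustrative snippet; it assumes the representative generator yields float32 batches in the model's expected layout and that the wrapped output is an iterable of per-field tensors.

```python
# Optional sanity check (illustrative): run the quantized + NMS-wrapped model on one batch.
import torch

sample = next(representative_dataset_gen())[0]       # one preprocessed representative batch
sample = torch.as_tensor(sample).to(device)          # the generator may yield numpy or torch tensors
with torch.no_grad():
    detections = quant_model_pp(sample)
print([getattr(out, 'shape', type(out)) for out in detections])  # e.g. boxes, scores, labels, counts
```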
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "240421e00f6cce34", - "metadata": { - "collapsed": false, - "tags": [ - "long_run" - ] - }, - "outputs": [], - "source": [ - "# Specify the necessary configuration for Gradient-Based PTQ.\n", - "n_gptq_epochs = 1000\n", - "gptq_config = mct.gptq.get_pytorch_gptq_config(n_epochs=n_gptq_epochs, use_hessian_based_weights=False)\n", - "\n", - "# Perform Gradient-Based Post Training Quantization\n", - "gptq_quant_model, _ = mct.gptq.pytorch_gradient_post_training_quantization(\n", - " model=model,\n", - " representative_data_gen=gptq_representative_dataset_gen,\n", - " target_resource_utilization=resource_utilization,\n", - " gptq_config=gptq_config,\n", - " core_config=config,\n", - " target_platform_capabilities=tpc)\n", - "\n", - "print('Quantized-GPTQ model is ready')\n", - "\n", - "gptq_quant_model_pp = PostProcessWrapper(model=gptq_quant_model,\n", - " score_threshold=score_threshold,\n", - " iou_threshold=iou_threshold,\n", - " max_detections=max_detections).to(device=device)" - ] - }, - { - "cell_type": "markdown", - "id": "b5d72e8420550101", - "metadata": { - "collapsed": false - }, - "source": [ - "### Model Export\n", - "\n", - "Now, we can export the quantized model, ready for deployment, into a `.onnx` format file. Please ensure that the `save_model_path` has been set correctly. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "546ff946af81702b", - "metadata": { - "collapsed": false, - "tags": [ - "long_run" - ] - }, - "outputs": [], - "source": [ - "mct.exporter.pytorch_export_model(model=gptq_quant_model_pp,\n", - " save_model_path='./qmodel_gptq_pp.onnx',\n", - " repr_dataset=gptq_representative_dataset_gen)" - ] - }, - { - "cell_type": "markdown", - "id": "43a8a6d11d696b09", - "metadata": { - "collapsed": false - }, - "source": [ - "## Evaluation on COCO dataset\n", - "\n", - "### Floating point model evaluation\n", - "Next, we evaluate the floating point model by using `cocoeval` library alongside additional dataset utilities. We can verify the mAP accuracy aligns with that of the original model. \n", - "Note that we set the \"batch_size\" to 4 and the preprocessing according to [Ultralytics](https://github.com/ultralytics/ultralytics).\n", - "Please ensure that the dataset path has been set correctly before running this code cell." 
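The `coco_evaluate` utility used in the next cells wraps the standard `pycocotools` evaluation loop (plus preprocessing, batching, and mapping predictions back to the original image sizes). For reference, the bare-bones pycocotools pattern looks roughly like this; `detections.json` is a hypothetical COCO-format results file, not something produced by this notebook.

```python
# Bare-bones COCO mAP computation with pycocotools (illustrative reference only).
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval

coco_gt = COCO('./coco/annotations/instances_val2017.json')  # ground-truth annotations
coco_dt = coco_gt.loadRes('detections.json')                 # hypothetical results file
coco_eval = COCOeval(coco_gt, coco_dt, iouType='bbox')
coco_eval.evaluate()
coco_eval.accumulate()
coco_eval.summarize()
print('mAP@0.50:0.95 =', coco_eval.stats[0])
```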
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "01e90967-594b-480f-b2e6-45e2c9ce9cee", - "metadata": {}, - "outputs": [], - "source": [ - "from tutorials.mct_model_garden.evaluation_metrics.coco_evaluation import coco_evaluate\n", - "\n", - "EVAL_DATASET_FOLDER = './coco/val2017'\n", - "EVAL_DATASET_ANNOTATION_FILE = './coco/annotations/instances_val2017.json'\n", - "INPUT_RESOLUTION = 640\n", - "\n", - "# Define resizing information to map between the model's output and the original image dimensions\n", - "output_resize = {'shape': (INPUT_RESOLUTION, INPUT_RESOLUTION), 'aspect_ratio_preservation': True, 'normalized_coords': False}\n", - "\n", - "# Wrapped the model with PostProcess NMS.\n", - "# Define PostProcess params\n", - "score_threshold = 0.001\n", - "iou_threshold = 0.7\n", - "max_detections = 300\n", - "\n", - "model_pp = PostProcessWrapper(model=model,\n", - " score_threshold=score_threshold,\n", - " iou_threshold=iou_threshold,\n", - " max_detections=max_detections).to(device=device)\n", - "\n", - "# Evaluate the model on coco\n", - "eval_results = coco_evaluate(model=model_pp,\n", - " dataset_folder=EVAL_DATASET_FOLDER,\n", - " annotation_file=EVAL_DATASET_ANNOTATION_FILE,\n", - " preprocess=yolov8_preprocess_chw_transpose,\n", - " output_resize=output_resize,\n", - " batch_size=BATCH_SIZE,\n", - " model_inference=model_predict)\n", - "\n", - "# Print float model mAP results\n", - "print(\"Float model mAP: {:.4f}\".format(eval_results[0]))" - ] - }, - { - "cell_type": "markdown", - "id": "4fb6bffc-23d1-4852-8ec5-9007361c8eeb", - "metadata": {}, - "source": [ - "### Quantized model evaluation\n", - "We can evaluate the performance of the quantized model. There is a slight decrease in performance that can be further mitigated by either expanding the representative dataset or employing MCT's advanced quantization methods, such as GPTQ (Gradient-Based/Enhanced Post Training Quantization)." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8dc7b87c-a9f4-4568-885a-fe009c8f4e8f", - "metadata": {}, - "outputs": [], - "source": [ - "# Evaluate the quantized model with PostProcess on coco\n", - "eval_results = coco_evaluate(model=quant_model_pp,\n", - " dataset_folder=EVAL_DATASET_FOLDER,\n", - " annotation_file=EVAL_DATASET_ANNOTATION_FILE,\n", - " preprocess=yolov8_preprocess_chw_transpose,\n", - " output_resize=output_resize,\n", - " batch_size=BATCH_SIZE,\n", - " model_inference=model_predict)\n", - "\n", - "# Print quantized model mAP results\n", - "print(\"Quantized model mAP: {:.4f}\".format(eval_results[0]))" - ] - }, - { - "cell_type": "markdown", - "id": "3bb5cc7c91dc8f21", - "metadata": { - "collapsed": false - }, - "source": [ - "Finally, we can evaluate the performance of the quantized model through GPTQ (Gradient-Based/Enhanced Post Training Quantization). We anticipate an improvement in performance compare to the quantized model utilizing PTQ." 
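Each evaluation cell overwrites `eval_results`, so if you plan to compare the float, PTQ, and GPTQ runs it helps to stash the printed mAP values as you go. A small illustrative helper (the dictionary keys and the stashing step are your own bookkeeping, not part of the tutorial utilities):

```python
# Illustrative bookkeeping: after each evaluation cell, store its mAP, e.g.
#   results['float'] = eval_results[0]
results = {}  # e.g. {'float': ..., 'PTQ': ..., 'GPTQ': ...}

def print_comparison(results: dict) -> None:
    base = results.get('float')
    for name, m_ap in results.items():
        delta = m_ap - base if base is not None else float('nan')
        print(f"{name:<8} mAP={m_ap:.4f}  delta vs float={delta:+.4f}")

# print_comparison(results)  # call once all three runs are stored
```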
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "168468f17ae8bc59", - "metadata": { - "collapsed": false, - "tags": [ - "long_run" - ] - }, - "outputs": [], - "source": [ - "# Evaluate the quantized using GPTQ model with PostProcess on coco\n", - "eval_results = coco_evaluate(model=gptq_quant_model_pp,\n", - " dataset_folder=EVAL_DATASET_FOLDER,\n", - " annotation_file=EVAL_DATASET_ANNOTATION_FILE,\n", - " preprocess=yolov8_preprocess_chw_transpose,\n", - " output_resize=output_resize,\n", - " batch_size=BATCH_SIZE,\n", - " model_inference=model_predict)\n", - "\n", - "# Print quantized using GPTQ model mAP results\n", - "print(\"Quantized using GPTQ model mAP: {:.4f}\".format(eval_results[0]))" - ] - }, - { - "cell_type": "markdown", - "id": "6d93352843a27433", - "metadata": { - "collapsed": false - }, - "source": [ - "\\\n", - "Copyright 2024 Sony Semiconductor Israel, Inc. All rights reserved.\n", - "\n", - "Licensed under the Apache License, Version 2.0 (the \"License\");\n", - "you may not use this file except in compliance with the License.\n", - "You may obtain a copy of the License at\n", - "\n", - " http://www.apache.org/licenses/LICENSE-2.0\n", - "\n", - "Unless required by applicable law or agreed to in writing, software\n", - "distributed under the License is distributed on an \"AS IS\" BASIS,\n", - "WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", - "See the License for the specific language governing permissions and\n", - "limitations under the License." - ] - } - ], - "metadata": { - "colab": { - "provenance": [] - }, - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.4" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/tutorials/notebooks/imx500_notebooks/pytorch/pytorch_yolov8n_pose_for_imx500.ipynb b/tutorials/notebooks/imx500_notebooks/pytorch/pytorch_yolov8n_pose_for_imx500.ipynb deleted file mode 100644 index 7405b55d6..000000000 --- a/tutorials/notebooks/imx500_notebooks/pytorch/pytorch_yolov8n_pose_for_imx500.ipynb +++ /dev/null @@ -1,529 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "fab9d9939dc74da4", - "metadata": { - "collapsed": false - }, - "source": [ - "# YOLOv8n Object Detection PyTorch Model - Quantization for IMX500\n", - "\n", - "[Run this tutorial in Google Colab](https://colab.research.google.com/github/sony/model_optimization/blob/main/tutorials/notebooks/imx500_notebooks/pytorch/pytorch_yolov8n_pose_for_imx500.ipynb)\n", - "\n", - "## Overview\n", - "\n", - "In this tutorial, we will illustrate a basic and quick process of preparing a pre-trained model for deployment using MCT. Specifically, we will demonstrate how to download a pre-trained YOLOv8n model from the MCT Models Library, compress it, and make it deployment-ready using MCT's post-training quantization techniques.\n", - "\n", - "We will use an existing pre-trained YOLOv8n pose estimation model based on [Ultralytics](https://github.com/ultralytics/ultralytics). The model was slightly adjusted for model quantization. 
We will quantize the model using MCT post training quantization and evaluate the performance of the floating point model and the quantized model on COCO dataset.\n", - "\n", - "\n", - "## Summary\n", - "\n", - "In this tutorial we will cover:\n", - "\n", - "1. Post-Training Quantization using MCT of PyTorch pose estimation model.\n", - "2. Data preparation - loading and preprocessing validation and representative datasets from COCO.\n", - "3. Accuracy evaluation of the floating-point and the quantized models." - ] - }, - { - "cell_type": "markdown", - "id": "d74f9c855ec54081", - "metadata": { - "collapsed": false - }, - "source": [ - "## Setup\n", - "### Install the relevant packages" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7c7fa04c9903736f", - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "!pip install -q torch\n", - "!pip install onnx\n", - "!pip install -q pycocotools\n", - "!pip install 'huggingface-hub>=0.21.0'" - ] - }, - { - "cell_type": "markdown", - "id": "57717bc8f59a0d85", - "metadata": { - "collapsed": false - }, - "source": [ - "Install MCT (if it’s not already installed). Additionally, in order to use all the necessary utility functions for this tutorial, we also copy [MCT tutorials folder](https://github.com/sony/model_optimization/tree/main/tutorials) and add it to the system path." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9728247bc20d0600", - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "import sys\n", - "import os\n", - "import importlib\n", - "\n", - "if not importlib.util.find_spec('model_compression_toolkit'):\n", - " !pip install model_compression_toolkit\n", - "!git clone https://github.com/sony/model_optimization.git temp_mct && mv temp_mct/tutorials . && \\rm -rf temp_mct\n", - "sys.path.insert(0,\"tutorials\")" - ] - }, - { - "cell_type": "markdown", - "id": "7a1038b9fd98bba2", - "metadata": { - "collapsed": false - }, - "source": [ - "### Download COCO evaluation set" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8bea492d71b4060f", - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "if not os.path.isdir('coco'):\n", - " !wget -nc http://images.cocodataset.org/annotations/annotations_trainval2017.zip\n", - " !unzip -q -o annotations_trainval2017.zip -d ./coco\n", - " !echo Done loading annotations\n", - " !wget -nc http://images.cocodataset.org/zips/val2017.zip\n", - " !unzip -q -o val2017.zip -d ./coco\n", - " !echo Done loading val2017 images" - ] - }, - { - "cell_type": "markdown", - "id": "084c2b8b-3175-4d46-a18a-7c4d8b6fcb38", - "metadata": {}, - "source": [ - "## Model Quantization\n", - "\n", - "### Download a Pre-Trained Model \n", - "\n", - "We begin by loading a pre-trained [YOLOv8n](https://huggingface.co/SSI-DNN/pytorch_yolov8n_640x640) model. This implementation is based on [Ultralytics](https://github.com/ultralytics/ultralytics) and includes a slightly modified version of yolov8 pose-head that was adapted for model quantization. For further insights into the model's implementation details, please refer to [MCT Models Garden - yolov8](https://github.com/sony/model_optimization/tree/main/tutorials/mct_model_garden/models_pytorch/yolov8). 
" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e8395b28-4732-4d18-b081-5d3bdf508691", - "metadata": {}, - "outputs": [], - "source": [ - "from tutorials.mct_model_garden.models_pytorch.yolov8.yolov8 import ModelPyTorch, yaml_load\n", - "\n", - "# Load the adjusted model from hugging-face\n", - "cfg_dict = yaml_load(\"tutorials/mct_model_garden/models_pytorch/yolov8/yolov8n-pose.yaml\", append_filename=True)\n", - "model = ModelPyTorch.from_pretrained(\"SSI-DNN/pytorch_yolov8n_640x640\", cfg=cfg_dict)\n", - "\n", - "# Ensure the model is in evaluation mode\n", - "model = model.eval()" - ] - }, - { - "cell_type": "markdown", - "id": "3cde2f8e-0642-4374-a1f4-df2775fe7767", - "metadata": {}, - "source": [ - "### Post training quantization using Model Compression Toolkit \n", - "\n", - "Now, we're all set to use MCT's post-training quantization. To begin, we'll define a representative dataset and proceed with the model quantization. Please note that, for demonstration purposes, we'll use the evaluation dataset as our representative dataset. We'll calibrate the model using 80 representative images, divided into 20 iterations of 'batch_size' images each. \n", - "\n", - "Additionally, to further compress the model's memory footprint, we will employ the mixed-precision quantization technique. This method allows each layer to be quantized with different precision options: 2, 4, and 8 bits, aligning with the imx500 target platform capabilities." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "56393342-cecf-4f64-b9ca-2f515c765942", - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "import model_compression_toolkit as mct\n", - "from tutorials.mct_model_garden.evaluation_metrics.coco_evaluation import CocoDataset, DataLoader\n", - "from tutorials.mct_model_garden.models_pytorch.yolov8.yolov8_preprocess import yolov8_preprocess_chw_transpose\n", - "from typing import Iterator, Tuple, List\n", - "\n", - "REPRESENTATIVE_DATASET_FOLDER = './coco/val2017/'\n", - "REPRESENTATIVE_DATASET_ANNOTATION_FILE = './coco/annotations/person_keypoints_val2017.json'\n", - "BATCH_SIZE = 4\n", - "n_iters = 20\n", - "\n", - "representative_dataset = CocoDataset(dataset_folder=REPRESENTATIVE_DATASET_FOLDER,\n", - " annotation_file=REPRESENTATIVE_DATASET_ANNOTATION_FILE,\n", - " preprocess=yolov8_preprocess_chw_transpose)\n", - "\n", - "representative_dataloader = DataLoader(representative_dataset, BATCH_SIZE, shuffle=True)\n", - "\n", - "# Define representative dataset generator\n", - "def get_representative_dataset(n_iter: int, dataset_loader: Iterator[Tuple]):\n", - " \"\"\"\n", - " This function creates a representative dataset generator. 
The generator yields numpy\n", - " arrays of batches of shape: [Batch, H, W ,C].\n", - " Args:\n", - " n_iter: number of iterations for MCT to calibrate on\n", - " Returns:\n", - " A representative dataset generator\n", - " \"\"\" \n", - " def representative_dataset() -> Iterator[List]:\n", - " ds_iter = iter(dataset_loader)\n", - " for _ in range(n_iter):\n", - " yield [next(ds_iter)[0]]\n", - "\n", - " return representative_dataset\n", - "\n", - "# Get representative dataset generator\n", - "representative_dataset_gen = get_representative_dataset(n_iter=n_iters,\n", - " dataset_loader=representative_dataloader)\n", - "\n", - "# Set IMX500-v1 TPC\n", - "tpc = mct.get_target_platform_capabilities(fw_name=\"pytorch\",\n", - " target_platform_name='imx500',\n", - " target_platform_version='v1')\n", - "\n", - "# Specify the necessary configuration for mixed precision quantization. To keep the tutorial brief, we'll use a small set of images and omit the hessian metric for mixed precision calculations. It's important to be aware that this choice may impact the resulting accuracy. \n", - "mp_config = mct.core.MixedPrecisionQuantizationConfig(num_of_images=5)\n", - "config = mct.core.CoreConfig(mixed_precision_config=mp_config,\n", - " quantization_config=mct.core.QuantizationConfig(shift_negative_activation_correction=True,\n", - " concat_threshold_update=True))\n", - "\n", - "# Define target Resource Utilization for mixed precision weights quantization (80% of 'standard' 8bits quantization)\n", - "resource_utilization_data = mct.core.pytorch_resource_utilization_data(in_model=model,\n", - " representative_data_gen=representative_dataset_gen,\n", - " core_config=config,\n", - " target_platform_capabilities=tpc)\n", - "resource_utilization = mct.core.ResourceUtilization(weights_memory=resource_utilization_data.weights_memory * 0.8)\n", - "\n", - "# Perform post training quantization\n", - "quant_model, _ = mct.ptq.pytorch_post_training_quantization(in_module=model,\n", - " representative_data_gen=representative_dataset_gen,\n", - " target_resource_utilization=resource_utilization,\n", - " core_config=config,\n", - " target_platform_capabilities=tpc)\n", - "print('Quantized model is ready')" - ] - }, - { - "cell_type": "markdown", - "id": "3be2016acdc9da60", - "metadata": { - "collapsed": false - }, - "source": [ - "### Model Export\n", - "\n", - "Now, we can export the quantized model, ready for deployment, into a `.onnx` format file. Please ensure that the `save_model_path` has been set correctly. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "72dd885c7b92fa93", - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "mct.exporter.pytorch_export_model(model=quant_model,\n", - " save_model_path='./qmodel.onnx',\n", - " repr_dataset=representative_dataset_gen)" - ] - }, - { - "cell_type": "markdown", - "source": [ - "### Gradient-Based Post Training Quantization using Model Compression Toolkit\n", - "Here we demonstrate how to further optimize the quantized model performance using gradient-based PTQ technique.\n", - "**Please note that this section is computationally heavy, and it's recommended to run it on a GPU. For fast deployment, you may choose to skip this step.** \n", - "\n", - "We will start by loading the COCO training set, and re-define the representative dataset accordingly. 
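Before switching to the training split for GPTQ, it can be informative to look at the numbers behind the mixed-precision budget chosen above: `resource_utilization_data.weights_memory` is the full 8-bit weights footprint from which the 80% target was derived. A short illustrative printout:

```python
# Illustrative: report the weights-memory budget used for mixed precision above.
full_8bit_bytes = resource_utilization_data.weights_memory
target_bytes = full_8bit_bytes * 0.8
print(f"8-bit weights footprint : {full_8bit_bytes / 2**20:.2f} MiB")
print(f"Mixed-precision target  : {target_bytes / 2**20:.2f} MiB (80% of the 8-bit footprint)")
```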
" - ], - "metadata": { - "collapsed": false - }, - "id": "655d764593af0763" - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "if not os.path.isdir('coco/train2017'):\n", - " !wget -nc http://images.cocodataset.org/zips/train2017.zip\n", - " !unzip -q -o train2017.zip -d ./coco\n", - " !echo Done loading train2017 images\n", - "\n", - "GPTQ_REPRESENTATIVE_DATASET_FOLDER = './coco/train2017/'\n", - "GPTQ_REPRESENTATIVE_DATASET_ANNOTATION_FILE = './coco/annotations/person_keypoints_train2017.json'\n", - "BATCH_SIZE = 4\n", - "n_iters = 20\n", - "\n", - "# Load representative dataset\n", - "representative_dataset = CocoDataset(dataset_folder=GPTQ_REPRESENTATIVE_DATASET_FOLDER,\n", - " annotation_file=GPTQ_REPRESENTATIVE_DATASET_ANNOTATION_FILE,\n", - " preprocess=yolov8_preprocess_chw_transpose)\n", - "\n", - "representative_dataloader = DataLoader(representative_dataset, BATCH_SIZE, shuffle=True)\n", - "\n", - "# Get representative dataset generator\n", - "representative_dataset_gen = get_representative_dataset(n_iter=n_iters,\n", - " dataset_loader=representative_dataloader)" - ], - "metadata": { - "collapsed": false, - "tags": [ - "long_run" - ] - }, - "id": "20fe96b6cc95d38c" - }, - { - "cell_type": "markdown", - "source": [ - "Next, we'll set up the Gradient-Based PTQ configuration and execute the necessary MCT command. Keep in mind that this step can be time-consuming, depending on your runtime." - ], - "metadata": { - "collapsed": false - }, - "id": "29d54f733139d114" - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "# Specify the necessary configuration for Gradient-Based PTQ.\n", - "n_gptq_epochs = 1000\n", - "gptq_config = mct.gptq.get_pytorch_gptq_config(n_epochs=n_gptq_epochs, use_hessian_based_weights=False)\n", - "\n", - "# Perform Gradient-Based Post Training Quantization\n", - "gptq_quant_model, _ = mct.gptq.pytorch_gradient_post_training_quantization(\n", - " model=model,\n", - " representative_data_gen=representative_dataset_gen,\n", - " target_resource_utilization=resource_utilization,\n", - " gptq_config=gptq_config,\n", - " core_config=config,\n", - " target_platform_capabilities=tpc)\n", - "\n", - "print('Quantized-GPTQ model is ready')" - ], - "metadata": { - "collapsed": false, - "tags": [ - "long_run" - ] - }, - "id": "240421e00f6cce34" - }, - { - "cell_type": "markdown", - "source": [ - "### Model Export\n", - "\n", - "Now, we can export the quantized model, ready for deployment, into a `.onnx` format file. Please ensure that the `save_model_path` has been set correctly. " - ], - "metadata": { - "collapsed": false - }, - "id": "b5d72e8420550101" - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "mct.exporter.pytorch_export_model(model=gptq_quant_model,\n", - " save_model_path='./qmodel_gptq.onnx',\n", - " repr_dataset=representative_dataset_gen)" - ], - "metadata": { - "collapsed": false, - "tags": [ - "long_run" - ] - }, - "id": "546ff946af81702b" - }, - { - "cell_type": "markdown", - "source": [ - "## Evaluation on COCO dataset\n", - "\n", - "### Floating point model evaluation\n", - "Next, we evaluate the floating point model by using `cocoeval` library alongside additional dataset utilities. We can verify the mAP accuracy aligns with that of the original model. 
\n", - "Note that we set the \"batch_size\" to 4 and the preprocessing according to [Ultralytics](https://github.com/ultralytics/ultralytics).\n", - "Please ensure that the dataset path has been set correctly before running this code cell." - ], - "metadata": { - "collapsed": false - }, - "id": "43a8a6d11d696b09" - }, - { - "cell_type": "code", - "execution_count": null, - "id": "01e90967-594b-480f-b2e6-45e2c9ce9cee", - "metadata": {}, - "outputs": [], - "source": [ - "from tutorials.mct_model_garden.models_pytorch.yolov8.yolov8 import keypoints_model_predict\n", - "from tutorials.mct_model_garden.evaluation_metrics.coco_evaluation import coco_evaluate\n", - "from model_compression_toolkit.core.pytorch.pytorch_device_config import get_working_device\n", - "\n", - "EVAL_DATASET_FOLDER = './coco/val2017'\n", - "EVAL_DATASET_ANNOTATION_FILE = './coco/annotations/person_keypoints_val2017.json'\n", - "INPUT_RESOLUTION = 640\n", - "\n", - "# Define resizing information to map between the model's output and the original image dimensions\n", - "output_resize = {'shape': (INPUT_RESOLUTION, INPUT_RESOLUTION), 'aspect_ratio_preservation': True}\n", - "\n", - "# Evaluate the model on coco\n", - "eval_results = coco_evaluate(model=model.to(get_working_device()),\n", - " dataset_folder=EVAL_DATASET_FOLDER,\n", - " annotation_file=EVAL_DATASET_ANNOTATION_FILE,\n", - " preprocess=yolov8_preprocess_chw_transpose,\n", - " output_resize=output_resize,\n", - " batch_size=BATCH_SIZE,\n", - " model_inference=keypoints_model_predict,\n", - " task='Keypoints')\n", - "\n", - "# Print float model mAP results\n", - "print(\"Float model mAP: {:.4f}\".format(eval_results[0]))" - ] - }, - { - "cell_type": "markdown", - "id": "4fb6bffc-23d1-4852-8ec5-9007361c8eeb", - "metadata": {}, - "source": [ - "### Quantized model evaluation\n", - "We can evaluate the performance of the quantized model. There is a slight decrease in performance that can be further mitigated by either expanding the representative dataset or employing MCT's advanced quantization methods, such as GPTQ (Gradient-Based/Enhanced Post Training Quantization)." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8dc7b87c-a9f4-4568-885a-fe009c8f4e8f", - "metadata": {}, - "outputs": [], - "source": [ - "# Evaluate the quantized model with PostProcess on coco\n", - "eval_results = coco_evaluate(model=quant_model.to(get_working_device()),\n", - " dataset_folder=EVAL_DATASET_FOLDER,\n", - " annotation_file=EVAL_DATASET_ANNOTATION_FILE,\n", - " preprocess=yolov8_preprocess_chw_transpose,\n", - " output_resize=output_resize,\n", - " batch_size=BATCH_SIZE,\n", - " model_inference=keypoints_model_predict,\n", - " task='Keypoints')\n", - "\n", - "# Print quantized model mAP results\n", - "print(\"Quantized model mAP: {:.4f}\".format(eval_results[0]))" - ] - }, - { - "cell_type": "markdown", - "source": [ - "Finally, we can evaluate the performance of the quantized model through GPTQ (Gradient-Based/Enhanced Post Training Quantization). We anticipate an improvement in performance compare to the quantized model utilizing PTQ." 
- ], - "metadata": { - "collapsed": false - }, - "id": "3bb5cc7c91dc8f21" - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "# Evaluate the quantized using GPTQ model with PostProcess on coco\n", - "eval_results = coco_evaluate(model=gptq_quant_model.to(get_working_device()),\n", - " dataset_folder=EVAL_DATASET_FOLDER,\n", - " annotation_file=EVAL_DATASET_ANNOTATION_FILE,\n", - " preprocess=yolov8_preprocess_chw_transpose,\n", - " output_resize=output_resize,\n", - " batch_size=BATCH_SIZE,\n", - " model_inference=keypoints_model_predict,\n", - " task='Keypoints')\n", - "\n", - "# Print quantized using GPTQ model mAP results\n", - "print(\"Quantized using GPTQ model mAP: {:.4f}\".format(eval_results[0]))" - ], - "metadata": { - "collapsed": false, - "tags": [ - "long_run" - ] - }, - "id": "168468f17ae8bc59" - }, - { - "cell_type": "markdown", - "id": "6d93352843a27433", - "metadata": { - "collapsed": false - }, - "source": [ - "\\\n", - "Copyright 2024 Sony Semiconductor Israel, Inc. All rights reserved.\n", - "\n", - "Licensed under the Apache License, Version 2.0 (the \"License\");\n", - "you may not use this file except in compliance with the License.\n", - "You may obtain a copy of the License at\n", - "\n", - " http://www.apache.org/licenses/LICENSE-2.0\n", - "\n", - "Unless required by applicable law or agreed to in writing, software\n", - "distributed under the License is distributed on an \"AS IS\" BASIS,\n", - "WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", - "See the License for the specific language governing permissions and\n", - "limitations under the License." - ] - } - ], - "metadata": { - "colab": { - "provenance": [] - }, - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.4" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/tutorials/notebooks/imx500_notebooks/pytorch/pytorch_yolov8n_seg_for_imx500.ipynb b/tutorials/notebooks/imx500_notebooks/pytorch/pytorch_yolov8n_seg_for_imx500.ipynb deleted file mode 100644 index d7609ae3c..000000000 --- a/tutorials/notebooks/imx500_notebooks/pytorch/pytorch_yolov8n_seg_for_imx500.ipynb +++ /dev/null @@ -1,639 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "fab9d9939dc74da4", - "metadata": { - "collapsed": false, - "id": "fab9d9939dc74da4" - }, - "source": [ - "# YOLOv8n Object Detection PyTorch Model - Quantization for IMX500\n", - "\n", - "[Run this tutorial in Google Colab](https://colab.research.google.com/github/sony/model_optimization/blob/main/tutorials/notebooks/imx500_notebooks/pytorch/pytorch_yolov8n_seg_for_imx500.ipynb)\n", - "\n", - "## Overview\n", - "\n", - "In this tutorial, we will illustrate a basic and quick process of preparing a pre-trained model for deployment using MCT. Specifically, we will demonstrate how to download a pre-trained YOLOv8n instance segmentation model from the MCT Models Library, compress it, and make it deployment-ready using MCT's post-training quantization techniques.\n", - "\n", - "We will use an existing pre-trained YOLOv8n instance segmentation model based on [Ultralytics](https://github.com/ultralytics/ultralytics). The model was slightly adjusted for model quantization. 
We will quantize the model using MCT post training quantization and evaluate the performance of the floating point model and the quantized model on COCO dataset.\n", - "\n", - "\n", - "## Summary\n", - "\n", - "In this tutorial we will cover:\n", - "\n", - "1. Post-Training Quantization using MCT of PyTorch object detection model.\n", - "2. Data preparation - loading and preprocessing validation and representative datasets from COCO.\n", - "3. Accuracy evaluation of the floating-point and the quantized models." - ] - }, - { - "cell_type": "markdown", - "id": "d74f9c855ec54081", - "metadata": { - "collapsed": false, - "id": "d74f9c855ec54081" - }, - "source": [ - "## Setup\n", - "### Install the relevant packages" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7c7fa04c9903736f", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "7c7fa04c9903736f", - "outputId": "51eab6ab-4821-4cd4-9210-3561fd15a09c" - }, - "outputs": [], - "source": [ - "!pip install -q torch\n", - "!pip install onnx\n", - "!pip install -q pycocotools\n", - "!pip install 'huggingface-hub>=0.21.0'" - ] - }, - { - "cell_type": "markdown", - "id": "57717bc8f59a0d85", - "metadata": { - "collapsed": false, - "id": "57717bc8f59a0d85" - }, - "source": [ - " Clone a copy of the [MCT](https://github.com/sony/model_optimization) (Model Compression Toolkit) into your current directory. This step ensures that you have access to [MCT Models Garden](https://github.com/sony/model_optimization/tree/main/tutorials/mct_model_garden) folder which contains all the necessary utility functions for this tutorial.\n", - " **It's important to note that we use the most up-to-date MCT code available.**" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9728247bc20d0600", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "9728247bc20d0600", - "outputId": "e4d117a5-b62c-477d-f1fd-aa51daadd10e" - }, - "outputs": [], - "source": [ - "import sys\n", - "import os\n", - "import importlib\n", - "\n", - "if not importlib.util.find_spec('model_compression_toolkit'):\n", - " !pip install model_compression_toolkit\n", - "!git clone https://github.com/sony/model_optimization.git temp_mct && mv temp_mct/tutorials . 
&& \\rm -rf temp_mct\n", - "sys.path.insert(0,\"tutorials\")" - ] - }, - { - "cell_type": "markdown", - "id": "7a1038b9fd98bba2", - "metadata": { - "collapsed": false, - "id": "7a1038b9fd98bba2" - }, - "source": [ - "### Download COCO evaluation set" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8bea492d71b4060f", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "8bea492d71b4060f", - "outputId": "ad92251a-1893-4d38-9322-7cb7ffb3f9c8" - }, - "outputs": [], - "source": [ - "if not os.path.isdir('coco'):\n", - " !wget -nc http://images.cocodataset.org/annotations/annotations_trainval2017.zip\n", - " !unzip -q -o annotations_trainval2017.zip -d ./coco\n", - " !echo Done loading annotations\n", - " !wget -nc http://images.cocodataset.org/zips/val2017.zip\n", - " !unzip -q -o val2017.zip -d ./coco\n", - " !echo Done loading val2017 images" - ] - }, - { - "cell_type": "markdown", - "id": "084c2b8b-3175-4d46-a18a-7c4d8b6fcb38", - "metadata": { - "id": "084c2b8b-3175-4d46-a18a-7c4d8b6fcb38" - }, - "source": [ - "## Model Quantization\n", - "\n", - "### Download a Pre-Trained Model\n", - "\n", - "We begin by loading a pre-trained [YOLOv8n](https://huggingface.co/SSI-DNN/pytorch_yolov8n_inst_seg_640x640) model. This implementation is based on [Ultralytics](https://github.com/ultralytics/ultralytics) and includes a slightly modified version of yolov8 detection and segmentation head that was adapted for model quantization. For further insights into the model's implementation details, please refer to [MCT Models Garden - yolov8](https://github.com/sony/model_optimization/tree/main/tutorials/mct_model_garden/models_pytorch/yolov8). " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "NDogtE_0ANsL", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "NDogtE_0ANsL", - "outputId": "b7942fd3-02a1-4126-98c9-387c4bc90748" - }, - "outputs": [], - "source": [ - "from tutorials.mct_model_garden.models_pytorch.yolov8.yolov8 import ModelPyTorch, yaml_load\n", - "cfg_dict = yaml_load(\"./tutorials/mct_model_garden/models_pytorch/yolov8/yolov8-seg.yaml\", append_filename=True) # model dict\n", - "model = ModelPyTorch.from_pretrained(\"SSI-DNN/pytorch_yolov8n_inst_seg_640x640\", cfg=cfg_dict, mode='segmentation')" - ] - }, - { - "cell_type": "markdown", - "id": "3cde2f8e-0642-4374-a1f4-df2775fe7767", - "metadata": { - "id": "3cde2f8e-0642-4374-a1f4-df2775fe7767" - }, - "source": [ - "### Post training quantization using Model Compression Toolkit\n", - "\n", - "Now, we're all set to use MCT's post-training quantization. To begin, we'll define a representative dataset and proceed with the model quantization. Please note that, for demonstration purposes, we'll use the evaluation dataset as our representative dataset. We'll calibrate the model using 100 representative images, divided into 20 iterations of 'batch_size' images each.\n", - "\n", - "Additionally, to further compress the model's memory footprint, we will employ the mixed-precision quantization technique. This method allows each layer to be quantized with different precision options: 2, 4, and 8 bits, aligning with the imx500 target platform capabilities." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "56393342-cecf-4f64-b9ca-2f515c765942", - "metadata": { - "id": "56393342-cecf-4f64-b9ca-2f515c765942" - }, - "outputs": [], - "source": [ - "import model_compression_toolkit as mct\n", - "from tutorials.mct_model_garden.evaluation_metrics.coco_evaluation import coco_dataset_generator\n", - "from tutorials.mct_model_garden.models_pytorch.yolov8.yolov8_preprocess import yolov8_preprocess_chw_transpose\n", - "from typing import Iterator, Tuple, List\n", - "\n", - "REPRESENTATIVE_DATASET_FOLDER = './coco/val2017/'\n", - "REPRESENTATIVE_DATASET_ANNOTATION_FILE = './coco/annotations/instances_val2017.json'\n", - "BATCH_SIZE = 4\n", - "n_iters = 20\n", - "\n", - "# Load representative dataset\n", - "representative_dataset = coco_dataset_generator(dataset_folder=REPRESENTATIVE_DATASET_FOLDER,\n", - " annotation_file=REPRESENTATIVE_DATASET_ANNOTATION_FILE,\n", - " preprocess=yolov8_preprocess_chw_transpose,\n", - " batch_size=BATCH_SIZE)\n", - "\n", - "# Define representative dataset generator\n", - "def get_representative_dataset(n_iter: int, dataset_loader: Iterator[Tuple]):\n", - " \"\"\"\n", - " This function creates a representative dataset generator. The generator yields numpy\n", - " arrays of batches of shape: [Batch, H, W ,C].\n", - " Args:\n", - " n_iter: number of iterations for MCT to calibrate on\n", - " Returns:\n", - " A representative dataset generator\n", - " \"\"\"\n", - " def representative_dataset() -> Iterator[List]:\n", - " ds_iter = iter(dataset_loader)\n", - " for _ in range(n_iter):\n", - " yield [next(ds_iter)[0]]\n", - "\n", - " return representative_dataset\n", - "\n", - "# Get representative dataset generator\n", - "representative_dataset_gen = get_representative_dataset(n_iter=n_iters,\n", - " dataset_loader=representative_dataset)\n", - "\n", - "# Set IMX500-v1 TPC\n", - "tpc = mct.get_target_platform_capabilities(fw_name=\"pytorch\",\n", - " target_platform_name='imx500',\n", - " target_platform_version='v1')\n", - "\n", - "# Specify the necessary configuration for mixed precision quantization. To keep the tutorial brief, we'll use a small set of images and omit the hessian metric for mixed precision calculations. 
It's important to be aware that this choice may impact the resulting accuracy.\n", - "mp_config = mct.core.MixedPrecisionQuantizationConfig(num_of_images=5,\n", - " use_hessian_based_scores=False)\n", - "config = mct.core.CoreConfig(mixed_precision_config=mp_config,\n", - " quantization_config=mct.core.QuantizationConfig(shift_negative_activation_correction=True))\n", - "\n", - "# Define target Resource Utilization for mixed precision weights quantization (75% of 'standard' 8bits quantization)\n", - "resource_utilization_data = mct.core.pytorch_resource_utilization_data(in_model=model,\n", - " representative_data_gen=\n", - " representative_dataset_gen,\n", - " core_config=config,\n", - " target_platform_capabilities=tpc)\n", - "resource_utilization = mct.core.ResourceUtilization(weights_memory=resource_utilization_data.weights_memory * 0.75)\n", - "\n", - "# Perform post training quantization\n", - "quant_model, _ = mct.ptq.pytorch_post_training_quantization(in_module=model,\n", - " representative_data_gen=\n", - " representative_dataset_gen,\n", - " target_resource_utilization=resource_utilization,\n", - " core_config=config,\n", - " target_platform_capabilities=tpc)\n" - ] - }, - { - "cell_type": "markdown", - "id": "3be2016acdc9da60", - "metadata": { - "collapsed": false, - "id": "3be2016acdc9da60" - }, - "source": [ - "### Model Export\n", - "\n", - "Now, we can export the quantized model, ready for deployment, into a `.onnx` format file. Please ensure that the `save_model_path` has been set correctly." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "72dd885c7b92fa93", - "metadata": { - "id": "72dd885c7b92fa93" - }, - "outputs": [], - "source": [ - "import model_compression_toolkit as mct\n", - "\n", - "mct.exporter.pytorch_export_model(model=quant_model,\n", - " save_model_path='./quant_model.onnx',\n", - " repr_dataset=representative_dataset_gen)" - ] - }, - { - "cell_type": "markdown", - "id": "655d764593af0763", - "metadata": { - "collapsed": false, - "id": "655d764593af0763" - }, - "source": [ - "### Gradient-Based Post Training Quantization using Model Compression Toolkit\n", - "Here we demonstrate how to further optimize the quantized model performance using gradient-based PTQ technique.\n", - "**Please note that this section is computationally heavy, and it's recommended to run it on a GPU. For fast deployment, you may choose to skip this step.**\n", - "\n", - "We will start by loading the COCO training set, and re-define the representative dataset accordingly." 
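Before the representative dataset is redefined over the training split for GPTQ, it is easy to confirm what MCT actually consumes during calibration: a zero-argument callable that yields a list of batched inputs per iteration. An illustrative check against the generator defined above (the exact array type and layout depend on the preprocessing):

```python
# Illustrative check of the representative-dataset contract used by MCT.
calib_iter = representative_dataset_gen()    # the returned function produces a fresh generator
first = next(calib_iter)                     # one calibration iteration
print(type(first), len(first))               # a list with one entry per model input
print(getattr(first[0], 'shape', None), getattr(first[0], 'dtype', None))  # batched, preprocessed images
```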
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "20fe96b6cc95d38c", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "20fe96b6cc95d38c", - "outputId": "22b0be33-ef7b-490a-82ae-7eb02a3474a2", - "tags": [ - "long_run" - ] - }, - "outputs": [], - "source": [ - "!wget -nc http://images.cocodataset.org/zips/train2017.zip\n", - "!unzip -q -o train2017.zip -d ./coco\n", - "!echo Done loading train2017 images\n", - "\n", - "GPTQ_REPRESENTATIVE_DATASET_FOLDER = './coco/train2017/'\n", - "GPTQ_REPRESENTATIVE_DATASET_ANNOTATION_FILE = './coco/annotations/instances_train2017.json'\n", - "BATCH_SIZE = 4\n", - "n_iters = 20\n", - "\n", - "# Load representative dataset\n", - "gptq_representative_dataset = coco_dataset_generator(dataset_folder=GPTQ_REPRESENTATIVE_DATASET_FOLDER,\n", - " annotation_file=GPTQ_REPRESENTATIVE_DATASET_ANNOTATION_FILE,\n", - " preprocess=yolov8_preprocess_chw_transpose,\n", - " batch_size=BATCH_SIZE)\n", - "\n", - "# Get representative dataset generator\n", - "gptq_representative_dataset_gen = get_representative_dataset(n_iter=n_iters,\n", - " dataset_loader=gptq_representative_dataset)" - ] - }, - { - "cell_type": "markdown", - "id": "29d54f733139d114", - "metadata": { - "collapsed": false, - "id": "29d54f733139d114" - }, - "source": [ - "Next, we'll set up the Gradient-Based PTQ configuration and execute the necessary MCT command. Keep in mind that this step can be time-consuming, depending on your runtime. We recomend for the best results increase n_gptq_epochs to > 1000 " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "240421e00f6cce34", - "metadata": { - "id": "240421e00f6cce34", - "tags": [ - "long_run" - ] - }, - "outputs": [], - "source": [ - "# Specify the necessary configuration for Gradient-Based PTQ.\n", - "n_gptq_epochs = 15 # for best results increase this value to 1000\n", - "gptq_config = mct.gptq.get_pytorch_gptq_config(n_epochs=n_gptq_epochs, use_hessian_based_weights=False)\n", - "\n", - "# Perform Gradient-Based Post Training Quantization\n", - "gptq_quant_model, _ = mct.gptq.pytorch_gradient_post_training_quantization(\n", - " model=model,\n", - " representative_data_gen=gptq_representative_dataset_gen,\n", - " target_resource_utilization=resource_utilization,\n", - " gptq_config=gptq_config,\n", - " core_config=config,\n", - " target_platform_capabilities=tpc)" - ] - }, - { - "cell_type": "markdown", - "id": "b5d72e8420550101", - "metadata": { - "collapsed": false, - "id": "b5d72e8420550101" - }, - "source": [ - "### Model Export\n", - "\n", - "Now, we can export the quantized model, ready for deployment, into a `.onnx` format file. Please ensure that the `save_model_path` has been set correctly. This can be converted with sdsp to imx500 format." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "546ff946af81702b", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "546ff946af81702b", - "outputId": "cf627960-7b8b-423c-8cae-fbccddcb76f3", - "tags": [ - "long_run" - ] - }, - "outputs": [], - "source": [ - "mct.exporter.pytorch_export_model(model=gptq_quant_model,\n", - " save_model_path='./qmodel_gptq.onnx',\n", - " repr_dataset=gptq_representative_dataset_gen)" - ] - }, - { - "cell_type": "markdown", - "id": "43a8a6d11d696b09", - "metadata": { - "collapsed": false, - "id": "43a8a6d11d696b09" - }, - "source": [ - "## Evaluation on COCO dataset\n", - "\n", - "### Floating point model evaluation\n", - "Next, we evaluate the floating point model by using `cocoeval` library alongside additional dataset utilities. We can verify the mAP accuracy aligns with that of the original model.\n", - "Please ensure that the dataset path has been set correctly before running this code cell. Adjust img_ids_limit based on your runtime. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "FPahWaGApRsf", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "FPahWaGApRsf", - "outputId": "8917ad48-88f2-476d-852e-fa6a3f067919" - }, - "outputs": [], - "source": [ - "from tutorials.mct_model_garden.models_pytorch.yolov8.yolov8 import seg_model_predict\n", - "from tutorials.mct_model_garden.evaluation_metrics.coco_evaluation import evaluate_yolov8_segmentation\n", - "from model_compression_toolkit.core.pytorch.pytorch_device_config import get_working_device\n", - "device = get_working_device()\n", - "model = model.to(device)\n", - "evaluate_yolov8_segmentation(model, seg_model_predict, data_dir='coco', data_type='val2017', img_ids_limit=100, output_file='results.json', iou_thresh=0.7, conf=0.001, max_dets=300,mask_thresh=0.55)" - ] - }, - { - "cell_type": "markdown", - "id": "4fb6bffc-23d1-4852-8ec5-9007361c8eeb", - "metadata": { - "id": "4fb6bffc-23d1-4852-8ec5-9007361c8eeb" - }, - "source": [ - "### Quantized model evaluation\n", - "We can evaluate the performance of the quantized model. There is a slight decrease in performance that can be further mitigated by either expanding the representative dataset or employing MCT's advanced quantization methods, such as GPTQ (Gradient-Based/Enhanced Post Training Quantization)." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "WudMfYEOsEFK", - "metadata": { - "id": "WudMfYEOsEFK" - }, - "outputs": [], - "source": [ - "from tutorials.mct_model_garden.evaluation_metrics.coco_evaluation import evaluate_yolov8_segmentation\n", - "evaluate_yolov8_segmentation(quant_model, seg_model_predict, data_dir='coco', data_type='val2017', img_ids_limit=100, output_file='results_quant.json', iou_thresh=0.7, conf=0.001, max_dets=300,mask_thresh=0.55)" - ] - }, - { - "cell_type": "markdown", - "id": "3bb5cc7c91dc8f21", - "metadata": { - "collapsed": false, - "id": "3bb5cc7c91dc8f21" - }, - "source": [ - "### Gradient quant Evaluation\n", - "Finally, we can evaluate the performance of the quantized model through GPTQ (Gradient-Based/Enhanced Post Training Quantization). We anticipate an improvement in performance compare to the quantized model utilizing PTQ." 
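Each `evaluate_yolov8_segmentation` call writes a COCO-format results file (`results.json`, `results_quant.json`, and, after the next cell, `results_g_quant.json`). A quick illustrative way to compare the three files, assuming the standard COCO results format of per-detection dictionaries with `image_id` and `score` fields:

```python
# Illustrative comparison of the COCO-format result files written by the evaluation cells.
import json

for path in ('results.json', 'results_quant.json', 'results_g_quant.json'):
    try:
        with open(path) as f:
            dets = json.load(f)
    except FileNotFoundError:
        print(f"{path}: not found (run the corresponding evaluation cell first)")
        continue
    scores = [d['score'] for d in dets]
    image_ids = {d['image_id'] for d in dets}
    print(f"{path}: {len(dets)} detections over {len(image_ids)} images, "
          f"mean score {sum(scores) / max(len(scores), 1):.3f}")
```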
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "VLwCbC2_szpy", - "metadata": { - "id": "VLwCbC2_szpy", - "tags": [ - "long_run" - ] - }, - "outputs": [], - "source": [ - "from tutorials.mct_model_garden.evaluation_metrics.coco_evaluation import evaluate_yolov8_segmentation\n", - "evaluate_yolov8_segmentation(gptq_quant_model, seg_model_predict, data_dir='coco', data_type='val2017', img_ids_limit=100, output_file='results_g_quant.json', iou_thresh=0.7, conf=0.001, max_dets=300,mask_thresh=0.55)" - ] - }, - { - "cell_type": "markdown", - "id": "G-IcwtruCh9P", - "metadata": { - "id": "G-IcwtruCh9P" - }, - "source": [ - "### Visualize Predictions\n", - "\n", - "Finally, we can visualize the predictions. The code segment below displays the predictions used for evaluation alongside the ground truth for each image. To view the output of a different model, run the evaluation for that model and point the results file below to its output (e.g. `results_quant.json` or `results_g_quant.json`).\n", - "A random set of images is displayed." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "PXiLCy1j92kE", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 1000 - }, - "id": "PXiLCy1j92kE", - "outputId": "f6251c47-5665-4c77-ddc0-780f40401a6a" - }, - "outputs": [], - "source": [ - "import cv2\n", - "import numpy as np\n", - "from matplotlib import pyplot as plt\n", - "from pycocotools.coco import COCO\n", - "import json\n", - "import random\n", - "\n", - "# Number of sets to display\n", - "num_sets = 20\n", - "\n", - "# Adjust the results file name to view the PTQ ('results_quant.json') or GPTQ ('results_g_quant.json') output\n", - "with open('results.json', 'r') as file:\n", - " results = json.load(file)\n", - "\n", - "# Extract unique image IDs from the results\n", - "result_imgIds = list({result['image_id'] for result in results})\n", - "\n", - "dataDir = 'coco'\n", - "dataType = 'val2017'\n", - "annFile = f'{dataDir}/annotations/instances_{dataType}.json'\n", - "resultsFile = 'results.json'\n", - "cocoGt = COCO(annFile)\n", - "cocoDt = cocoGt.loadRes(resultsFile)\n", - "plt.figure(figsize=(15, 7 * num_sets))\n", - "\n", - "for i in range(num_sets):\n", - " random_imgId = random.choice(result_imgIds)\n", - " img = cocoGt.loadImgs(random_imgId)[0]\n", - " image_path = f'{dataDir}/{dataType}/{img[\"file_name\"]}'\n", - " image = cv2.imread(image_path)\n", - " image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) # Convert from BGR to RGB\n", - "\n", - " plt.subplot(num_sets, 2, 2*i + 1)\n", - " plt.imshow(image)\n", - " plt.axis('off')\n", - " plt.title(f'Ground Truth {random_imgId}')\n", - "\n", - " # Load and display ground truth annotations with bounding boxes\n", - " annIds = cocoGt.getAnnIds(imgIds=img['id'], iscrowd=None)\n", - " anns = cocoGt.loadAnns(annIds)\n", - " for ann in anns:\n", - " cocoGt.showAnns([ann], draw_bbox=True)\n", - " # Draw category ID on the image\n", - " bbox = ann['bbox']\n", - " plt.text(bbox[0], bbox[1], str(ann['category_id']), color='white', fontsize=12, bbox=dict(facecolor='red', alpha=0.5))\n", - "\n", - " plt.subplot(num_sets, 2, 2*i + 2)\n", - " plt.imshow(image)\n", - " plt.axis('off')\n", - " plt.title(f'Model Output {random_imgId}')\n", - "\n", - " # Load and display model predictions with bounding boxes\n", - " annIdsDt = cocoDt.getAnnIds(imgIds=img['id'])\n", - " annsDt = cocoDt.loadAnns(annIdsDt)\n", - " for ann in annsDt:\n", - " cocoDt.showAnns([ann], draw_bbox=True)\n", - " # Draw category ID on the image\n", - " bbox = ann['bbox']\n", - " plt.text(bbox[0], bbox[1], str(ann['category_id']), color='white', fontsize=12, bbox=dict(facecolor='blue', alpha=0.5))\n", - "\n", - "plt.tight_layout()\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "id": "2dbb8c1d", - "metadata": {}, - "source": [ - "### Summary\n", - "\n", - "In this notebook we load the weights of a YOLOv8n instance segmentation model, quantize it with both PTQ and gradient-based (GPTQ) methods, evaluate the resulting models, and finally demonstrate how to visualize the predictions." - ] - }, - { - "cell_type": "markdown", - "id": "6d93352843a27433", - "metadata": { - "collapsed": false, - "id": "6d93352843a27433" - }, - "source": [ - "\\\n", - "Copyright 2024 Sony Semiconductor Israel, Inc. All rights reserved.\n", - "\n", - "Licensed under the Apache License, Version 2.0 (the \"License\");\n", - "you may not use this file except in compliance with the License.\n", - "You may obtain a copy of the License at\n", - "\n", - " http://www.apache.org/licenses/LICENSE-2.0\n", - "\n", - "Unless required by applicable law or agreed to in writing, software\n", - "distributed under the License is distributed on an \"AS IS\" BASIS,\n", - "WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", - "See the License for the specific language governing permissions and\n", - "limitations under the License." - ] - } - ], - "metadata": { - "colab": { - "provenance": [] - }, - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.4" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/tutorials/notebooks/imx500_notebooks/pytorch/pytorch_yolox-tiny_for_imx500.ipynb b/tutorials/notebooks/imx500_notebooks/pytorch/pytorch_yolox-tiny_for_imx500.ipynb deleted file mode 100644 index 3a945ecb8..000000000 --- a/tutorials/notebooks/imx500_notebooks/pytorch/pytorch_yolox-tiny_for_imx500.ipynb +++ /dev/null @@ -1,374 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "fab9d9939dc74da4", - "metadata": { - "collapsed": false - }, - "source": [ - "# YOLOX-Tiny Object Detection - Quantization for IMX500\n", - "\n", - "[Run this tutorial in Google Colab](https://colab.research.google.com/github/sony/model_optimization/blob/main/tutorials/notebooks/imx500_notebooks/pytorch/pytorch_yolox-tiny_for_imx500.ipynb)\n", - "\n", - "## Overview\n", - "\n", - "In this tutorial, we will illustrate a basic and quick process of preparing a pre-trained model for deployment using MCT. Specifically, we will demonstrate how to download a pre-trained PyTorch YOLOX-Tiny model, compress it, and make it deployment-ready using MCT's post-training quantization techniques.\n", - "\n", - "We will use an existing pre-trained YOLOX-Tiny model based on [YOLOX](https://github.com/Megvii-BaseDetection/YOLOX) and integrate box decoding and NMS into the model. The model was slightly adjusted for model quantization. We will quantize the model using MCT's post-training quantization technique and evaluate the performance of the floating-point model and the quantized model on the COCO dataset.\n", - "\n", - "\n", - "## Summary\n", - "\n", - "In this tutorial we will cover:\n", - "\n", - "1. Post-Training Quantization (PTQ) of the YOLOX object detection model using MCT.\n", - "2. Data preparation: loading and preprocessing validation and representative datasets from COCO.\n", - "3. 
Accuracy evaluation of the floating-point and the quantized models." - ] - }, - { - "cell_type": "markdown", - "id": "d74f9c855ec54081", - "metadata": { - "collapsed": false - }, - "source": [ - "## Setup\n", - "### Install the relevant packages" - ] - }, - { - "cell_type": "code", - "id": "7c7fa04c9903736f", - "metadata": { - "collapsed": false - }, - "source": [ - "!pip install -q torch\n", - "!pip install onnx\n", - "!pip install -q pycocotools\n", - "!pip install 'sony-custom-layers'" - ], - "outputs": [], - "execution_count": null - }, - { - "cell_type": "markdown", - "id": "57717bc8f59a0d85", - "metadata": { - "collapsed": false - }, - "source": [ - "Install MCT (if it’s not already installed). Additionally, in order to use all the necessary utility functions for this tutorial, we also copy [MCT tutorials folder](https://github.com/sony/model_optimization/tree/main/tutorials) and add it to the system path." - ] - }, - { - "cell_type": "code", - "id": "9728247bc20d0600", - "metadata": { - "collapsed": false - }, - "source": [ - "import sys\n", - "import os\n", - "import importlib\n", - "\n", - "if not importlib.util.find_spec('model_compression_toolkit'):\n", - " !pip install model_compression_toolkit\n", - "!git clone https://github.com/sony/model_optimization.git temp_mct && mv temp_mct/tutorials . && \\rm -rf temp_mct\n", - "sys.path.insert(0,\"tutorials\")" - ], - "outputs": [], - "execution_count": null - }, - { - "cell_type": "markdown", - "id": "7a1038b9fd98bba2", - "metadata": { - "collapsed": false - }, - "source": [ - "### Download COCO evaluation set" - ] - }, - { - "cell_type": "code", - "id": "8bea492d71b4060f", - "metadata": { - "collapsed": false - }, - "source": [ - "if not os.path.isdir('coco'):\n", - " !wget -nc http://images.cocodataset.org/annotations/annotations_trainval2017.zip\n", - " !unzip -q -o annotations_trainval2017.zip -d ./coco\n", - " !echo Done loading annotations\n", - " !wget -nc http://images.cocodataset.org/zips/val2017.zip\n", - " !unzip -q -o val2017.zip -d ./coco\n", - " !echo Done loading val2017 images" - ], - "outputs": [], - "execution_count": null - }, - { - "cell_type": "markdown", - "id": "084c2b8b-3175-4d46-a18a-7c4d8b6fcb38", - "metadata": {}, - "source": [ - "## Quantization\n", - "\n", - "### Download a Pre-Trained Model \n", - "\n", - "We begin by downloading a pre-trained YOLOX-Tiny model from [YOLOX github](https://github.com/Megvii-BaseDetection/YOLOX). This implementation is based on [YOLOX](https://github.com/Megvii-BaseDetection/YOLOX) and includes a slightly modified version of YOLOX detection-head (mainly the box decoding part) that was adapted for model quantization. For further insights into the model's implementation details, please refer to [MCT Models Garden - YOLOX](https://github.com/sony/model_optimization/tree/main/tutorials/mct_model_garden/models_pytorch/yolox). 
" - ] - }, - { - "cell_type": "code", - "id": "e8395b28-4732-4d18-b081-5d3bdf508691", - "metadata": {}, - "source": [ - "# Download YOLOX-Tiny\n", - "!wget -nc https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yolox_tiny.pth\n", - "\n", - "from tutorials.mct_model_garden.models_pytorch.yolox.yolox import YOLOX\n", - "import yaml\n", - "\n", - "yaml_path = \"tutorials/mct_model_garden/models_pytorch/yolox/yolox.yaml\"\n", - "with open(yaml_path, 'r', encoding='utf-8') as f:\n", - " yolox_cfg = yaml.safe_load(f)\n", - "\n", - "yolox_tiny_cfg = yolox_cfg['tiny']\n", - "model = YOLOX(yolox_tiny_cfg)\n", - "model.load_weights(\"yolox_tiny.pth\")\n", - "model.eval()" - ], - "outputs": [], - "execution_count": null - }, - { - "cell_type": "markdown", - "id": "3cde2f8e-0642-4374-a1f4-df2775fe7767", - "metadata": {}, - "source": [ - "### Post training quantization (PTQ) using Model Compression Toolkit (MCT)\n", - "\n", - "Now, we are all set to use MCT's post-training quantization. To begin, we'll define a representative dataset and proceed with the model quantization. Please note that, for demonstration purposes, we'll use the evaluation dataset as our representative dataset. We'll calibrate the model using 80 representative images, divided into 30 iterations of 'batch_size' images each. \n" - ] - }, - { - "cell_type": "code", - "id": "56393342-cecf-4f64-b9ca-2f515c765942", - "metadata": { - "collapsed": false - }, - "source": [ - "import model_compression_toolkit as mct\n", - "from tutorials.mct_model_garden.evaluation_metrics.coco_evaluation import coco_dataset_generator\n", - "from tutorials.mct_model_garden.models_pytorch.yolox.yolox_preprocess import yolox_preprocess_chw_transpose\n", - "from tutorials.mct_model_garden.models_pytorch.yolox.yolox import YOLOXPostProcess\n", - "from typing import Iterator\n", - "\n", - "REPRESENTATIVE_DATASET_FOLDER = './coco/val2017/'\n", - "REPRESENTATIVE_DATASET_ANNOTATION_FILE = './coco/annotations/instances_val2017.json'\n", - "BATCH_SIZE = 4\n", - "n_iters = 30\n", - "\n", - "# Load representative dataset\n", - "representative_dataset = coco_dataset_generator(dataset_folder=REPRESENTATIVE_DATASET_FOLDER,\n", - " annotation_file=REPRESENTATIVE_DATASET_ANNOTATION_FILE,\n", - " preprocess=yolox_preprocess_chw_transpose,\n", - " batch_size=BATCH_SIZE)\n", - "\n", - "\n", - "def get_representative_dataset(dataset: Iterator, n_iter: int):\n", - " \"\"\"\n", - " This function creates a representative dataset generator. 
The generator yields numpy\n", - " arrays of batches of shape: [Batch, H, W ,C].\n", - " Args:\n", - " dataset: dataset iterator\n", - " n_iter: number of iterations for MCT for calibration\n", - " Returns:\n", - " A representative dataset generator\n", - " \"\"\" \n", - " def _generator():\n", - " for _ind in range(n_iter):\n", - " batch, label = next(iter(dataset))\n", - " yield [batch]\n", - "\n", - " return _generator\n", - "\n", - "# Get representative dataset generator\n", - "representative_dataset_gen = get_representative_dataset(dataset=representative_dataset, n_iter=n_iters)\n", - "\n", - "# Set IMX500 TPC\n", - "tpc = mct.get_target_platform_capabilities(fw_name=\"pytorch\",\n", - " target_platform_name='imx500',\n", - " target_platform_version='v3')\n", - "\n", - "# Define target Resource Utilization for mixed precision weights quantization.\n", - "# Number of parameters of YOLOx-Tiny is 5M and we set target memory (in Bytes) of 87% of 'standard' 8-bit quantization.\n", - "resource_utilization = mct.core.ResourceUtilization(weights_memory=5e6 * 0.87)\n", - "\n", - "# Perform post training quantization\n", - "quant_model, _ = mct.ptq.pytorch_post_training_quantization(in_module=model,\n", - " representative_data_gen=representative_dataset_gen,\n", - " target_resource_utilization=resource_utilization,\n", - " target_platform_capabilities=tpc)\n", - "\n", - "# Integrate the quantized model with box decoder and NMS\n", - "quant_model = YOLOXPostProcess(quant_model)\n", - "\n", - "print('Quantized model is ready!')" - ], - "outputs": [], - "execution_count": null - }, - { - "cell_type": "markdown", - "id": "3be2016acdc9da60", - "metadata": { - "collapsed": false - }, - "source": [ - "### Export\n", - "\n", - "Now, we can export the quantized model, ready for deployment om IMX500, into a `.onnx` format file. Please ensure that the `save_model_path` has been set correctly. " - ] - }, - { - "cell_type": "code", - "id": "72dd885c7b92fa93", - "metadata": { - "collapsed": false - }, - "source": [ - "mct.exporter.pytorch_export_model(model=quant_model,\n", - " save_model_path='./model.onnx',\n", - " repr_dataset=representative_dataset_gen)" - ], - "outputs": [], - "execution_count": null - }, - { - "cell_type": "markdown", - "id": "43a8a6d11d696b09", - "metadata": { - "collapsed": false - }, - "source": [ - "## Evaluation on COCO dataset\n", - "\n", - "### Floating point model evaluation\n", - "Next, we evaluate the floating point model by using `cocoeval` library alongside additional dataset utilities. We can verify the mAP accuracy aligns with that of the original model. \n", - "Note that we set the preprocessing according to [YOLOX](https://github.com/Megvii-BaseDetection/YOLOX).\n", - "Please ensure that the dataset path has been set correctly before running this code cell." 
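Before running the export cell that follows, a brief aside on the `ResourceUtilization` target defined in the quantization cell above: the budget is plain arithmetic over the quoted ~5M parameter count. The sketch below spells it out; the 5M figure is the approximation stated in the code comment, not an exact parameter count.

```python
# Sketch of the arithmetic behind ResourceUtilization(weights_memory=5e6 * 0.87).
num_weights = 5e6            # approximate number of weight parameters in YOLOX-Tiny (assumption from the comment above)
bytes_per_weight_8bit = 1.0  # a uniform 8-bit quantization stores 1 byte per weight
target_fraction = 0.87       # keep only 87% of the 8-bit footprint

weights_memory = num_weights * bytes_per_weight_8bit * target_fraction
print(f'Target weights memory: {weights_memory:.0f} bytes')  # 4350000 bytes
# Because the budget is below a full 8-bit footprint, the mixed-precision search must assign
# fewer than 8 bits to some layers to satisfy it.
```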
- ] - }, - { - "cell_type": "code", - "id": "01e90967-594b-480f-b2e6-45e2c9ce9cee", - "metadata": {}, - "source": [ - "from tutorials.mct_model_garden.evaluation_metrics.coco_evaluation import coco_evaluate\n", - "from tutorials.mct_model_garden.models_pytorch.yolox.yolox import model_predict\n", - "\n", - "EVAL_DATASET_FOLDER = './coco/val2017'\n", - "EVAL_DATASET_ANNOTATION_FILE = './coco/annotations/instances_val2017.json'\n", - "\n", - "# Define boxes resizing information to map between the model's output and the original image dimensions\n", - "output_resize = {'shape': yolox_tiny_cfg['img_size'], 'aspect_ratio_preservation': True, \"align_center\": False, 'normalized_coords': False}\n", - "\n", - "# Integrate the floating-point model with box decoder and NMS\n", - "model = YOLOXPostProcess(model)\n", - "\n", - "# Evaluate the floating-point model\n", - "eval_results = coco_evaluate(model=model,\n", - " dataset_folder=EVAL_DATASET_FOLDER,\n", - " annotation_file=EVAL_DATASET_ANNOTATION_FILE,\n", - " preprocess=yolox_preprocess_chw_transpose,\n", - " output_resize=output_resize,\n", - " batch_size=BATCH_SIZE,\n", - " model_inference=model_predict)\n", - "\n", - "print(\"Floating-point model mAP: {:.4f}\".format(eval_results[0]))" - ], - "outputs": [], - "execution_count": null - }, - { - "cell_type": "markdown", - "id": "4fb6bffc-23d1-4852-8ec5-9007361c8eeb", - "metadata": {}, - "source": [ - "### Quantized model evaluation\n", - "We can evaluate the performance of the quantized model. There is a slight decrease in performance that can be further mitigated by either expanding the representative dataset or employing MCT's advanced quantization methods, such as GPTQ (Gradient-Based/Enhanced Post Training Quantization)." - ] - }, - { - "cell_type": "code", - "id": "8dc7b87c-a9f4-4568-885a-fe009c8f4e8f", - "metadata": {}, - "source": [ - "# Evaluate quantized model\n", - "eval_results = coco_evaluate(model=quant_model,\n", - " dataset_folder=EVAL_DATASET_FOLDER,\n", - " annotation_file=EVAL_DATASET_ANNOTATION_FILE,\n", - " preprocess=yolox_preprocess_chw_transpose,\n", - " output_resize=output_resize,\n", - " batch_size=BATCH_SIZE,\n", - " model_inference=model_predict)\n", - "\n", - "print(\"Quantized model mAP: {:.4f}\".format(eval_results[0]))" - ], - "outputs": [], - "execution_count": null - }, - { - "cell_type": "markdown", - "id": "6d93352843a27433", - "metadata": { - "collapsed": false - }, - "source": [ - "\\\n", - "Copyright 2024 Sony Semiconductor Israel, Inc. All rights reserved.\n", - "\n", - "Licensed under the Apache License, Version 2.0 (the \"License\");\n", - "you may not use this file except in compliance with the License.\n", - "You may obtain a copy of the License at\n", - "\n", - " http://www.apache.org/licenses/LICENSE-2.0\n", - "\n", - "Unless required by applicable law or agreed to in writing, software\n", - "distributed under the License is distributed on an \"AS IS\" BASIS,\n", - "WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", - "See the License for the specific language governing permissions and\n", - "limitations under the License." 
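Once the export cell above has written `./model.onnx`, a lightweight structural check with the `onnx` package can confirm the file was serialized as expected before handing it to the IMX500 conversion tools. Running actual inference would additionally require the sony-custom-layers ONNX Runtime ops used by the integrated NMS, so this sketch only inspects the graph:

```python
import onnx

# Load the exported model and inspect its graph I/O (no inference, so no custom-op runtime is needed).
onnx_model = onnx.load('./model.onnx')

print('Inputs :', [inp.name for inp in onnx_model.graph.input])
print('Outputs:', [out.name for out in onnx_model.graph.output])
print('Opsets :', {op.domain or 'ai.onnx': op.version for op in onnx_model.opset_import})
print('Nodes  :', len(onnx_model.graph.node))
```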
- ] - } - ], - "metadata": { - "colab": { - "provenance": [] - }, - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.4" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/tutorials/notebooks/mct_features_notebooks/README.md b/tutorials/notebooks/mct_features_notebooks/README.md deleted file mode 100644 index f94a2626b..000000000 --- a/tutorials/notebooks/mct_features_notebooks/README.md +++ /dev/null @@ -1,118 +0,0 @@ -# MCT Features -This tutorial set introduces the various quantization tools offered by MCT. -The notebooks included here illustrate the setup and usage of both basic and advanced post-training quantization methods. -You'll learn how to refine PTQ (Post-Training Quantization) settings, export models, and explore advanced compression -techniques such as GPTQ (Gradient-Based Post-Training Quantization), Mixed precision quantization and more. -These techniques are essential for further optimizing models and achieving superior performance in deployment scenarios. - -### Keras Tutorials - -
- Post-Training Quantization (PTQ) - - | Tutorial | Included Features | - |--------------------------------------------------------------------------------------------|-----------------------------------------------------------------------------------------------------| - | [Basic Post-Training Quantization (PTQ)](keras/example_keras_post-training_quantization.ipynb) | ✅ PTQ | - | [MobileNetV2](../imx500_notebooks/keras/example_keras_mobilenetv2_for_imx500.ipynb) | ✅ PTQ | - | [Mixed-Precision MobileNetV2](keras/example_keras_mobilenet_mixed_precision.ipynb) | ✅ PTQ
✅ Mixed-Precision | - | [Nanodet-Plus](../imx500_notebooks/keras/example_keras_nanodet_plus_for_imx500.ipynb) | ✅ PTQ | - | [EfficientDetLite0](../imx500_notebooks/keras/example_keras_effdet_lite0_for_imx500.ipynb) | ✅ PTQ
✅ [sony-custom-layers](https://github.com/sony/custom_layers) integration | - -
- -
- Gradient-Based Post-Training Quantization (GPTQ) - - | Tutorial | Included Features | - |------------------------------|---------------| - | [MobileNetV2](keras/example_keras_mobilenet_gptq.ipynb) | ✅ GPTQ | - -
- -
- Quantization-Aware Training (QAT) - - | Tutorial | Included Features | - |---------------------------------------------------|--------------| - | [QAT on MNIST](keras/example_keras_qat.ipynb) | ✅ QAT | - -
- - -
- Structured Pruning - - | Tutorial | Included Features | - |---------------------------------------------------------------------|------------------| - | [Fully-Connected Model Pruning](keras/example_keras_pruning_mnist.ipynb) | ✅ Pruning | - -
- -
- Export Quantized Models - - | Tutorial | Included Features | - |---------------------------------------------------------------------------------------|-----------------| - | [Exporter Usage](keras/example_keras_export.ipynb) | ✅ Export | - -
- -
- Debug Tools - - | Tutorial | Included Features | - |-------------------------------------------------------------------------------------|-------------------------| - | [Network Editor Usage](keras/example_keras_network_editor.ipynb) | ✅ Network Editor | - -
- -### Pytorch Tutorials - - -
- Post-Training Quantization (PTQ) - - | Tutorial | Included Features | - |-----------------------------------------------------------------------------------------------------------|---------------------------------------------| - | [Basic Post-Training Quantization (PTQ)](pytorch/example_pytorch_post_training_quantization.ipynb) | ✅ PTQ | - | [Mixed-Precision Post-Training Quantization](pytorch/example_pytorch_mixed_precision_ptq.ipynb) | ✅ PTQ
✅ Mixed-Precision | - | [Advanced Gradient-Based Post-Training Quantization (GPTQ)](pytorch/example_pytorch_mobilenet_gptq.ipynb) | ✅ GPTQ | - -
- -
- Structured Pruning - - | Tutorial | Included Features | - |--------------------------------------------------------------------------------------|------------------| - | [Fully-Connected Model Pruning](pytorch/example_pytorch_pruning_mnist.ipynb) | ✅ Pruning | - - -
- -
- Data Generation - - | Tutorial | Included Features | - |-----------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------| - | [Zero-Shot Quantization (ZSQ) using Data Generation](pytorch/example_pytorch_data_generation.ipynb) | ✅ PTQ
✅ ZSQ
✅ Data-Free Quantization
✅ Data Generation | - -
- - -
- Export Quantized Models - - | Tutorial | Included Features | - |---------------------------------------------------------------------------------------|-----------------| - | [Exporter Usage](pytorch/example_pytorch_export.ipynb) | ✅ Export | - -
-
- Quantization Troubleshooting - - | Tutorial | Included Features | - |------------------------------------------------------------------------------------------------|-------------------| - | [Quantization Troubleshooting using the Xquant Feature](pytorch/example_pytorch_xquant.ipynb) | ✅ Debug | - -
diff --git a/tutorials/notebooks/mct_features_notebooks/__init__.py b/tutorials/notebooks/mct_features_notebooks/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/tutorials/notebooks/mct_features_notebooks/keras/__init__.py b/tutorials/notebooks/mct_features_notebooks/keras/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/tutorials/notebooks/mct_features_notebooks/pytorch/__init__.py b/tutorials/notebooks/mct_features_notebooks/pytorch/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/tutorials/mct_model_garden/models_keras/utils/__init__.py b/tutorials/pytorch/__init__.py similarity index 100% rename from tutorials/mct_model_garden/models_keras/utils/__init__.py rename to tutorials/pytorch/__init__.py diff --git a/tutorials/notebooks/mct_features_notebooks/pytorch/example_pytorch_data_generation.ipynb b/tutorials/pytorch/example_pytorch_data_generation.ipynb similarity index 100% rename from tutorials/notebooks/mct_features_notebooks/pytorch/example_pytorch_data_generation.ipynb rename to tutorials/pytorch/example_pytorch_data_generation.ipynb diff --git a/tutorials/notebooks/mct_features_notebooks/pytorch/example_pytorch_export.ipynb b/tutorials/pytorch/example_pytorch_export.ipynb similarity index 100% rename from tutorials/notebooks/mct_features_notebooks/pytorch/example_pytorch_export.ipynb rename to tutorials/pytorch/example_pytorch_export.ipynb diff --git a/tutorials/notebooks/mct_features_notebooks/pytorch/example_pytorch_mixed_precision_ptq.ipynb b/tutorials/pytorch/example_pytorch_mixed_precision_ptq.ipynb similarity index 100% rename from tutorials/notebooks/mct_features_notebooks/pytorch/example_pytorch_mixed_precision_ptq.ipynb rename to tutorials/pytorch/example_pytorch_mixed_precision_ptq.ipynb diff --git a/tutorials/notebooks/mct_features_notebooks/pytorch/example_pytorch_mobilenet_gptq.ipynb b/tutorials/pytorch/example_pytorch_mobilenet_gptq.ipynb similarity index 100% rename from tutorials/notebooks/mct_features_notebooks/pytorch/example_pytorch_mobilenet_gptq.ipynb rename to tutorials/pytorch/example_pytorch_mobilenet_gptq.ipynb diff --git a/tutorials/notebooks/mct_features_notebooks/pytorch/example_pytorch_post_training_quantization.ipynb b/tutorials/pytorch/example_pytorch_post_training_quantization.ipynb similarity index 100% rename from tutorials/notebooks/mct_features_notebooks/pytorch/example_pytorch_post_training_quantization.ipynb rename to tutorials/pytorch/example_pytorch_post_training_quantization.ipynb diff --git a/tutorials/notebooks/mct_features_notebooks/pytorch/example_pytorch_pruning_mnist.ipynb b/tutorials/pytorch/example_pytorch_pruning_mnist.ipynb similarity index 100% rename from tutorials/notebooks/mct_features_notebooks/pytorch/example_pytorch_pruning_mnist.ipynb rename to tutorials/pytorch/example_pytorch_pruning_mnist.ipynb diff --git a/tutorials/notebooks/mct_features_notebooks/pytorch/example_pytorch_xquant.ipynb b/tutorials/pytorch/example_pytorch_xquant.ipynb similarity index 100% rename from tutorials/notebooks/mct_features_notebooks/pytorch/example_pytorch_xquant.ipynb rename to tutorials/pytorch/example_pytorch_xquant.ipynb diff --git a/tutorials/resources/scripts/prepare_imagenet.sh b/tutorials/resources/scripts/prepare_imagenet.sh deleted file mode 100644 index de15b8099..000000000 --- a/tutorials/resources/scripts/prepare_imagenet.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/bin/bash -rm -rf imagenet/val -mkdir imagenet/val -tar -xf imagenet/ILSVRC2012_img_val.tar 
-C imagenet/val -wget https://raw.githubusercontent.com/tensorflow/models/master/research/slim/datasets/imagenet_2012_validation_synset_labels.txt -O imagenet/val/imagenet_2012_validation_synset_labels.txt -cd imagenet/val; find . -name "*.JPEG" | sort > images.txt -cd imagenet/val; function zip34() { while read word3 <&3; do read word4 <&4 ; echo $word3 $word4 ; done }; zip34 3 Callable: - """ - Selects and returns the appropriate preprocessing function based on the model version. - - Returns: - Callable: A function that can be used to preprocess input data for the specified model version. - - Raises: - ValueError: If the model version is unsupported. - """ - if self.model_version == 'MobileNet': - return tf.keras.applications.mobilenet.preprocess_input - elif self.model_version == 'MobileNetV2': - return tf.keras.applications.mobilenet_v2.preprocess_input - elif self.model_version == 'MobileNetV3Small': - return tf.keras.applications.mobilenet_v3.preprocess_input - elif self.model_version == 'EfficientNetB1': - return tf.keras.applications.efficientnet.preprocess_input - elif self.model_version == 'Xception': - return tf.keras.applications.xception.preprocess_input - elif self.model_version == 'DenseNet121': - return tf.keras.applications.densenet.preprocess_input - elif self.model_version == 'NASNetMobile': - return tf.keras.applications.nasnet.preprocess_input - else: - raise ValueError(f"Unsupported model version: {self.model_version}") - def preprocess_dataset(self, images, labels): - return self.preprocess_input(images), labels - - - - def get_validation_dataset_fraction(self, fraction, test_dataset_folder, batch) -> tf.data.Dataset: - """ - Load a fraction of the validation dataset for evaluation. - - Args: - fraction (float, optional): Fraction of the dataset to load. Defaults to 1.0 (i.e., the entire dataset). - test_dataset_folder (str): location of dataset - batch (int): batch size when loading dataset. - - Returns: - tf.data.Dataset: A fraction of the validation dataset. - """ - assert 0 < fraction <= 1, "Fraction must be between 0 and 1." - - # Load the dataset to determine the total number of samples - initial_dataset = tf.keras.utils.image_dataset_from_directory( - directory=test_dataset_folder, - batch_size=1, # Use batch size of 1 to count samples - image_size=[224, 224], - shuffle=False, - crop_to_aspect_ratio=True, - interpolation='bilinear') - - total_samples = initial_dataset.cardinality().numpy() - samples_to_take = int(total_samples * fraction) - - # reload the dataset again with batch size + take number of samples - dataset = tf.keras.utils.image_dataset_from_directory( - directory=test_dataset_folder, - batch_size=batch, - image_size=[224, 224], - shuffle=False, - crop_to_aspect_ratio=True, - interpolation='bilinear') - - # Preprocess the dataset - dataset = dataset.map(self.preprocess_dataset) - # Take the calculated number of samples (adjusted for batch size) - dataset = dataset.take(samples_to_take // batch + (1 if samples_to_take % batch else 0)) - - return dataset - - - def get_representative_dataset(self, fraction, representative_dataset_folder, batch) -> Generator: - """ - A function that loads a fraction of the dataset and returns a representative dataset generator. - - Args: - fraction (float): The fraction of the dataset to load. Defaults to 1.0 (the entire dataset). - test_dataset_folder (str): location of dataset - batch (int): batch size when loading dataset. - - Returns: - Generator: A generator yielding batches of preprocessed images. 
- """ - assert 0 < fraction <= 1, "Fraction must be between 0 and 1." - - print('Loading dataset, this may take a few minutes ...') - dataset = tf.keras.utils.image_dataset_from_directory( - directory=representative_dataset_folder, - batch_size=batch, - image_size=[224, 224], - shuffle=True, - crop_to_aspect_ratio=True, - interpolation='bilinear') - - # Preprocess the data - dataset = dataset.map(self.preprocess_dataset) - - # Determine the total number of batches in the dataset - total_batches = dataset.cardinality().numpy() - if total_batches == tf.data.experimental.INFINITE_CARDINALITY: - raise ValueError("Dataset size is infinite. A finite dataset is required to compute a fraction.") - - # Calculate the number of batches to use, based on the specified fraction - batches_to_use = int(total_batches * fraction) - - def representative_dataset() -> Generator: - """A generator function that yields batches of preprocessed images.""" - for image_batch, _ in dataset.take(batches_to_use): - yield image_batch.numpy() - - print('images in representative dataset: '+ str(batch*batches_to_use)) - - return representative_dataset - - diff --git a/tutorials/resources/utils/pytorch_tutorial_tools.py b/tutorials/resources/utils/pytorch_tutorial_tools.py deleted file mode 100644 index 7c6326602..000000000 --- a/tutorials/resources/utils/pytorch_tutorial_tools.py +++ /dev/null @@ -1,64 +0,0 @@ -# Copyright 2024 Sony Semiconductor Israel, Inc. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -import torch -import logging -from tqdm import tqdm - - -def classification_eval(model, data_loader, limit=None, num_images_to_display=1000): - logging.info(f'Start classification evaluation') - correct = 0 - total = 0 - device = torch.device("cuda" if torch.cuda.is_available() else "cpu") - model.to(device) - model.eval() - # since we're not training, we don't need to calculate the gradients for our outputs - with torch.no_grad(): - for data in tqdm(data_loader, desc="Classification evaluation"): - images, labels = data - # calculate outputs by running images through the network - outputs = model(images.to(device)) - # the class with the highest energy is what we choose as prediction - _, predicted = torch.max(outputs.data, 1) - total += labels.size(0) - correct += (predicted == labels.to(device)).sum().item() - if total % num_images_to_display == 0: - print(f'Num of images: {total}, Accuracy: {round(100 * correct / total, 2)} %') - if limit and total >= int(limit): - break - - logging.info(f'Num of images: {total}, Accuracy: {round(100 * correct / total, 2)} %') - - return correct / total, total - - -def get_representative_dataset(data_loader, n_iters, data_loader_key=0, transforms=None): - class RepresentativeDataset(object): - def __init__(self, in_data_loader): - self.dl = in_data_loader - self.iter = iter(self.dl) - - def __call__(self): - for _ in range(n_iters): - try: - x = next(self.iter)[data_loader_key] - except StopIteration: - self.iter = iter(self.dl) - x = next(self.iter)[data_loader_key] - if transforms is not None: - x = transforms(x.float()) - yield [x.cpu().numpy()] - - return RepresentativeDataset(data_loader)
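The two helpers in this removed `pytorch_tutorial_tools.py` module were written to plug directly into MCT's PyTorch PTQ flow. A minimal usage sketch is shown below; the dataset folder, torchvision model, transforms, and batch/iteration counts are illustrative assumptions rather than part of the original file:

```python
import torch
import torchvision
from torchvision import transforms
import model_compression_toolkit as mct
# Helpers from the module shown above (as it existed before removal)
from tutorials.resources.utils.pytorch_tutorial_tools import classification_eval, get_representative_dataset

# Hypothetical ImageNet-style validation folder; adjust to your local setup.
DATASET_DIR = './imagenet/val'

transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
dataset = torchvision.datasets.ImageFolder(DATASET_DIR, transform=transform)
data_loader = torch.utils.data.DataLoader(dataset, batch_size=32, shuffle=False)

model = torchvision.models.mobilenet_v2(weights='IMAGENET1K_V1').eval()

# Representative dataset: 10 batches drawn from the loader (element 0 of each batch is the image tensor).
representative_data_gen = get_representative_dataset(data_loader, n_iters=10)

# Post-training quantization with MCT, using the same keyword arguments as the notebooks above.
quant_model, _ = mct.ptq.pytorch_post_training_quantization(
    in_module=model, representative_data_gen=representative_data_gen)

# Compare top-1 accuracy of the float and quantized models on the first 1000 validation images.
float_acc, _ = classification_eval(model, data_loader, limit=1000)
quant_acc, _ = classification_eval(quant_model, data_loader, limit=1000)
print(f'Float top-1: {float_acc:.4f} | Quantized top-1: {quant_acc:.4f}')
```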