From 10a03deac4feb6e0a790a210f3a5b9c53aefb3ae Mon Sep 17 00:00:00 2001 From: Hongyu Chiu <20734616+james77777778@users.noreply.github.com> Date: Sat, 20 Jan 2024 21:19:46 +0800 Subject: [PATCH 1/8] Update README --- README.md | 40 +++++++++++++++++++++++++++------------- 1 file changed, 27 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index 1ae8d2b..7ac32f0 100644 --- a/README.md +++ b/README.md @@ -26,8 +26,13 @@ pip install keras kimm ### Use Pretrained Model +[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/14WxYgVjlwCIO9MwqPYW-dskbTL2UHsVN?usp=sharing) + ```python -from keras import random +import cv2 +import keras +from keras import ops +from keras.applications.imagenet_utils import decode_predictions import kimm @@ -40,20 +45,21 @@ print(kimm.list_models("efficientnet", weights="imagenet")) # fuzzy search # Initialize the model with pretrained weights model = kimm.models.EfficientNetV2B0(weights="imagenet") -# Predict -x = random.uniform([1, 192, 192, 3]) * 255.0 -y = model.predict(x) -print(y.shape) - -# Initialize the model as a feature extractor with pretrained weights -model = kimm.models.EfficientNetV2B0( - feature_extractor=True, weights="imagenet" +# Load an image as the model input +image_path = keras.utils.get_file( + "african_elephant.jpg", "https://i.imgur.com/Bvro0YD.png" ) +image = cv2.imread(image_path) +image = cv2.resize(image, (image_size, image_size)) +x = ops.convert_to_tensor(image) +x = ops.expand_dims(x, axis=0) -# Extract features for downstream tasks -y = model.predict(x) -print(y.keys()) -print(y["BLOCK5_S32"].shape) +# Initialize the model with pretrained weights +model = kimm.models.EfficientNetV2B0() + +# Predict +preds = model.predict(inputs) +print("Predicted:", decode_predictions(preds, top=3)[0]) ``` ### Transfer Learning @@ -91,6 +97,14 @@ y = model.predict(x) print(y.shape) ``` +#### An end-to-end example: fine-tuning an image classification model on a cats vs. dogs dataset + +[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1IbqfqG2NKEOKvBOznIPT1kjOdVPfThmd?usp=sharing) + +Reference: + +[https://keras.io/guides/transfer_learning/#an-endtoend-example-finetuning-an-image-classification-model-on-a-cats-vs-dogs-dataset](https://keras.io/guides/transfer_learning/#an-endtoend-example-finetuning-an-image-classification-model-on-a-cats-vs-dogs-dataset) + ## License Please refer to [timm](https://github.com/huggingface/pytorch-image-models#licenses) as this project is built upon it. From 5cc592145b2f89aad3112537b17b78210a26aa94 Mon Sep 17 00:00:00 2001 From: Hongyu Chiu <20734616+james77777778@users.noreply.github.com> Date: Sat, 20 Jan 2024 21:40:51 +0800 Subject: [PATCH 2/8] Update `README` --- README.md | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 7ac32f0..85917da 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ # Keras Image Models
-KIMM +KIMM [![PyPI](https://img.shields.io/pypi/v/kimm)](https://pypi.org/project/kimm/) [![Contributions Welcome](https://img.shields.io/badge/contributions-welcome-brightgreen.svg?style=flat)](https://github.com/james77777778/kimm/issues) @@ -43,7 +43,8 @@ print(kimm.list_models()) print(kimm.list_models("efficientnet", weights="imagenet")) # fuzzy search # Initialize the model with pretrained weights -model = kimm.models.EfficientNetV2B0(weights="imagenet") +model = kimm.models.EfficientNetV2B0() +image_size = model._default_size # Load an image as the model input image_path = keras.utils.get_file( @@ -54,14 +55,18 @@ image = cv2.resize(image, (image_size, image_size)) x = ops.convert_to_tensor(image) x = ops.expand_dims(x, axis=0) -# Initialize the model with pretrained weights -model = kimm.models.EfficientNetV2B0() - # Predict -preds = model.predict(inputs) +preds = model.predict(x) print("Predicted:", decode_predictions(preds, top=3)[0]) ``` +```bash +['ConvMixer1024D20', 'ConvMixer1536D20', 'ConvMixer736D32', 'ConvNeXtAtto', ...] +['EfficientNetB0', 'EfficientNetB1', 'EfficientNetB2', 'EfficientNetB3', ...] +1/1 ━━━━━━━━━━━━━━━━━━━━ 11s 11s/step +Predicted: [('n02504458', 'African_elephant', 0.90578836), ('n01871265', 'tusker', 0.024864597), ('n02504013', 'Indian_elephant', 0.01161992)] +``` + ### Transfer Learning ```python @@ -101,6 +106,12 @@ print(y.shape) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1IbqfqG2NKEOKvBOznIPT1kjOdVPfThmd?usp=sharing) +
+kimm_prediction_0 + +kimm_prediction_0 +
+ Reference: [https://keras.io/guides/transfer_learning/#an-endtoend-example-finetuning-an-image-classification-model-on-a-cats-vs-dogs-dataset](https://keras.io/guides/transfer_learning/#an-endtoend-example-finetuning-an-image-classification-model-on-a-cats-vs-dogs-dataset) From d384e78086c248fc7ce5a961de60c9cb1cf79728 Mon Sep 17 00:00:00 2001 From: Hongyu Chiu <20734616+james77777778@users.noreply.github.com> Date: Sat, 20 Jan 2024 22:39:54 +0800 Subject: [PATCH 3/8] Update `README` --- README.md | 69 +++++++++++++++++++++++-------------------------------- 1 file changed, 29 insertions(+), 40 deletions(-) diff --git a/README.md b/README.md index 85917da..e7283a4 100644 --- a/README.md +++ b/README.md @@ -67,54 +67,43 @@ print("Predicted:", decode_predictions(preds, top=3)[0]) Predicted: [('n02504458', 'African_elephant', 0.90578836), ('n01871265', 'tusker', 0.024864597), ('n02504013', 'Indian_elephant', 0.01161992)] ``` -### Transfer Learning - -```python -from keras import layers -from keras import models -from keras import random - -import kimm - -# Initialize the model as a backbone with pretrained weights -backbone = kimm.models.EfficientNetV2B0( - input_shape=[224, 224, 3], - include_top=False, - pooling="avg", - weights="imagenet", -) - -# Freeze the backbone for transfer learning -backbone.trainable = False - -# Construct the model with new head -inputs = layers.Input([224, 224, 3]) -x = backbone(inputs, training=False) -x = layers.Dropout(0.2)(x) -outputs = layers.Dense(2)(x) -model = models.Model(inputs, outputs) - -# Train the new model (put your own logic here) - -# Predict -x = random.uniform([1, 224, 224, 3]) * 255.0 -y = model.predict(x) -print(y.shape) -``` - -#### An end-to-end example: fine-tuning an image classification model on a cats vs. dogs dataset +### An end-to-end example: fine-tuning an image classification model on a cats vs. dogs dataset [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1IbqfqG2NKEOKvBOznIPT1kjOdVPfThmd?usp=sharing)
kimm_prediction_0 -kimm_prediction_0 +kimm_prediction_1
-Reference: - -[https://keras.io/guides/transfer_learning/#an-endtoend-example-finetuning-an-image-classification-model-on-a-cats-vs-dogs-dataset](https://keras.io/guides/transfer_learning/#an-endtoend-example-finetuning-an-image-classification-model-on-a-cats-vs-dogs-dataset) +Reference: [Transfer learning & fine-tuning (keras.io)](https://keras.io/guides/transfer_learning/#an-endtoend-example-finetuning-an-image-classification-model-on-a-cats-vs-dogs-dataset) + +## Model Zoo + +|Model|Paper|Weights are ported from| +|-|-|-| +|ConvMixer|[ICLR 2022 Submission](https://arxiv.org/abs/2201.09792)|`timm`| +|ConvNeXt|[CVPR 2022](https://arxiv.org/abs/2201.03545)|`timm`| +|DenseNet|[CVPR 2017](https://arxiv.org/abs/1608.06993)|`timm`| +|EfficientNet|[ICML 2019](https://arxiv.org/abs/1905.11946)|`timm`| +|EfficientNetLite|[ICML 2019](https://arxiv.org/abs/1905.11946)|`timm`| +|EfficientNetV2|[ICML 2021](https://arxiv.org/abs/2104.00298)|`timm`| +|GhostNet|[CVPR 2020](https://arxiv.org/abs/1911.11907)|`timm`| +|GhostNetV2|[NeurIPS 2022](https://arxiv.org/abs/2211.12905)|`timm`| +|InceptionV3|[CVPR 2016](https://arxiv.org/abs/1512.00567)|`timm`| +|LCNet|[arXiv 2021](https://arxiv.org/abs/2109.15099)|`timm`| +|MobileNetV2|[CVPR 2018](https://arxiv.org/abs/1801.04381)|`timm`| +|MobileNetV3|[ICCV 2019](https://arxiv.org/abs/1905.02244)|`timm`| +|MobileViT|[ICLR 2022](https://arxiv.org/abs/2110.02178)|`timm`| +|RegNet|[CVPR 2020](https://arxiv.org/abs/2003.13678)|`timm`| +|ResNet|[CVPR 2015](https://arxiv.org/abs/1512.03385)|`timm`| +|TinyNet|[NeurIPS 2020](https://arxiv.org/abs/2010.14819)|`timm`| +|VGG|[ICLR 2015](https://arxiv.org/abs/1409.1556)|`timm`| +|ViT|[ICLR 2021](https://arxiv.org/abs/2010.11929)|`timm`| +|Xception|[CVPR 2017](https://arxiv.org/abs/1610.02357)|`keras`| + +The export scripts can be found in `tools/convert_*.py`. ## License From b2bc43b1dcf27d664268857daf7594bdef4281cb Mon Sep 17 00:00:00 2001 From: Hongyu Chiu <20734616+james77777778@users.noreply.github.com> Date: Sat, 20 Jan 2024 22:46:52 +0800 Subject: [PATCH 4/8] Update README --- README.md | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/README.md b/README.md index e7283a4..60f73ef 100644 --- a/README.md +++ b/README.md @@ -81,27 +81,27 @@ Reference: [Transfer learning & fine-tuning (keras.io)](https://keras.io/guides/ ## Model Zoo -|Model|Paper|Weights are ported from| -|-|-|-| -|ConvMixer|[ICLR 2022 Submission](https://arxiv.org/abs/2201.09792)|`timm`| -|ConvNeXt|[CVPR 2022](https://arxiv.org/abs/2201.03545)|`timm`| -|DenseNet|[CVPR 2017](https://arxiv.org/abs/1608.06993)|`timm`| -|EfficientNet|[ICML 2019](https://arxiv.org/abs/1905.11946)|`timm`| -|EfficientNetLite|[ICML 2019](https://arxiv.org/abs/1905.11946)|`timm`| -|EfficientNetV2|[ICML 2021](https://arxiv.org/abs/2104.00298)|`timm`| -|GhostNet|[CVPR 2020](https://arxiv.org/abs/1911.11907)|`timm`| -|GhostNetV2|[NeurIPS 2022](https://arxiv.org/abs/2211.12905)|`timm`| -|InceptionV3|[CVPR 2016](https://arxiv.org/abs/1512.00567)|`timm`| -|LCNet|[arXiv 2021](https://arxiv.org/abs/2109.15099)|`timm`| -|MobileNetV2|[CVPR 2018](https://arxiv.org/abs/1801.04381)|`timm`| -|MobileNetV3|[ICCV 2019](https://arxiv.org/abs/1905.02244)|`timm`| -|MobileViT|[ICLR 2022](https://arxiv.org/abs/2110.02178)|`timm`| -|RegNet|[CVPR 2020](https://arxiv.org/abs/2003.13678)|`timm`| -|ResNet|[CVPR 2015](https://arxiv.org/abs/1512.03385)|`timm`| -|TinyNet|[NeurIPS 2020](https://arxiv.org/abs/2010.14819)|`timm`| -|VGG|[ICLR 2015](https://arxiv.org/abs/1409.1556)|`timm`| -|ViT|[ICLR 2021](https://arxiv.org/abs/2010.11929)|`timm`| -|Xception|[CVPR 2017](https://arxiv.org/abs/1610.02357)|`keras`| +|Model|Paper|Weights are ported from|API| +|-|-|-|-| +|ConvMixer|[ICLR 2022 Submission](https://arxiv.org/abs/2201.09792)|`timm`|`kimm.models.ConvMixer*`| +|ConvNeXt|[CVPR 2022](https://arxiv.org/abs/2201.03545)|`timm`|`kimm.models.ConvNeXt*`| +|DenseNet|[CVPR 2017](https://arxiv.org/abs/1608.06993)|`timm`|`kimm.models.DenseNet*`| +|EfficientNet|[ICML 2019](https://arxiv.org/abs/1905.11946)|`timm`|`kimm.models.EfficientNet*`| +|EfficientNetLite|[ICML 2019](https://arxiv.org/abs/1905.11946)|`timm`|`kimm.models.EfficientNetLite*`| +|EfficientNetV2|[ICML 2021](https://arxiv.org/abs/2104.00298)|`timm`|`kimm.models.EfficientNetV2*`| +|GhostNet|[CVPR 2020](https://arxiv.org/abs/1911.11907)|`timm`|`kimm.models.GhostNet*`| +|GhostNetV2|[NeurIPS 2022](https://arxiv.org/abs/2211.12905)|`timm`|`kimm.models.GhostNetV2*`| +|InceptionV3|[CVPR 2016](https://arxiv.org/abs/1512.00567)|`timm`|`kimm.models.InceptionV3`| +|LCNet|[arXiv 2021](https://arxiv.org/abs/2109.15099)|`timm`|`kimm.models.LCNet*`| +|MobileNetV2|[CVPR 2018](https://arxiv.org/abs/1801.04381)|`timm`|`kimm.models.MobileNetV2*`| +|MobileNetV3|[ICCV 2019](https://arxiv.org/abs/1905.02244)|`timm`|`kimm.models.MobileNetV3*`| +|MobileViT|[ICLR 2022](https://arxiv.org/abs/2110.02178)|`timm`|`kimm.models.MobileViT*`| +|RegNet|[CVPR 2020](https://arxiv.org/abs/2003.13678)|`timm`|`kimm.models.RegNet*`| +|ResNet|[CVPR 2015](https://arxiv.org/abs/1512.03385)|`timm`|`kimm.models.ResNet*`| +|TinyNet|[NeurIPS 2020](https://arxiv.org/abs/2010.14819)|`timm`|`kimm.models.TinyNet*`| +|VGG|[ICLR 2015](https://arxiv.org/abs/1409.1556)|`timm`|`kimm.models.VGG*`| +|ViT|[ICLR 2021](https://arxiv.org/abs/2010.11929)|`timm`|`kimm.models.VisionTransformer*`| +|Xception|[CVPR 2017](https://arxiv.org/abs/1610.02357)|`keras`|`kimm.models.Xception`| The export scripts can be found in `tools/convert_*.py`. From 426d5dca5f29d8aab957720511f60a04b2c98f30 Mon Sep 17 00:00:00 2001 From: Hongyu Chiu <20734616+james77777778@users.noreply.github.com> Date: Sat, 20 Jan 2024 22:52:07 +0800 Subject: [PATCH 5/8] Update `README` --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 60f73ef..422771d 100644 --- a/README.md +++ b/README.md @@ -24,7 +24,7 @@ pip install keras kimm ## Quickstart -### Use Pretrained Model +### Image Classification Using the Model Pretrained on ImageNet [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/14WxYgVjlwCIO9MwqPYW-dskbTL2UHsVN?usp=sharing) From acabafd8660c22a814219afb4d23625c2ec16e92 Mon Sep 17 00:00:00 2001 From: Hongyu Chiu <20734616+james77777778@users.noreply.github.com> Date: Sat, 20 Jan 2024 23:10:29 +0800 Subject: [PATCH 6/8] Update `README` --- README.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/README.md b/README.md index 422771d..fb0c63f 100644 --- a/README.md +++ b/README.md @@ -71,6 +71,8 @@ Predicted: [('n02504458', 'African_elephant', 0.90578836), ('n01871265', 'tusker [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1IbqfqG2NKEOKvBOznIPT1kjOdVPfThmd?usp=sharing) +Using `kimm.models.EfficientNetLiteB0`: +
kimm_prediction_0 @@ -79,6 +81,17 @@ Predicted: [('n02504458', 'African_elephant', 0.90578836), ('n01871265', 'tusker Reference: [Transfer learning & fine-tuning (keras.io)](https://keras.io/guides/transfer_learning/#an-endtoend-example-finetuning-an-image-classification-model-on-a-cats-vs-dogs-dataset) +### Grad-CAM + +[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1h25VmsYDOLL6BNbRPEVOh1arIgcEoHu6?usp=sharing) + +Using `kimm.models.MobileViTS`: + +grad_cam +
+ +Reference: [Grad-CAM class activation visualization (keras.io)](https://keras.io/examples/vision/grad_cam/) + ## Model Zoo |Model|Paper|Weights are ported from|API| From 721dee06a278761426c5c3e4829d426a405829b9 Mon Sep 17 00:00:00 2001 From: Hongyu Chiu <20734616+james77777778@users.noreply.github.com> Date: Sat, 20 Jan 2024 23:12:16 +0800 Subject: [PATCH 7/8] Update image --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index fb0c63f..db9e241 100644 --- a/README.md +++ b/README.md @@ -87,7 +87,7 @@ Reference: [Transfer learning & fine-tuning (keras.io)](https://keras.io/guides/ Using `kimm.models.MobileViTS`: -grad_cam +grad_cam
Reference: [Grad-CAM class activation visualization (keras.io)](https://keras.io/examples/vision/grad_cam/) From 4ec75b355fff64914802e02c0c42a319ff9ff9cd Mon Sep 17 00:00:00 2001 From: Hongyu Chiu <20734616+james77777778@users.noreply.github.com> Date: Sat, 20 Jan 2024 23:13:00 +0800 Subject: [PATCH 8/8] Update aligning --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index db9e241..e4ae4da 100644 --- a/README.md +++ b/README.md @@ -87,6 +87,7 @@ Reference: [Transfer learning & fine-tuning (keras.io)](https://keras.io/guides/ Using `kimm.models.MobileViTS`: +
grad_cam