diff --git a/chromadb/utils/embedding_functions.py b/chromadb/utils/embedding_functions.py index 22d57e6a3d6..d54174a9e71 100644 --- a/chromadb/utils/embedding_functions.py +++ b/chromadb/utils/embedding_functions.py @@ -16,6 +16,7 @@ is_document, ) +from io import BytesIO from pathlib import Path import os import tarfile @@ -27,6 +28,7 @@ import inspect import json import sys +import base64 try: from chromadb.is_thin_client import is_thin_client @@ -740,6 +742,74 @@ def __call__(self, input: Union[Documents, Images]) -> Embeddings: return embeddings +class RoboflowEmbeddingFunction(EmbeddingFunction[Union[Documents, Images]]): + def __init__( + self, api_key: str = "", api_url = "https://infer.roboflow.com" + ) -> None: + """ + Create a RoboflowEmbeddingFunction. + + Args: + api_key (str): Your API key for the Roboflow API. + api_url (str, optional): The URL of the Roboflow API. Defaults to "https://infer.roboflow.com". + """ + if not api_key: + api_key = os.environ.get("ROBOFLOW_API_KEY") + + self._api_url = api_url + self._api_key = api_key + + try: + self._PILImage = importlib.import_module("PIL.Image") + except ImportError: + raise ValueError( + "The PIL python package is not installed. Please install it with `pip install pillow`" + ) + + def __call__(self, input: Union[Documents, Images]) -> Embeddings: + embeddings = [] + + for item in input: + if is_image(item): + image = self._PILImage.fromarray(item) + + buffer = BytesIO() + image.save(buffer, format="JPEG") + base64_image = base64.b64encode(buffer.getvalue()).decode("utf-8") + + infer_clip_payload = { + "image": { + "type": "base64", + "value": base64_image, + }, + } + + res = requests.post( + f"{self._api_url}/clip/embed_image?api_key={self._api_key}", + json=infer_clip_payload, + ) + + result = res.json()['embeddings'] + + embeddings.append(result[0]) + + elif is_document(item): + infer_clip_payload = { + "text": input, + } + + res = requests.post( + f"{self._api_url}/clip/embed_text?api_key={self._api_key}", + json=infer_clip_payload, + ) + + result = res.json()['embeddings'] + + embeddings.append(result[0]) + + return embeddings + + class AmazonBedrockEmbeddingFunction(EmbeddingFunction[Documents]): def __init__( self, @@ -885,7 +955,6 @@ def __call__(self, input: Documents) -> Embeddings: ], ) - # List of all classes in this module _classes = [ name diff --git a/examples/use_with/roboflow/embeddings.ipynb b/examples/use_with/roboflow/embeddings.ipynb new file mode 100644 index 00000000000..b5d03d445a9 --- /dev/null +++ b/examples/use_with/roboflow/embeddings.ipynb @@ -0,0 +1,258 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "wFnbuH7qqotV" + }, + "source": [ + "# Use Roboflow with Chroma\n", + "\n", + "With [Roboflow Inference](https://inference.roboflow.com), you can calculate image embeddings using CLIP, a popular multimodal embedding model. You can then store these embeddings in Chroma for use in your application.\n", + "\n", + "In this guide, we are going to discuss how to load image embeddings into Chroma. We will discuss:\n", + "\n", + "1. How to set up Roboflow Inference\n", + "2. How to create a Chroma vector database\n", + "3. How to calculate CLIP embeddings with Inference\n", + "4. How to run a search query with Chroma\n", + "\n", + "## What is Roboflow Inference?\n", + "\n", + "[Roboflow Inference](https://inference.roboflow.com) is a scalable server through which you can run fine-tuned object detection, segmentation, and classification models, as well as popular foundation models such as CLIP.\n", + "\n", + "Inference handles all of the complexity associated with running vision models, from managing dependencies to maintaining your environment.\n", + "\n", + "Inference is trusted by enterprises around the world to manage vision models, with the hosted version powering millions of API calls each month.\n", + "\n", + "Inference runs in Docker and provides a HTTP interface through which to retrieve predictions.\n", + "\n", + "We will use Inference to calculate CLIP embeddings for our application.\n", + "\n", + "There are two ways to use Inference:\n", + "\n", + "1. On your device\n", + "2. Through the Inference API hosted by Roboflow\n", + "\n", + "In this guide, we will use the hosted Inference API." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "coXj8QiRrXfw" + }, + "source": [ + "### Step #1: Create a Chroma Vector Database\n", + "\n", + "To load and save image embeddings into Chroma, we first need images to embed. In this guide, we are going to use the COCO 128 dataset, a collection of 128 images from the Microsoft COCO dataset. This dataset is available on Roboflow Universe, a community that has shared more than 250,000 public computer vision datasets.\n", + "\n", + "To download the dataset, visit the COCO 128 web page, click “Download Dataset” and click \"show download code\" to get a download code:\n", + "\n", + "![COCO 128 dataset](https://media.roboflow.com/coco128.png)\n", + "\n", + "Here is the download code for the COCO 128 dataset:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "4MboNZCZsTfK" + }, + "outputs": [], + "source": [ + "!pip install roboflow -q\n", + "\n", + "API_KEY = \"\"\n", + "\n", + "from roboflow import Roboflow\n", + "\n", + "rf = Roboflow(api_key=API_KEY)\n", + "project = rf.workspace(\"team-roboflow\").project(\"coco-128\")\n", + "dataset = project.version(2).download(\"yolov8\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pf3aKIsGsTKD" + }, + "source": [ + "\n", + "Above, replace the value associated with the `API_KEY` variable with your Roboflow API key. [Learn how to retrieve your Robflow API key](https://docs.roboflow.com/api-reference/authentication#retrieve-an-api-key).\n", + "\n", + "Now that we have a dataset ready, we can create a vector database and start loading embeddings.\n", + "\n", + "Install the Chroma Python client and supervision, which we will use to open images in this notebook, with the following command:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "smDcsb16rZdP" + }, + "outputs": [], + "source": [ + "!pip install chromadb supervision -q" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-IvKMl9IrcOJ" + }, + "source": [ + "Then, run the code below to calculate CLIP vectors for images in your dataset:" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "id": "BPN4-uvLrbhQ" + }, + "outputs": [], + "source": [ + "import chromadb\n", + "import os\n", + "from chromadb.utils.data_loaders import ImageLoader\n", + "from chromadb.utils.embedding_functions import RoboflowEmbeddingFunction\n", + "import uuid\n", + "import cv2\n", + "import supervision as sv\n", + "\n", + "SERVER_URL = \"https://infer.roboflow.com\"\n", + "\n", + "ef = RoboflowEmbeddingFunction(API_KEY, api_url = SERVER_URL)\n", + "\n", + "client = chromadb.PersistentClient(path=\"database\")\n", + "\n", + "data_loader = ImageLoader()\n", + "\n", + "collection = client.create_collection(name=\"images_db2\", embedding_function=ef, data_loader=data_loader, metadata={\"hnsw:space\": \"cosine\"})" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "gAFEc5FJu7oj", + "outputId": "4bf5d5b8-0c88-4ff4-bb83-dcf47178c770" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "['/content/COCO-128-2/train/images/000000000643_jpg.rf.fd058cb12cbda17a08a6254751aad243.jpg', '/content/COCO-128-2/train/images/000000000446_jpg.rf.4d5fe626e32b8b40408f9b711a10f04a.jpg', '/content/COCO-128-2/train/images/000000000321_jpg.rf.012f28b6a17e876bf2da17cab227c4cc.jpg', '/content/COCO-128-2/train/images/000000000315_jpg.rf.3613aa9bc01121a7949ecde10452ceef.jpg', '/content/COCO-128-2/train/images/000000000472_jpg.rf.68cb82fda7d6d5abec9e462eb191271a.jpg', '/content/COCO-128-2/train/images/000000000389_jpg.rf.1dd437dbb518e480b21652cf552e5b2d.jpg', '/content/COCO-128-2/train/images/000000000208_jpg.rf.5247f6d89d634629bfa005d8792613b4.jpg', '/content/COCO-128-2/train/images/000000000597_jpg.rf.2c8f04559f193762dc844986f6d60cad.jpg', '/content/COCO-128-2/train/images/000000000508_jpg.rf.4d5b0d34a3ddacbbae66a6d9a0c4daf5.jpg', '/content/COCO-128-2/train/images/000000000431_jpg.rf.56ab0d462037c5b588d11c7eb5b34278.jpg', '/content/COCO-128-2/train/images/000000000357_jpg.rf.fd60a5947f0f1b2ad6273d8ff87b6282.jpg', '/content/COCO-128-2/train/images/000000000138_jpg.rf.af439ef1c55dd8a4e4b142d186b9c957.jpg', '/content/COCO-128-2/train/images/000000000438_jpg.rf.c410916a61676ab359be5aebf293c85f.jpg', '/content/COCO-128-2/train/images/000000000419_jpg.rf.b58c3291ae18177c82e19195fd533614.jpg', '/content/COCO-128-2/train/images/000000000510_jpg.rf.a6383d3c4bffc15986bababf90bb2076.jpg', '/content/COCO-128-2/train/images/000000000143_jpg.rf.99b5580faf8d1ff7b0ac0b4345a4cf1a.jpg', '/content/COCO-128-2/train/images/000000000260_jpg.rf.be309dc699a9462430efc6dc624a0681.jpg', '/content/COCO-128-2/train/images/000000000110_jpg.rf.150f5495ca5c7842cfcd42a5aeae841f.jpg', '/content/COCO-128-2/train/images/000000000562_jpg.rf.9bb940c3cf544f101e88cd9052ddc3e0.jpg', '/content/COCO-128-2/train/images/000000000308_jpg.rf.220721ec7c9e25fe8596c5c64cc84a2c.jpg', '/content/COCO-128-2/train/images/000000000165_jpg.rf.eae14d5509bf0c9ceccddbb53a5f0c66.jpg', '/content/COCO-128-2/train/images/000000000415_jpg.rf.670afeffb0d21fd977df575a7a826b39.jpg', '/content/COCO-128-2/train/images/000000000436_jpg.rf.f75c349fa1c6f78054991af5238d8e0a.jpg', '/content/COCO-128-2/train/images/000000000192_jpg.rf.ad225cb1bc09bfe56f6282c3f7ce56af.jpg', '/content/COCO-128-2/train/images/000000000042_jpg.rf.a1f5c146b4b81881b19f342a148c0f51.jpg', '/content/COCO-128-2/train/images/000000000650_jpg.rf.1b74ba165c5a3513a3211d4a80b69e1c.jpg', '/content/COCO-128-2/train/images/000000000326_jpg.rf.02c19837e58093adec35e737066bb9ae.jpg', '/content/COCO-128-2/train/images/000000000149_jpg.rf.0a861d05b36be7927ab205acb325f4ce.jpg', '/content/COCO-128-2/train/images/000000000488_jpg.rf.c01187aceb8a49b901abe79739d6acdf.jpg', '/content/COCO-128-2/train/images/000000000241_jpg.rf.883e3e7bef174603fd5d2bbdbaf3a4ba.jpg', '/content/COCO-128-2/train/images/000000000370_jpg.rf.b13777ecf61d3edb14a6724a6331d4b7.jpg', '/content/COCO-128-2/train/images/000000000136_jpg.rf.71b51a4103c3e797f62a52a9d20fddfe.jpg', '/content/COCO-128-2/train/images/000000000625_jpg.rf.ce871c39393fefd9fd8671806761a1c8.jpg', '/content/COCO-128-2/train/images/000000000400_jpg.rf.fef965b3dde5237dcf1396f68c3b2a52.jpg', '/content/COCO-128-2/train/images/000000000073_jpg.rf.eb8e88e2239ba953b553f4e60b5d567c.jpg', '/content/COCO-128-2/train/images/000000000531_jpg.rf.5a9928283716bf2aac47963ca1a19afd.jpg', '/content/COCO-128-2/train/images/000000000612_jpg.rf.656879428df938a1a000bc255a193ccd.jpg', '/content/COCO-128-2/train/images/000000000142_jpg.rf.5d34f341f09bef6870b506337bb426ad.jpg', '/content/COCO-128-2/train/images/000000000036_jpg.rf.af0418c203165d3ee53b7dee5fe8b301.jpg', '/content/COCO-128-2/train/images/000000000089_jpg.rf.b82e9aa4a74633f95da698d712065b7a.jpg', '/content/COCO-128-2/train/images/000000000474_jpg.rf.bbd3bc1d1951dbca328a9a84c330b337.jpg', '/content/COCO-128-2/train/images/000000000404_jpg.rf.ca3ae1b40e22e5f6044ad9606b069ed7.jpg', '/content/COCO-128-2/train/images/000000000154_jpg.rf.300698916140dd41f6fda1c194d7b00d.jpg', '/content/COCO-128-2/train/images/000000000133_jpg.rf.8a7d1da21b04545e5543d28373d75cf1.jpg', '/content/COCO-128-2/train/images/000000000071_jpg.rf.1e9a11ad22ba304c5519e32b3ab47a48.jpg', '/content/COCO-128-2/train/images/000000000196_jpg.rf.8e48b2a4a9bd63fcd02ad61708d18ef2.jpg', '/content/COCO-128-2/train/images/000000000382_jpg.rf.c172a50ccf4da06a423497fac9d12579.jpg', '/content/COCO-128-2/train/images/000000000086_jpg.rf.556402c74eccf93483195dc2000ceeb2.jpg', '/content/COCO-128-2/train/images/000000000636_jpg.rf.e4dbb1e2ce37580cbc890709cad177a7.jpg', '/content/COCO-128-2/train/images/000000000328_jpg.rf.13cb1874b4f817acce9a8ad106b4225b.jpg', '/content/COCO-128-2/train/images/000000000113_jpg.rf.0b3da2103c183dbd0d3ae9a364b48458.jpg', '/content/COCO-128-2/train/images/000000000127_jpg.rf.b1a2420c07d63c4415880a90e639579f.jpg', '/content/COCO-128-2/train/images/000000000623_jpg.rf.85a92f3af326182db18c9f891a2a2d88.jpg', '/content/COCO-128-2/train/images/000000000532_jpg.rf.7f786bd3fd5dce29c2a1c58cd32b68d1.jpg', '/content/COCO-128-2/train/images/000000000338_jpg.rf.db620506b174e29042f3758665d1b384.jpg', '/content/COCO-128-2/train/images/000000000397_jpg.rf.9a764f8819f8ef9c3f93fdf33e9f4929.jpg', '/content/COCO-128-2/train/images/000000000294_jpg.rf.f4aaaee71c6e53cb4af0d42b7bc64c6b.jpg', '/content/COCO-128-2/train/images/000000000629_jpg.rf.d678871351924516e2310e8cbc18feeb.jpg', '/content/COCO-128-2/train/images/000000000250_jpg.rf.fe4daef71d5cf0dedf2d70ec1a053e05.jpg', '/content/COCO-128-2/train/images/000000000025_jpg.rf.782fe78a513b7eeded6172306f4f502c.jpg', '/content/COCO-128-2/train/images/000000000395_jpg.rf.7b3a9f039340b28eeea8eca51e875130.jpg', '/content/COCO-128-2/train/images/000000000540_jpg.rf.d42cc5cec9a137294c1d0dd81cacceaf.jpg', '/content/COCO-128-2/train/images/000000000061_jpg.rf.6945c178a14b90b01cd7d8b1e60e6a22.jpg', '/content/COCO-128-2/train/images/000000000387_jpg.rf.f6d68f4e3c90d097157a7d8fe1839b34.jpg', '/content/COCO-128-2/train/images/000000000529_jpg.rf.16dbfc2ac51287dba01ca3ae5028ece0.jpg', '/content/COCO-128-2/train/images/000000000109_jpg.rf.a74c9fd0bdb672415629919c2c802f2d.jpg', '/content/COCO-128-2/train/images/000000000368_jpg.rf.54fa547d31097e00d82aebd06a36df59.jpg', '/content/COCO-128-2/train/images/000000000620_jpg.rf.1e47693b5bf354d0ea9048db0be8227e.jpg', '/content/COCO-128-2/train/images/000000000283_jpg.rf.5b072b49882659b6ec9c9454d9623390.jpg', '/content/COCO-128-2/train/images/000000000572_jpg.rf.42b48081afe86d9e293734b0da064a71.jpg', '/content/COCO-128-2/train/images/000000000349_jpg.rf.9ed53bdff4c93ccc59fbe0b69de28cb5.jpg', '/content/COCO-128-2/train/images/000000000471_jpg.rf.faa1965b86263f4b92754c0495695c7e.jpg', '/content/COCO-128-2/train/images/000000000151_jpg.rf.f17fad0b1976f3f2dd638666617c159c.jpg', '/content/COCO-128-2/train/images/000000000312_jpg.rf.07ecf1a16342778b938ec2add28a7169.jpg', '/content/COCO-128-2/train/images/000000000443_jpg.rf.81f83991ba79c61e94912b2d34699024.jpg', '/content/COCO-128-2/train/images/000000000569_jpg.rf.af8d85de4a25be55832e3044098ea670.jpg', '/content/COCO-128-2/train/images/000000000634_jpg.rf.2feb5ee0e764217c6796acd17da1b7fa.jpg', '/content/COCO-128-2/train/images/000000000077_jpg.rf.520601990c512b7983888cbedfe14ca1.jpg', '/content/COCO-128-2/train/images/000000000450_jpg.rf.2cec6f80e01101afeb2060b0aac48e99.jpg', '/content/COCO-128-2/train/images/000000000194_jpg.rf.b6838f3b8a3e8626f1229fa694132bb9.jpg', '/content/COCO-128-2/train/images/000000000081_jpg.rf.5ac5126a29a5565691c27016453cb17b.jpg', '/content/COCO-128-2/train/images/000000000575_jpg.rf.6d3a2aa58cc3bd4db498d1a7b7a2c9d4.jpg', '/content/COCO-128-2/train/images/000000000428_jpg.rf.b5e82e0c67b750a2ac3eb0168d2b18d4.jpg', '/content/COCO-128-2/train/images/000000000009_jpg.rf.856f80d728927e943a5bccfdf49dd677.jpg', '/content/COCO-128-2/train/images/000000000641_jpg.rf.2ad873d1d358c17ad3149fac98f1e4bf.jpg', '/content/COCO-128-2/train/images/000000000257_jpg.rf.f6550733ae637214e209517d35c363b0.jpg', '/content/COCO-128-2/train/images/000000000064_jpg.rf.01bec3a770d2bc9cc8eb02d41a0c7f79.jpg', '/content/COCO-128-2/train/images/000000000360_jpg.rf.625cc871e851ded6de7bfe7687760f35.jpg', '/content/COCO-128-2/train/images/000000000094_jpg.rf.df1e8da2f564e0dc1f3e6401c05a1481.jpg', '/content/COCO-128-2/train/images/000000000309_jpg.rf.db7b22492fbb7f8d205fd1d00fe2280a.jpg', '/content/COCO-128-2/train/images/000000000030_jpg.rf.15401567b6ff1d3787995bde6eeee471.jpg', '/content/COCO-128-2/train/images/000000000584_jpg.rf.65a9bac7029d5afffc477baa9d3b43bc.jpg', '/content/COCO-128-2/train/images/000000000595_jpg.rf.8aa06812ef201b3ee0078533b0bbbdee.jpg', '/content/COCO-128-2/train/images/000000000564_jpg.rf.4ec8ed4abf0997c8cb5cea298fef3465.jpg', '/content/COCO-128-2/train/images/000000000307_jpg.rf.237662256b060aa457a2a09d95609b32.jpg', '/content/COCO-128-2/train/images/000000000560_jpg.rf.26e523d9666e9eadffd6df0f9c6754f0.jpg', '/content/COCO-128-2/train/images/000000000359_jpg.rf.01e0e6dc67e6fe503c7939d890bcd6d6.jpg', '/content/COCO-128-2/train/images/000000000263_jpg.rf.25c52657eff0a27882c0d1de8c72fe75.jpg', '/content/COCO-128-2/train/images/000000000605_jpg.rf.da1c1eeb4b1bca2c5862b31c0221a9b5.jpg', '/content/COCO-128-2/train/images/000000000514_jpg.rf.b65ef17c163528ff0f3c2131fd346a51.jpg', '/content/COCO-128-2/train/images/000000000486_jpg.rf.407a3acfe23dbf206d95b58a5d0aea37.jpg', '/content/COCO-128-2/train/images/000000000384_jpg.rf.616518e6584f3a3b798d270382dd2b16.jpg', '/content/COCO-128-2/train/images/000000000542_jpg.rf.f94e117d2fb63254d26cf370bcd0e86f.jpg', '/content/COCO-128-2/train/images/000000000201_jpg.rf.602495fdc61e3930f33684db99b1a869.jpg', '/content/COCO-128-2/train/images/000000000490_jpg.rf.b4d25b276034f3a0627399220c2cc0b8.jpg', '/content/COCO-128-2/train/images/000000000536_jpg.rf.04e1e3b47f96c565c19c6593aed44119.jpg', '/content/COCO-128-2/train/images/000000000332_jpg.rf.72bbbb24196a6387cbf5618e11f5f2ce.jpg', '/content/COCO-128-2/train/images/000000000074_jpg.rf.98fe5630d85530144d222acb65f55f15.jpg', '/content/COCO-128-2/train/images/000000000599_jpg.rf.06b89323b92ef324fdd5e7ecf7f20b9b.jpg', '/content/COCO-128-2/train/images/000000000581_jpg.rf.d56a78fa63c89e49f8b7092379a45c67.jpg', '/content/COCO-128-2/train/images/000000000049_jpg.rf.bfa6e0ac33a75f530011d8b3b50a3b5c.jpg', '/content/COCO-128-2/train/images/000000000459_jpg.rf.b6b7189271d5e7a826b5dad0923fbb87.jpg', '/content/COCO-128-2/train/images/000000000144_jpg.rf.09821163c359a738ad86e72865a310dc.jpg', '/content/COCO-128-2/train/images/000000000589_jpg.rf.a7b35721e40dfa21973cd1ab02a8fab4.jpg', '/content/COCO-128-2/train/images/000000000034_jpg.rf.a33e87d94b16c1112e8d9946fee784b9.jpg', '/content/COCO-128-2/train/images/000000000078_jpg.rf.afbc984af561c845df5edc5f96c4037f.jpg', '/content/COCO-128-2/train/images/000000000164_jpg.rf.276e7a583c850858df0541c60a19f28c.jpg', '/content/COCO-128-2/train/images/000000000092_jpg.rf.69d6172284e6afd6017db6ea911213ca.jpg', '/content/COCO-128-2/train/images/000000000322_jpg.rf.ca66cb6f129e0893f5f9139c3ec1617b.jpg', '/content/COCO-128-2/train/images/000000000544_jpg.rf.2762a5e736ff95a5e68bad19b86210e6.jpg', '/content/COCO-128-2/train/images/000000000072_jpg.rf.244aee5e871d00c83983a224a5eb8ed5.jpg', '/content/COCO-128-2/train/images/000000000590_jpg.rf.7d55377855a996a3c6ee00e8d98307b7.jpg', '/content/COCO-128-2/train/images/000000000502_jpg.rf.997919f6f6bc7d0929387ffcb6a49b24.jpg', '/content/COCO-128-2/train/images/000000000626_jpg.rf.b776e538820040987b54f49d4a2bb7fb.jpg', '/content/COCO-128-2/train/images/000000000247_jpg.rf.892bc95842cc54b7e6972677db7c5928.jpg', '/content/COCO-128-2/train/images/000000000491_jpg.rf.dd6cde4b463637c688bcae9dbb0488f0.jpg', '/content/COCO-128-2/train/images/000000000520_jpg.rf.30218d72a576772917cc478208e40924.jpg', '/content/COCO-128-2/train/images/000000000394_jpg.rf.757dedbde46dc32eed390faa679fc4f5.jpg']\n" + ] + } + ], + "source": [ + "IMAGE_DIR = dataset.location + \"/train/images\"\n", + "\n", + "documents = [os.path.join(IMAGE_DIR, img) for img in os.listdir(IMAGE_DIR)]\n", + "uris = [os.path.join(IMAGE_DIR, img) for img in os.listdir(IMAGE_DIR)]\n", + "ids = [str(uuid.uuid4()) for _ in range(len(documents))]\n", + "\n", + "collection.add(\n", + " uris=uris,\n", + " ids=ids,\n", + " metadatas=[{\"file\": file} for file in documents]\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "y-ai8v7BrozZ" + }, + "source": [ + "If you have downloaded custom images from a source other than the Roboflow snippet earlier in this notebook, replace `IMAGE_DIR` with the folder where your images are stored.\n", + "\n", + "In this code snippet, we create a new Chroma database called `images`. Our database will use cosine similarity for embedding comparisons.\n", + "\n", + "We calculate CLIP embeddings for all images in the `COCO128/train/images` folder using Inference. We save the embeddings in Chroma using the `collection.add()` method.\n", + "\n", + "We store the file names associated with each image in the `documents` variable, and embeddings in `embeddings`.\n", + "\n", + "If you want to use the hosted version of Roboflow Inference to calculate embeddings, replace the `SERVER_URL` value with `https://infer.roboflow.com`. We use the RoboflowEmbeddingFunction, built in to Chroma, to interact with Inference.\n", + "\n", + "Run the script above to calculate embeddings for a folder of images and save them in your database.\n", + "\n", + "We now have a vector database that contains some embeddings. Great! Let’s move on to the fun part: running a search query on our database." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "KCWbrXsbrpmI" + }, + "source": [ + "### Step #3: Run a Search Query\n", + "\n", + "To run a search query, we need a text embedding of a query. For example, if we want to find vegetables in our collection of 128 images from the COCO dataset, we need to have a text embedding for the search phrase “baseball”.\n", + "\n", + "To calculate a text embedding, we can use Inference through the embedding function we defined earlier:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "4DeO6T7xrs2K" + }, + "outputs": [], + "source": [ + "query = \"baseball\"\n", + "\n", + "results = collection.query(\n", + " n_results=3,\n", + " query_texts=query\n", + ")\n", + "top_result = results[\"metadatas\"][0][0][\"file\"]\n", + "\n", + "sv.plot_image(cv2.imread(top_result))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Me2eRcYPrrXL" + }, + "source": [ + "Our code returns the name of the image with the most similar embedding to the embedding of our text query.\n", + "\n", + "The top result is an image of a child holding a baseball glove in a park. Chroma successfully returned an image that matched our prompt." + ] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file