adding containerized cog inference
daanelson committed May 19, 2023
1 parent 7c3242f commit a088470
Showing 3 changed files with 106 additions and 1 deletion.
2 changes: 1 addition & 1 deletion README.md
@@ -12,7 +12,7 @@ Ishan Misra*

To appear at CVPR 2023 (*Highlighted paper*)

- [[`Paper`](https://facebookresearch.github.io/ImageBind/paper)] [[`Blog`](https://ai.facebook.com/blog/imagebind-six-modalities-binding-ai/)] [[`Demo`](https://imagebind.metademolab.com/)] [[`Supplementary Video`](https://dl.fbaipublicfiles.com/imagebind/imagebind_video.mp4)] [[`BibTex`](#citing-imagebind)]
+ [[`Paper`](https://facebookresearch.github.io/ImageBind/paper)] [[`Blog`](https://ai.facebook.com/blog/imagebind-six-modalities-binding-ai/)] [[`Demo`](https://imagebind.metademolab.com/)] [[`Supplementary Video`](https://dl.fbaipublicfiles.com/imagebind/imagebind_video.mp4)] [[`BibTex`](#citing-imagebind)] [[`Replicate Demo`](https://replicate.com/daanelson/imagebind)]

PyTorch implementation and pretrained models for ImageBind. For details, see the paper: **[ImageBind: One Embedding Space To Bind Them All](https://facebookresearch.github.io/ImageBind/paper)**.

36 changes: 36 additions & 0 deletions cog.yaml
@@ -0,0 +1,36 @@
# Configuration for Cog ⚙️
# Reference: https://github.com/replicate/cog/blob/main/docs/yaml.md

build:
  # set to true if your model requires a GPU
  gpu: true
  cuda: "11.6"

  # a list of ubuntu apt packages to install
  # system_packages:
  #   - "libgl1-mesa-glx"
  #   - "libglib2.0-0"

  # python version in the form '3.8' or '3.8.12'
  python_version: "3.9"

  # a list of packages in the format <package-name>==<version>
  python_packages:
    - "torch==1.13"
    - "torchvision==0.14.0"
    - "torchaudio==0.13.0"
    - "pytorchvideo @ git+https://github.com/facebookresearch/pytorchvideo.git@28fe037d212663c6a24f373b94cc5d478c8c1a1d"
    - "timm==0.6.7"
    - "ftfy"
    - "regex"
    - "einops"
    - "fvcore"
    - "decord==0.6.0"

  # commands run after the environment is setup
  # run:
  #   - "echo env is ready!"
  #   - "echo another command if needed"

# predict.py defines how predictions are run on your model
predict: "predict.py:Predictor"
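
With this configuration in place, Cog can build the container image and run predictions against the Predictor defined below. A minimal usage sketch, assuming the Cog CLI is installed and a GPU is available; the input file names here are hypothetical placeholders:

# build the image (if needed) and run one prediction per modality
cog predict -i input=@dog.jpg -i modality=vision
cog predict -i text_input="a dog playing fetch" -i modality=text
cog predict -i input=@bark.wav -i modality=audio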
69 changes: 69 additions & 0 deletions predict.py
@@ -0,0 +1,69 @@
# Prediction interface for Cog ⚙️
# https://github.com/replicate/cog/blob/main/docs/python.md

from typing import List

import torch
from cog import BasePredictor, Input, Path

import data
from models import imagebind_model
from models.imagebind_model import ModalityType

# Map each supported modality to its preprocessing function from data.py
MODALITY_TO_PREPROCESSING = {
    ModalityType.TEXT: data.load_and_transform_text,
    ModalityType.VISION: data.load_and_transform_vision_data,
    ModalityType.AUDIO: data.load_and_transform_audio_data,
}


class Predictor(BasePredictor):
    def setup(self):
        """Load the model into memory to make running multiple predictions efficient"""
        model = imagebind_model.imagebind_huge(pretrained=True)
        model.eval()
        self.model = model.to("cuda")

    def predict(
        self,
        input: Path = Input(
            description="File that you want to embed. Needs to be text, vision, or audio.",
            default=None,
        ),
        text_input: str = Input(
            description="Text that you want to embed. Provide a string here instead of a text file if you'd like.",
            default=None,
        ),
        modality: str = Input(
            description="Modality of the input you'd like to embed",
            choices=list(MODALITY_TO_PREPROCESSING.keys()),
            default=ModalityType.VISION,
        ),
    ) -> List[float]:
        """Infer a single embedding with the model"""

        if not input and not text_input:
            raise Exception(
                "Neither input nor text_input was provided! Provide one in order to generate an embedding."
            )

        modality_function = MODALITY_TO_PREPROCESSING[modality]

        if modality == "text":
            if input and text_input:
                raise Exception(
                    f"input and text_input were both provided! Only provide one to generate an embedding.\nInput provided: {input}\nText input provided: {text_input}"
                )
            if not text_input:
                # Read the file as a single string so the text preprocessor
                # receives a flat list of strings, not a nested list of lines.
                with open(input, "r") as f:
                    text_input = f.read().strip()
            input = text_input

        device = "cuda"
        model_input = {modality: modality_function([input], device)}

        with torch.no_grad():
            embeddings = self.model(model_input)
        return embeddings[modality].cpu().squeeze().tolist()
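
For local debugging outside the container, the Predictor can also be exercised directly. A minimal sketch, assuming the ImageBind repo layout above, downloaded pretrained weights, and a CUDA GPU; the example text is arbitrary:

from predict import Predictor

predictor = Predictor()
predictor.setup()  # loads imagebind_huge onto the GPU
embedding = predictor.predict(
    input=None,
    text_input="a dog playing fetch",
    modality="text",
)
print(len(embedding))  # imagebind_huge embeddings should be 1024-dimensional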
