Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

More robust- retries & concurrent inputs #129

Open
wants to merge 21 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
__pycache__
1 change: 1 addition & 0 deletions __init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .modal_inference import *
108 changes: 108 additions & 0 deletions image.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
from modal import Volume, Image, Mount
import os
from pathlib import Path
from ai_video_editor.stub import stub, REPO_HOME, LOCAL_CERT_PATH, CERT_PATH, EXTRA_ENV

# Path at which the volume below is attached (used as the key of
# ``local_volumes``).
LOCAL_VOLUME_DIR = "/video_llava_volume"
# Exported into the image environment as HF_DATASETS_CACHE by the image
# definition in this file.
HF_DATASETS_CACHE = str(Path(LOCAL_VOLUME_DIR) / "hf_datasets_cache")
# Model cache directory under the volume path (kept as a Path, not a str).
MODEL_CACHE = Path(LOCAL_VOLUME_DIR, "models")

# Name of the Modal volume; it is created on first use if it does not exist.
LOCAL_VOLUME_NAME = "video-llava-volume"
local_volume = Volume.from_name(LOCAL_VOLUME_NAME, create_if_missing=True)
# Mapping of path -> volume, passed as ``volumes=`` by the decorator helpers
# in this file.
local_volumes = {
    LOCAL_VOLUME_DIR: local_volume,
}
# Local video_llava sources mounted at REPO_HOME; passed as ``mounts=`` by the
# decorator helpers in this file.
local_mounts = [
    Mount.from_local_dir("./ai_video_editor/video_llava", remote_path=REPO_HOME),
]


def remove_old_files():
    """Delete the ``/volume/models`` directory tree, ignoring any errors.

    This function is handed to ``Image.run_function`` in the image
    definition in this file.
    """
    from shutil import rmtree

    rmtree('/volume/models', ignore_errors=True)

# Container image definition. The steps are applied in the order written;
# ``gpu="any"`` on a step asks Modal to run that build step with a GPU.
image = (
    # CUDA 12.1.1 / cuDNN 8 devel base on Ubuntu 22.04, with Python 3.11 added.
    Image.from_registry(
        "nvidia/cuda:12.1.1-cudnn8-devel-ubuntu22.04", add_python="3.11"
    )
    # OS-level packages installed via apt.
    .apt_install(
        "git",
        "curl",
        "libgl1-mesa-glx",
        "libglib2.0-0",
        "libsm6",
        "libxrender1",
        "libxext6",
        "ffmpeg",
        "clang",
        "libopenmpi-dev",
        gpu="any",
    )

    # Pinned Python dependencies; the commented-out pins are alternatives
    # that are currently not used.
    .pip_install(
        # "torch==2.1.2",
        # "transformers==4.37.2",
        # "bitsandbytes==0.42.0",
        "torch==2.0.1", "torchvision==0.15.2",
        "transformers==4.31.0", "tokenizers>=0.12.1,<0.14", "sentencepiece==0.1.99", "shortuuid",
        "accelerate==0.21.0", "peft==0.4.0", "bitsandbytes==0.41.0",
        "pydantic<2,>=1", "markdown2[all]", "numpy", "scikit-learn==1.2.2",
        "requests", "httpx==0.24.0", "uvicorn", "fastapi",
        "einops==0.6.1", "einops-exts==0.0.4", "timm==0.6.13",
        "tensorboardX==2.6.2.2", "gradio==3.37.0", "gradio_client==0.7.0",
        "deepspeed==0.9.5", "ninja", "wandb",
        "wheel",
        gpu="any",
    )
    # Runs the bitsandbytes module entry point during the build
    # (presumably as a self-check; see the TODO after this definition).
    .run_commands(
        "python -m bitsandbytes",
        gpu="any"
    )
    # flash-attn is installed without build isolation.
    .run_commands("pip install flash-attn --no-build-isolation", gpu="any")
    # Environment variables set in the image.
    .env({"PYTHONPATH": REPO_HOME, "HF_DATASETS_CACHE": HF_DATASETS_CACHE})
    # Video/image dependencies; pytorchvideo is pinned to a specific commit.
    .pip_install(
        "decord",
        "opencv-python",
        "git+https://github.com/facebookresearch/pytorchvideo.git@28fe037d212663c6a24f373b94cc5d478c8c1a1d",
        gpu="any",
    )
    .pip_install(
        "aiofiles",
        "aioboto3",
    )
    # Runs remove_old_files as a build step.
    .run_function(remove_old_files)
    # Copies the local certificate file to CERT_PATH inside the image.
    .copy_local_file(LOCAL_CERT_PATH, CERT_PATH)
    # NOTE: aioboto3 is already installed by the pip_install step above.
    .pip_install("boto3", "aioboto3")
    .env(EXTRA_ENV)
    .pip_install("diskcache")
)
# TODO: bitsandbytes does not seem to be working with the GPU

def function_dec(**extras):
    """Return a ``stub.function`` decorator preconfigured for this project.

    Args:
        **extras: Additional keyword arguments forwarded to
            ``stub.function``. A key that matches one of the defaults below
            (for example ``timeout``) replaces that default; previously such
            a call failed with ``TypeError`` for a duplicate keyword.

    Returns:
        The result of ``stub.function(...)``, which is used as a decorator.
    """
    options = dict(
        image=image,
        timeout=80000,
        # checkpointing doesn't work because it restricts internet access
        # checkpointing_enabled=True,  # Enable memory checkpointing for faster cold starts.
        _allow_background_volume_commits=True,
        container_idle_timeout=120,
        volumes=local_volumes,
        mounts=local_mounts,
    )
    # Caller-supplied values take precedence over the defaults.
    options.update(extras)
    return stub.function(**options)

def cls_dec(**extras):
    """Return a ``stub.cls`` decorator preconfigured for this project.

    Args:
        **extras: Additional keyword arguments forwarded to ``stub.cls``
            (for example ``gpu="any"``). A key that matches one of the
            defaults below (for example ``retries``) replaces that default;
            previously such a call failed with ``TypeError`` for a duplicate
            keyword.

    Returns:
        The result of ``stub.cls(...)``, which is used as a class decorator.
    """
    options = dict(
        image=image,
        timeout=80000,
        # checkpointing doesn't work because it restricts internet access
        # checkpointing_enabled=True,  # Enable memory checkpointing for faster cold starts.
        container_idle_timeout=1200,
        # TODO maybe turn on
        allow_concurrent_inputs=4,
        retries=3,
        _allow_background_volume_commits=True,
        volumes=local_volumes,
        mounts=local_mounts,
    )
    # Caller-supplied values take precedence over the defaults.
    options.update(extras)
    return stub.cls(**options)
180 changes: 180 additions & 0 deletions modal_inference.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,180 @@
import os
import shutil
import urllib

from modal import asgi_app, method, enter, build
from ai_video_editor.utils.fs_utils import async_copy_from_s3
from .image import LOCAL_VOLUME_DIR, MODEL_CACHE, cls_dec, function_dec, local_volume
from ai_video_editor.stub import stub, S3_VIDEO_PATH, VOLUME_DIR, volume as remote_volume
import diskcache as dc
from pathlib import Path
# for local testing
#S3_VIDEO_PATH= "s3_videos"
#MODEL_CACHE = "models"
#Path(VOLUME_DIR).mkdir(exist_ok=True, parents=True)
# Default directories under S3_VIDEO_PATH; the /inference endpoint joins a
# bare video/image file name onto these when no explicit path is given.
VIDEOS_DIR = Path(S3_VIDEO_PATH) / "videos"
IMAGES_DIR = Path(S3_VIDEO_PATH) / "images"



@cls_dec(gpu="any")
class VideoLlavaModel:
    """Video-LLaVA inference wrapper with an on-disk cache of generated text."""

    @enter()
    def load_model(self):
        """Open the result cache and build the 4-bit Video-LLaVA chat handler."""
        self.cache = dc.Cache('.cache')
        local_volume.reload()
        import torch
        from videollava.serve.gradio_utils import Chat
        self.conv_mode = "llava_v1"
        model_path = 'LanguageBind/Video-LLaVA-7B'
        device = 'cuda'
        load_8bit = False
        load_4bit = True
        self.dtype = torch.float16
        self.handler = Chat(
            model_path,
            conv_mode=self.conv_mode,
            load_8bit=load_8bit,
            load_4bit=load_4bit,
            device=device,
            cache_dir=str(MODEL_CACHE),
        )
        # self.handler.model.to(dtype=self.dtype)

    def copy_file_from_remote_volume(self, filepath):
        """Copy ``filepath`` into LOCAL_VOLUME_DIR unless it is already there.

        The destination keeps everything after the second ``/`` of
        ``filepath`` as its path relative to LOCAL_VOLUME_DIR.
        """
        in_volume_path = filepath.split('/', 2)[-1]
        local_volume_path = Path(LOCAL_VOLUME_DIR) / in_volume_path
        local_volume_path.parent.mkdir(parents=True, exist_ok=True)
        if not local_volume_path.exists():
            shutil.copy(filepath, str(local_volume_path))

    async def copy_file_from_s3(self, filepath):
        """Fetch an ``s3://bucket/key`` object and return its local path.

        Returns:
            The destination path handed to ``async_copy_from_s3``
            (``VOLUME_DIR/<key>``).
            NOTE(review): assumes the third argument of
            ``async_copy_from_s3`` is the exact destination file path.
        """
        bucket, in_bucket_path = filepath.replace('s3://', '').split('/', 1)
        local_path = str(Path(VOLUME_DIR) / in_bucket_path)
        await async_copy_from_s3(bucket, in_bucket_path, local_path)
        return local_path

    async def copy_file_to_local(self, filepath):
        """Make ``filepath`` available locally and return the path to read.

        Returns:
            ``None`` for an empty ``filepath``; the downloaded local path for
            ``s3://`` inputs; otherwise ``filepath`` itself, unchanged.
        """
        if not filepath:
            return None
        if filepath.startswith('s3://'):
            # The s3:// URL itself never exists on disk, so callers must use
            # the downloaded location instead.
            return await self.copy_file_from_s3(filepath)
        self.copy_file_from_remote_volume(filepath)
        return filepath

    def _to_model_tensor(self, processed):
        """Move the first ``pixel_values`` entry to the model device and dtype."""
        tensor = processed['pixel_values'][0]
        return tensor.to(self.handler.model.device, dtype=self.dtype)

    @method()
    async def generate(self, image1, video, textbox_in, use_existing_output=True):
        """Generate text for a prompt about an optional image and/or video.

        Args:
            image1: Image path or ``s3://`` URL; may be empty.
            video: Video path or ``s3://`` URL; may be empty.
            textbox_in: The prompt text.
            use_existing_output: When true, return a cached result for the
                same ``(image1, video, textbox_in)`` triple if one exists.

        Returns:
            The generated text, cut at the first ``#``.

        Raises:
            ValueError: If the prompt is empty or no text was generated.
        """
        # Fail fast before any volume reloads or file copies.
        if not textbox_in:
            raise ValueError("no prompt provided")

        # The cache key uses the arguments exactly as the caller passed them.
        inputs = (image1, video, textbox_in)
        if use_existing_output and inputs in self.cache:
            res = self.cache[inputs]
            self.cache.close()
            return res
        remote_volume.reload()
        local_volume.reload()
        image_path = await self.copy_file_to_local(image1)
        video_path = await self.copy_file_to_local(video)

        from videollava.conversation import conv_templates
        from videollava.constants import DEFAULT_IMAGE_TOKEN

        has_image = bool(image_path) and os.path.exists(image_path)
        has_video = bool(video_path) and os.path.exists(video_path)

        state_ = conv_templates[self.conv_mode].copy()
        text_en_in = textbox_in.replace("picture", "image")

        # When both inputs are present the video tensor comes first,
        # followed by the image tensor.
        images_tensor = []
        if has_video:
            processed = self.handler.video_processor(video_path, return_tensors='pt')
            images_tensor.append(self._to_model_tensor(processed))
        if has_image:
            processed = self.handler.image_processor.preprocess(image_path, return_tensors='pt')
            images_tensor.append(self._to_model_tensor(processed))

        if has_video:
            # One image token per video frame goes in front of the prompt;
            # an accompanying image adds one more token after the prompt.
            num_frames = self.handler.model.get_video_tower().config.num_frames
            text_en_in = DEFAULT_IMAGE_TOKEN * num_frames + '\n' + text_en_in
            if has_image:
                text_en_in = text_en_in + '\n' + DEFAULT_IMAGE_TOKEN
        elif has_image:
            text_en_in = DEFAULT_IMAGE_TOKEN + '\n' + text_en_in
        else:
            print("WARNING: No image or video supplied")

        text_en_out, _ = self.handler.generate(images_tensor, text_en_in, first_run=True, state=state_)

        textbox_out = text_en_out.split('#')[0]

        if not textbox_out:
            raise ValueError("no text generated")
        self.cache.set(inputs, textbox_out)
        self.cache.close()
        return textbox_out



def fastapi_app():
    """Build the FastAPI application exposing ``/upload`` and ``/inference``.

    Creates MODEL_CACHE, VIDEOS_DIR and IMAGES_DIR if they do not exist.

    Returns:
        The configured ``FastAPI`` instance.
    """
    # ``import urllib`` alone does not guarantee that ``urllib.parse`` is
    # loaded, so the function is imported explicitly.
    from urllib.parse import unquote

    import aiofiles
    from fastapi import FastAPI, File, HTTPException, UploadFile

    Path(MODEL_CACHE).mkdir(exist_ok=True, parents=True)
    VIDEOS_DIR.mkdir(exist_ok=True, parents=True)
    IMAGES_DIR.mkdir(exist_ok=True, parents=True)

    app = FastAPI()
    model = VideoLlavaModel()
    upload_root = Path(LOCAL_VOLUME_DIR).resolve()

    @app.post("/upload")
    async def upload(
        file: UploadFile = File(...),
    ):
        """Store the uploaded file under LOCAL_VOLUME_DIR and commit the volume."""
        local_volume.reload()
        filename_decoded = unquote(file.filename or '')
        # The file name is client-controlled: reject empty names and anything
        # that would resolve outside the volume ("../", absolute paths).
        resolved_target = (upload_root / filename_decoded).resolve()
        if upload_root not in resolved_target.parents:
            raise HTTPException(status_code=400, detail="invalid file name")
        file_path = str(Path(LOCAL_VOLUME_DIR) / filename_decoded)
        async with aiofiles.open(file_path, "wb") as buffer:
            while content := await file.read(1024 * 1024):  # Read chunks of 1 MiB
                await buffer.write(content)
        local_volume.commit()
        return {"file_path": file_path}

    @app.post("/inference")
    async def inference(
        video_file_name: str = '',
        video_file_path: str = '',
        image_file_name: str = '',
        image_file_path: str = '',
        prompt: str = '',
    ):
        """Run the model on a video (required) and an optional image.

        An explicit ``*_file_path`` wins; otherwise ``*_file_name`` is looked
        up under VIDEOS_DIR / IMAGES_DIR.
        """
        video_file_name = unquote(video_file_name)
        video_file_path = unquote(video_file_path)
        if not video_file_path:
            if not video_file_name:
                # Client error: report 400 instead of an unhandled 500.
                raise HTTPException(
                    status_code=400,
                    detail="one of video_file_path or video_file_name must be specified",
                )
            video_file_path = str(VIDEOS_DIR / video_file_name)

        image_file_name = unquote(image_file_name)
        image_file_path = unquote(image_file_path)
        if not image_file_path and image_file_name:
            image_file_path = str(IMAGES_DIR / image_file_name)

        return model.generate.remote(image_file_path, video_file_path, prompt)

    return app


@function_dec()
@asgi_app()
def fastapi_app_modal():
    """Modal ASGI entry point: build the FastAPI application and return it."""
    asgi_application = fastapi_app()
    return asgi_application

# Local testing:
# uncomment the `app = ...` line below to test locally; keep it commented out to deploy
# app = fastapi_app()
# conda activate videollava
# uvicorn modal_inference:app
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ dependencies = [
"pydantic<2,>=1", "markdown2[all]", "numpy", "scikit-learn==1.2.2",
"requests", "httpx==0.24.0", "uvicorn", "fastapi",
"einops==0.6.1", "einops-exts==0.0.4", "timm==0.6.13",
"tensorboardX==2.6.2.2", "gradio==3.37.0", "gradio_client==0.7.0"
"tensorboardX==2.6.2.2", "gradio==3.37.0", "gradio_client==0.7.0", "modal"
]

[project.optional-dependencies]
Expand Down
Loading