Support SAMURAI #8

Merged · 2 commits · Nov 25, 2024
8 changes: 5 additions & 3 deletions .github/workflows/docker.yml
@@ -30,11 +30,13 @@ jobs:

- name: Build Docker image
run: |
docker build -t ghcr.io/${{ github.repository }}_dev:latest --target builder_image .
docker build -t ghcr.io/${{ github.repository }}:latest --target release_image .
docker build -t ghcr.io/${{ github.repository }}:latest .
# docker build -t ghcr.io/${{ github.repository }}_dev:latest --target builder_image .
# docker build -t ghcr.io/${{ github.repository }}:latest --target release_image .

- name: Push Docker image
if: github.event_name == 'push'
run: |
docker push ghcr.io/${{ github.repository }}_dev:latest
docker push ghcr.io/${{ github.repository }}:latest
# docker push ghcr.io/${{ github.repository }}_dev:latest
# docker push ghcr.io/${{ github.repository }}:latest
2 changes: 1 addition & 1 deletion .github/workflows/python-lint.yml
@@ -23,4 +23,4 @@ jobs:
pip install flake8

- name: Run flake8
run: flake8 .
run: flake8 --ignore=E402 .
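For reference, E402 is flake8's "module level import not at top of file" check; relaxing it is a common accommodation when setup code has to run before an import. A minimal sketch of what the rule catches (illustrative only, not code from this repository):

import os                    # fine: import at the top of the module

os.environ["MY_FLAG"] = "1"  # an executable statement before an import ...

import sys                   # ... makes this import an E402 violation

print(sys.version)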
5 changes: 4 additions & 1 deletion .gitignore
@@ -11,4 +11,7 @@
*.engine
*.mcap

flagged*/
flagged/
datasets/
runs/
__pycache__/
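The new ignore entries presumably cover artifacts the reworked tooling produces: datasets/ and runs/ are where Ultralytics-style workflows stage data and write outputs, and __pycache__/ is Python's bytecode cache.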
50 changes: 36 additions & 14 deletions Dockerfile
@@ -1,19 +1,41 @@
FROM ultralytics/ultralytics:8.2.63 AS builder_image
FROM pytorch/pytorch:2.3.1-cuda12.1-cudnn8-runtime

RUN apt-get update && apt-get install -y git curl ffmpeg
RUN git config --global --add safe.directory /app
RUN python3 -m pip install --upgrade pip
RUN python3 -m pip install poetry \
&& poetry config virtualenvs.create false
RUN apt update && apt install -y git vim curl
RUN apt-get update && apt-get install -y --no-install-recommends \
ffmpeg \
libavutil-dev \
libavcodec-dev \
libavformat-dev \
libswscale-dev \
pkg-config \
build-essential \
libffi-dev
RUN pip install --upgrade pip setuptools

WORKDIR /app
RUN git clone -b master --single-branch --depth=1 https://github.com/moriyalab/samurai.git
RUN cd /workspace/samurai/sam2 && \
pip install -e .

COPY pyproject.toml poetry.lock ./
RUN poetry install --no-root
RUN cd /workspace/samurai/sam2 && \
pip install -e ".[notebooks]" && \
pip install \
matplotlib==3.7 \
tikzplotlib \
jpeg4py \
opencv-python \
lmdb \
pandas \
scipy \
loguru \
flake8 \
ultralytics==8.2.63 \
gradio==4.44.0 \
ffmpeg-python==0.2.0 \
gdown==5.2 \
lapx==0.5.10

FROM builder_image AS release_image
RUN cd /workspace/samurai/sam2/checkpoints && \
./download_ckpts.sh && \
cd ..

COPY --from=builder_image /app /app
COPY . /app/

CMD [ "python3", "app.py" ]
RUN mkdir -p /workspace/horus_inference_server
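As a sanity check, here is a minimal smoke test one might run inside the resulting image (a sketch; it assumes the editable install above exposes a sam2 package, as the SAM2-based layout of moriyalab/samurai suggests):

import torch
import sam2  # installed editable from /workspace/samurai/sam2 in the layers above

print("CUDA available:", torch.cuda.is_available())
print("sam2 installed at:", sam2.__file__)

If both lines print without error, the PyTorch CUDA runtime and the SAMURAI/SAM2 install are in place.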
262 changes: 95 additions & 167 deletions app.py
@@ -1,188 +1,116 @@
import gradio as gr
import cv2
from ultralytics import RTDETR
import pandas as pd
import tempfile
import io
import glob
import shutil
import os

mode_list = ["Create Fully Annotated Video", "Create Time-Lapse Video", "Do Not Create Video"]
from horus import util
from horus import inference

video_path_ = ""

def upload_mlmodel(filepaths):
dest_dir = './ml_model'
value_x = 0
value_y = 0
value_w = 0
value_h = 0

if not os.path.exists(dest_dir):
os.makedirs(dest_dir)

for filepath in filepaths:
if os.path.isfile(filepath):
shutil.copy(filepath, dest_dir)
else:
return f'{filepath} is not found'
def update_input_video(videoPath: str):
global video_path_
video_path_ = videoPath
cap = cv2.VideoCapture(videoPath)
width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
frame = update_target_frame(video_path_, 0, 0, 0, 0)

return "Upload complete. Please restart gradio_web_ui.py"
return gr.update(maximum=width), gr.update(maximum=height), gr.update(maximum=width), gr.update(maximum=height), frame


def inference_image(image, mlmodel_name: str, confidence: float):
model = RTDETR(mlmodel_name)
results = model.predict(
source=image,
conf=confidence / 100,
verbose=False
)
annotated_frame = results[0].plot()
results = results[0].cpu()

boxes_info = []
for box_data in results.boxes:
box = box_data.xywh[0]
xmin = max(0, min(int(box[0] - box[2] / 2), 65535))
ymin = max(0, min(int(box[1] - box[3] / 2), 65535))
xmax = max(0, min(int(box[0] + box[2] / 2), 65535))
ymax = max(0, min(int(box[1] + box[3] / 2), 65535))
boxes_info.append([xmin, ymin, xmax, ymax, float(box_data.conf), model.names[int(box_data.cls)]])

df = pd.DataFrame(boxes_info, columns=["xmin", "ymin", "xmax", "ymax", "confidence", "label"])
csv_buffer = io.StringIO()
df.to_csv(csv_buffer, index=False)
csv_data = csv_buffer.getvalue()
return annotated_frame, csv_data


def infer_video(videos, mlmodel_name: str, confidence: float, mode: float, progress=gr.Progress()):
global mode_list
model = RTDETR(mlmodel_name)
output_files = []
boxes_info = []
for video in videos:
cap = cv2.VideoCapture(video)
fps = float(cap.get(cv2.CAP_PROP_FPS))

if mode_list[0] == mode or mode_list[1] == mode:
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
output_file = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4').name
out = cv2.VideoWriter(output_file, fourcc, fps, (width, height))

try:
while cap.isOpened():
ret, frame = cap.read()
progress(cap.get(cv2.CAP_PROP_POS_FRAMES) / cap.get(cv2.CAP_PROP_FRAME_COUNT))
if not ret:
break

results = model.track(
source=frame,
device="cuda:0",
verbose=False,
persist=True,
tracker="botsort.yaml",
conf=confidence / 100,
half=True)

if mode_list[0] == mode:
annotated_frame = results[0].plot()
out.write(annotated_frame)
elif mode_list[1] == mode and cap.get(cv2.CAP_PROP_POS_FRAMES) % int(fps) == 0:
annotated_frame = results[0].plot()
out.write(annotated_frame)

for box_data in results[0].cpu().boxes:
box = box_data.xywh[0]
xmin = max(0, min(int(box[0] - box[2] / 2), 65535))
ymin = max(0, min(int(box[1] - box[3] / 2), 65535))
xmax = max(0, min(int(box[0] + box[2] / 2), 65535))
ymax = max(0, min(int(box[1] + box[3] / 2), 65535))
timestamp = cap.get(cv2.CAP_PROP_POS_MSEC)
boxes_info.append([timestamp, xmin, ymin, xmax, ymax, float(box_data.conf), model.names[int(box_data.cls)]])

finally:
cap.release()

df = pd.DataFrame(boxes_info, columns=["timestamp", "xmin", "ymin", "xmax", "ymax", "confidence", "label"])
csv_buffer = io.StringIO()
df.to_csv(csv_buffer, index=False)

csv_file = tempfile.NamedTemporaryFile(delete=False, suffix='.csv').name
with open(csv_file, "w", encoding="utf-8") as file:
file.write(csv_buffer.getvalue())

out.release()
output_files.append(output_file)
output_files.append(csv_file)

return output_files
def update_input_x(value):
global video_path_, value_x, value_y, value_w, value_h
value_x = value
return update_target_frame(video_path_, value_x, value_y, value_w, value_h)


def update_input_y(value):
global video_path_, value_x, value_y, value_w, value_h
value_y = value
return update_target_frame(video_path_, value_x, value_y, value_w, value_h)


def update_input_w(value):
global video_path_, value_x, value_y, value_w, value_h
value_w = value
return update_target_frame(video_path_, value_x, value_y, value_w, value_h)


def update_input_h(value):
global video_path_, value_x, value_y, value_w, value_h
value_h = value
return update_target_frame(video_path_, value_x, value_y, value_w, value_h)


def update_target_frame(video_path, x, y, w, h):
frame = util.get_image_from_video(video_path, 0)
start_point = (int(x), int(y))
end_point = (int(x + w), int(y + h))
color = (255, 0, 0)
thickness = 5
cv2.rectangle(frame, start_point, end_point, color, thickness)
return gr.Image(value=frame)


with gr.Blocks() as main_ui:
with gr.Tab("Upload ML Model"):
gr.Interface(
upload_mlmodel,
[
gr.File(label="Upload a ml model", file_count="multiple", file_types=["pt", "onnx", "engine"])
],
[
gr.Textbox(label="Result")
]
)
with gr.Tab("Image Inference"):
gr.Interface(
inference_image,
[
gr.Image(type="numpy", label="Upload an Image"),
gr.Dropdown(
glob.glob("./ml_model/*"),
value="rtdetr-l.pt",
label="ML Model",
info="Please place the RT-DETR model in the ml_model directory under the root directory of this project! It supports extensions like .pt, .onnx, and .engine!"
),
gr.Slider(
with gr.Tab("Video Inference"):
with gr.Row():
with gr.Column():
input_video = gr.File(label="Upload Video", file_count="single", file_types=[".mp4", ".mov", ".mpg"])
output_image = gr.Image(type="numpy", label="result image")
input_x = gr.Slider(
minimum=0,
maximum=100,
value=75,
label="Confidence",
step=5,
info="Choose between 0% and 100%"
),
],
[
gr.Image(type="numpy", label="result image"),
gr.Textbox(label="Bounding Boxes CSV"),
]
)
with gr.Tab("Video Inferemce"):
gr.Interface(
infer_video,
[
gr.File(label="Upload a Video", file_count="multiple", file_types=["mp4", "mpg", "MOV"]),
gr.Dropdown(
glob.glob("./ml_model/*"),
value="rtdetr-l.pt",
label="ML Model",
info="Please place the RT-DETR model in the ml_model directory under the root directory of this project! It supports extensions like .pt, .onnx, and .engine!"
),
gr.Slider(
value=0,
label="Input X",
step=1,
)
input_y = gr.Slider(
minimum=0,
maximum=100,
value=75,
label="Confidence",
step=5,
info="Choose between 0% and 100%"
),
gr.Radio(
mode_list,
label="Video Creation Options",
info="Choose the type of video to create: fully annotated, time-lapse, or none."
),
],
[
gr.File(label="Annotated Video"),
]
value=0,
label="Input Y",
step=1,
)
input_w = gr.Slider(
minimum=0,
maximum=100,
value=0,
label="Input W",
step=1,
)
input_h = gr.Slider(
minimum=0,
maximum=100,
value=0,
label="Input H",
step=1,
)
submit_button = gr.Button("Start Inference")
with gr.Column():
output_video = gr.File()

input_video.change(
update_input_video,
inputs=input_video,
outputs=[input_x, input_y, input_w, input_h, output_image]
)

input_x.change(update_input_x, inputs=input_x, outputs=[output_image])
input_y.change(update_input_y, inputs=input_y, outputs=[output_image])
input_w.change(update_input_w, inputs=input_w, outputs=[output_image])
input_h.change(update_input_h, inputs=input_h, outputs=[output_image])

submit_button.click(
inference.main_inference,
inputs=[input_video, input_x, input_y, input_w, input_h],
outputs=[output_video])


if __name__ == "__main__":
main_ui.queue().launch(server_name="0.0.0.0")
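Two details worth noting in the launch call: queue() routes events through Gradio's request queue, so long-running inference calls are processed in turn rather than overlapping, and server_name="0.0.0.0" binds the server to all interfaces, which is what makes the UI reachable from outside the container built above.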
8 changes: 0 additions & 8 deletions convert_to_tensorrt.py

This file was deleted.

8 changes: 8 additions & 0 deletions horus/inference.py
@@ -0,0 +1,8 @@
from horus import samurai
from horus import util


def main_inference(video_path, x, y, w, h):
folder_path = util.video_to_images(video_path)
result_video_path = samurai.samurai_inference(folder_path, x, y, w, h)
return result_video_path
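The util helpers called here and in app.py are not part of this diff; below is a plausible sketch of their contracts, inferred purely from the call sites (names, signatures, and behavior are assumptions, not the repository's actual implementation):

# horus/util.py -- hypothetical sketch inferred from the call sites in this PR
import os
import tempfile

import cv2


def video_to_images(video_path: str) -> str:
    """Decode a video into numbered frames on disk; return the folder path."""
    folder = tempfile.mkdtemp(prefix="frames_")
    cap = cv2.VideoCapture(video_path)
    index = 0
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        cv2.imwrite(os.path.join(folder, f"{index:08d}.jpg"), frame)
        index += 1
    cap.release()
    return folder


def get_image_from_video(video_path: str, frame_index: int):
    """Return a single frame of the video as a BGR ndarray (None on failure)."""
    cap = cv2.VideoCapture(video_path)
    cap.set(cv2.CAP_PROP_POS_FRAMES, frame_index)
    ret, frame = cap.read()
    cap.release()
    return frame if ret else None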