Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Experimental image support #314

Merged
merged 2 commits into from
Apr 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions docs/docs/predict-tutorial.md
Original file line number Diff line number Diff line change
Expand Up @@ -158,3 +158,21 @@ And there's so much more! You can also do things like specify your region for fa
### 5. Test your configuration with a dry run

Before kicking off a full run of inference, we recommend testing your code with a "dry run". This will run one batch of inference to quickly detect any bugs. See the [Debugging](debugging.md) page for details.


## Predicting species from images

Zamba does not currently provide comprehensive support for images — only videos are fully supported. We do, however, have experimental support for making predictions on images using our existing models. This may be useful if you have a few images that you would like to classify or you want to compare the performance on a small set of images.

To do this, you will need to set the environment variable `PREDICT_ON_IMAGES=True` (for example by prefacing the `zamba` command with it: `PREDICT_ON_IMAGES=True zamba predict --data-dir example_images/`).

By default, `zamba` will look for files with the following suffixes: `.jpg`, `.jpeg`, `.png`, and `.webp`. To use other image suffixes that are supported by OpenCV, set your `IMAGE_SUFFIXES` environment variable.

The caveats are:

- The models may be less accurate since there is less information in a single image than in a video.
- This approach will be computationally inefficient as compared to a model that works natively on images.
- Blank / non-blank detection may be less effective since only the classification portion is executed, not the detection portion.
- This is not recommended for training or finetuning scenarios given the computational inefficiency.

More comprehensive image support is planned for a future release.
Binary file added tests/assets/images/chimpanzee_bonobo.jpeg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added tests/assets/images/equid.webp
Binary file not shown.
Binary file added tests/assets/images/small_cat.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added tests/assets/images/wild_dog_jackal.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
40 changes: 40 additions & 0 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
import os
from pathlib import Path
import shutil

from typer.testing import CliRunner
import pandas as pd
import pytest
from pytest_mock import mocker # noqa: F401

import zamba
from zamba.cli import app

from conftest import ASSETS_DIR, TEST_VIDEOS_DIR
Expand Down Expand Up @@ -189,6 +191,44 @@ def test_actual_prediction_on_single_video(tmp_path, model): # noqa: F811
)


@pytest.mark.parametrize("model", ["time_distributed", "blank_nonblank"])
def test_actual_prediction_on_images(tmp_path, model, mocker):  # noqa: F811
    """Tests experimental feature of predicting on images."""
    # stage the image assets in a scratch directory so the run can't touch the repo copies
    data_dir = tmp_path / "images"
    shutil.copytree(ASSETS_DIR / "images", data_dir)

    save_dir = tmp_path / "zamba"

    # enable the experimental image-prediction code path for this test only
    mocker.patch.object(zamba.models.config, "PREDICT_ON_IMAGES", True)

    cli_args = [
        "predict",
        "--data-dir",
        str(data_dir),
        "--yes",
        "--save-dir",
        str(save_dir),
        "--model",
        model,
    ]
    result = runner.invoke(app, cli_args)
    assert result.exit_code == 0

    # predictions and the run configuration should both be persisted
    assert save_dir.exists()
    assert (save_dir / "predict_configuration.yaml").exists()

    predictions = pd.read_csv(save_dir / "zamba_predictions.csv", index_col="filepath")

    if model == "time_distributed":
        # asset filenames encode their expected label (e.g. equid.webp -> "equid")
        for img, label in predictions.idxmax(axis=1).items():
            assert Path(img).stem == label

    if model == "blank_nonblank":
        # none of the test images are blank, so blank probability should be low
        assert (predictions.blank < 0.1).all()


def test_depth_cli_options(mocker, tmp_path): # noqa: F811
mocker.patch("zamba.models.depth_estimation.config.DepthEstimationConfig.run_model", pred_mock)

Expand Down
44 changes: 44 additions & 0 deletions zamba/data/video.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
MegadetectorLiteYoloX,
MegadetectorLiteYoloXConfig,
)
from zamba.settings import IMAGE_SUFFIXES


def ffprobe(path: os.PathLike) -> pd.Series:
Expand Down Expand Up @@ -414,13 +415,49 @@
)


def load_and_repeat_image(path, target_size=(224, 224), repeat_count=4):
    """
    Loads an image, resizes it, and repeats it N times.

    Args:
        path: Path to the image file.
        target_size: A tuple (w, h) representing the desired width and height of the resized image.
        repeat_count: Number of times to repeat the image.

    Returns:
        A NumPy array of shape (N, h, w, 3) representing the repeated image.

    Raises:
        ValueError: If the image cannot be read from `path`.
    """
    image = cv2.imread(str(path))

    # cv2.imread returns None (rather than raising) for missing, unreadable, or
    # unsupported files; fail loudly here instead of crashing later inside
    # cv2.resize with an opaque error
    if image is None:
        raise ValueError(f"Could not read image from {path}")

    # Resize the image in same way as video frames are in `load_video_frames`
    image = cv2.resize(
        image,
        target_size,
        # https://stackoverflow.com/a/51042104/1692709
        # INTER_LINEAR for upscaling, INTER_AREA for downscaling
        interpolation=(
            cv2.INTER_LINEAR
            if image.shape[1] < target_size[0]  # compare image width with target width
            else cv2.INTER_AREA
        ),
    )

    # add a leading (frame) axis so the single image mimics the
    # (frames, h, w, channels) shape of decoded video
    image_array = np.expand_dims(image, axis=0)

    # Repeat the image N times
    repeated_image = np.repeat(image_array, repeat_count, axis=0)

    return repeated_image


def load_video_frames(
filepath: os.PathLike,
config: Optional[VideoLoaderConfig] = None,
**kwargs,
):
"""Loads frames from videos using fast ffmpeg commands.

Supports images as well, but it is inefficient since we just replicate the frames.

Args:
filepath (os.PathLike): Path to the video.
config (VideoLoaderConfig, optional): Configuration for video loading.
Expand All @@ -435,6 +472,13 @@
if config is None:
config = VideoLoaderConfig(**kwargs)

if Path(filepath).suffix.lower() in IMAGE_SUFFIXES:
return load_and_repeat_image(

Check warning on line 476 in zamba/data/video.py

View check run for this annotation

Codecov / codecov/patch

zamba/data/video.py#L476

Added line #L476 was not covered by tests
filepath,
target_size=(config.model_input_width, config.model_input_height),
repeat_count=config.total_frames,
)

video_stream = get_video_stream(filepath)
w = int(video_stream["width"])
h = int(video_stream["height"])
Expand Down
6 changes: 4 additions & 2 deletions zamba/models/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
RegionEnum,
)
from zamba.pytorch.transforms import zamba_image_model_transforms, slowfast_transforms
from zamba.settings import SPLIT_SEED, VIDEO_SUFFIXES
from zamba.settings import IMAGE_SUFFIXES, PREDICT_ON_IMAGES, SPLIT_SEED, VIDEO_SUFFIXES


GPUS_AVAILABLE = torch.cuda.device_count()
Expand Down Expand Up @@ -224,11 +224,13 @@ def get_filepaths(cls, values):
new_suffixes = []

# iterate over all files in data directory
for f in values["data_dir"].rglob("*"):
for f in Path(values["data_dir"]).rglob("*"):
if f.is_file():
# keep just files with supported suffixes
if f.suffix.lower() in VIDEO_SUFFIXES:
files.append(f.resolve())
elif PREDICT_ON_IMAGES and f.suffix.lower() in IMAGE_SUFFIXES:
files.append(f.resolve())
else:
new_suffixes.append(f.suffix.lower())

Expand Down
7 changes: 7 additions & 0 deletions zamba/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,10 @@

# random seed to use for splitting data without site info into train / val / holdout sets
SPLIT_SEED = os.environ.get("SPLIT_SEED", 4007)


# experimental support for predicting on images
# suffixes are normalized to lowercase with a leading dot so they match the
# `Path(...).suffix.lower()` comparisons used when filtering files; empty
# entries (e.g. from a trailing comma in the env var) are dropped
IMAGE_SUFFIXES = [
    ext if ext.startswith(".") else f".{ext}"
    for ext in (
        raw.strip().lower()
        for raw in os.environ.get("IMAGE_SUFFIXES", ".jpg,.jpeg,.png,.webp").split(",")
    )
    if ext
]
# opt-in flag for the experimental image-prediction code path
PREDICT_ON_IMAGES = os.environ.get("PREDICT_ON_IMAGES", "False").lower() == "true"
Loading