Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improved heuristic transformation estimation algorithms and integrated transformation.Buffer with them #57

Merged
merged 4 commits into from
Nov 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion child_lab_framework/_cli/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,11 +189,18 @@ def estimate_transformations(
required=False,
help='Torch device to use for tensor computations',
)
@click.option(
'--skip',
type=int,
required=False,
help='Seconds of videos to skip at the beginning',
)
# @click_trap()
def process(
workspace: Path,
videos: list[Path],
device: str | None,
skip: int | None,
) -> None:
video_dir = workspace / 'input'
calibration_dir = workspace / 'calibration'
Expand Down Expand Up @@ -227,7 +234,7 @@ def process(

click.echo(f'Processing {"video" if len(videos) == 1 else "videos"}...')

demo_sequential.main(inputs, device_handle, destination) # type: ignore
demo_sequential.main(inputs, device_handle, destination, skip) # type: ignore

click.echo('Done!')

Expand Down
147 changes: 123 additions & 24 deletions child_lab_framework/_procedure/demo_sequential.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,22 @@

import torch

from ..core import transformation
from ..core.video import Format, Input, Reader, Writer
from ..logging import Logger
from ..task import depth, face, gaze, pose
from ..task.camera import transformation
from ..task.camera.transformation import heuristic as heuristic_transformation
from ..task.visualization import Configuration as VisualizationConfiguration
from ..task.visualization import Visualizer

BATCH_SIZE = 32


def main(
inputs: tuple[Input, Input, Input], device: torch.device, output_directory: Path
inputs: tuple[Input, Input, Input],
device: torch.device,
output_directory: Path,
skip: int | None,
) -> None:
# ignore exceeded allocation limit on MPS and CUDA - very important!
os.environ['PYTORCH_MPS_HIGH_WATERMARK_RATIO'] = '0.0'
Expand All @@ -38,6 +42,7 @@ def main(
width=ceiling_properties.width,
fps=ceiling_properties.fps,
)
window_left_properties = window_left_reader.properties

window_right_reader = Reader(
window_right,
Expand All @@ -46,20 +51,32 @@ def main(
width=ceiling_properties.width,
fps=ceiling_properties.fps,
)
window_right_properties = window_right_reader.properties

depth_estimator = depth.Estimator(executor, device, input=ceiling_reader.properties)
depth_estimator = depth.Estimator(executor, device, input=ceiling_properties)

transformation_estimator = transformation.heuristic.Estimator(
transformation_buffer: transformation.Buffer[str] = transformation.Buffer()

window_left_to_ceiling_transformation_estimator = heuristic_transformation.Estimator(
executor,
transformation_buffer,
window_left_properties,
ceiling_properties,
keypoint_threshold=0.35,
)

window_right_to_ceiling_transformation_estimator = heuristic_transformation.Estimator(
executor,
window_left_reader.properties,
ceiling_reader.properties,
transformation_buffer,
window_right_properties,
ceiling_properties,
keypoint_threshold=0.35,
)

pose_estimator = pose.Estimator(
executor,
device,
input=ceiling_reader.properties,
input=ceiling_properties,
max_detections=2,
threshold=0.5,
)
Expand All @@ -69,69 +86,99 @@ def main(
# A workaround to use the model efficiently on both desktop and server.
# TODO: remove this as soon as it's possible to specify device per component via CLI/config file.
device if device == torch.device('cuda') else torch.device('cpu'),
input=ceiling_reader.properties,
input=ceiling_properties,
confidence_threshold=0.5,
suppression_threshold=0.1,
)

window_left_gaze_estimator = gaze.Estimator(
executor,
input=window_left_reader.properties,
input=window_left_properties,
)

window_right_gaze_estimator = gaze.Estimator(
executor,
input=window_right_reader.properties,
input=window_right_properties,
)

ceiling_gaze_estimator = gaze.ceiling_projection.Estimator(
executor,
ceiling_reader.properties,
window_left_reader.properties,
window_right_reader.properties,
ceiling_properties,
window_left_properties,
window_right_properties,
)

# social_distance_estimator = social_distance.Estimator(executor)
# social_distance_logger = social_distance.FileLogger('dev/output/distance.csv')

ceiling_visualizer = Visualizer(
executor,
properties=ceiling_reader.properties,
properties=ceiling_properties,
configuration=VisualizationConfiguration(),
)

window_left_visualizer = Visualizer(
executor,
properties=window_left_reader.properties,
properties=window_left_properties,
configuration=VisualizationConfiguration(),
)

window_right_visualizer = Visualizer(
executor,
properties=window_right_reader.properties,
properties=window_right_properties,
configuration=VisualizationConfiguration(),
)

ceiling_writer = Writer(
output_directory / (ceiling.name + '.mp4'),
ceiling_reader.properties,
ceiling_properties,
output_format=Format.MP4,
)

ceiling_projection_writer = Writer(
output_directory / (ceiling.name + '_projections.mp4'),
ceiling_properties,
output_format=Format.MP4,
)

ceiling_depth_writer = Writer(
output_directory / (ceiling.name + '_depth.mp4'),
ceiling_properties,
output_format=Format.MP4,
)

window_left_depth_writer = Writer(
output_directory / (window_left.name + '_depth.mp4'),
window_left_properties,
output_format=Format.MP4,
)

window_right_depth_writer = Writer(
output_directory / (window_right.name + '_depth.mp4'),
window_right_properties,
output_format=Format.MP4,
)

window_left_writer = Writer(
output_directory / (window_left.name + '.mp4'),
window_left_reader.properties,
window_left_properties,
output_format=Format.MP4,
)

window_right_writer = Writer(
output_directory / (window_right.name + '.mp4'),
window_right_reader.properties,
window_right_properties,
output_format=Format.MP4,
)

Logger.info('Components instantiated')

if skip is not None and skip > 0:
frames_to_skip = skip * ceiling_properties.fps
ceiling_reader.read_skipping(frames_to_skip)
window_left_reader.read_skipping(frames_to_skip)
window_right_reader.read_skipping(frames_to_skip)

while True:
ceiling_frames = ceiling_reader.read_batch()
if ceiling_frames is None:
Expand Down Expand Up @@ -163,34 +210,54 @@ def main(
Logger.error('window_right_poses == None')

Logger.info('Estimating depth...')
ceiling_depth = depth_estimator.predict(ceiling_frames[0])
ceiling_depth = depth_estimator.predict(
ceiling_frames[0],
ceiling_properties,
)
window_left_depth = depth_estimator.predict(
window_left_frames[0],
window_left_properties,
)
window_right_depth = depth_estimator.predict(
window_right_frames[0],
window_right_properties,
)

ceiling_depths = [ceiling_depth for _ in range(n_frames)]
window_left_depths = [window_left_depth for _ in range(n_frames)]
window_right_depths = [window_right_depth for _ in range(n_frames)]
Logger.info('Done!')

Logger.info('Estimating transformations...')
window_left_to_ceiling = (
transformation_estimator.predict_batch(
window_left_to_ceiling_transformation_estimator.predict_batch(
ceiling_poses,
window_left_poses,
ceiling_depths,
[None for _ in range(n_frames)], # type: ignore # safe to pass
window_left_depths,
)
if ceiling_poses is not None and window_left_poses is not None
else None
)

window_right_to_ceiling = (
transformation_estimator.predict_batch(
window_right_to_ceiling_transformation_estimator.predict_batch(
ceiling_poses,
window_right_poses,
ceiling_depths,
[None for _ in range(n_frames)], # type: ignore # safe to pass
window_right_depths,
)
if ceiling_poses is not None and window_right_poses is not None
else None
)
Logger.info('Done!')

if window_left_to_ceiling is None:
Logger.error('window_left_to_ceiling == None')

if window_right_to_ceiling is None:
Logger.error('window_right_to_ceiling == None')

Logger.info('Detecting faces...')
window_left_faces = (
face_estimator.predict_batch(window_left_frames, window_left_poses)
Expand Down Expand Up @@ -241,7 +308,29 @@ def main(
)
Logger.info('Done!')

if window_left_gazes is None:
Logger.error('window_left_gazes == None')

if window_right_gazes is None:
Logger.error('window_right_gazes == None')

Logger.info('Visualizing results...')
ceiling_projection_annotated_frames = ceiling_visualizer.annotate_batch(
ceiling_frames,
[
p.unproject(window_left_properties.calibration, ceiling_depth)
.transform(t.inverse)
.project(ceiling_properties.calibration)
for p, t in zip(window_left_poses or [], window_left_to_ceiling or [])
],
[
p.unproject(window_right_properties.calibration, ceiling_depth)
.transform(t.inverse)
.project(ceiling_properties.calibration)
for p, t in zip(window_right_poses or [], window_right_to_ceiling or [])
],
)

ceiling_annotated_frames = ceiling_visualizer.annotate_batch(
ceiling_frames,
ceiling_poses,
Expand All @@ -264,6 +353,16 @@ def main(
Logger.info('Done!')

Logger.info('Saving results...')
ceiling_projection_writer.write_batch(ceiling_projection_annotated_frames)

ceiling_depth_writer.write_batch([depth.to_frame(d) for d in ceiling_depths])
window_left_depth_writer.write_batch(
[depth.to_frame(d) for d in window_left_depths]
)
window_right_depth_writer.write_batch(
[depth.to_frame(d) for d in window_right_depths]
)

ceiling_writer.write_batch(ceiling_annotated_frames)
window_left_writer.write_batch(window_left_annotated_frames)
window_right_writer.write_batch(window_right_annotated_frames)
Expand Down
16 changes: 16 additions & 0 deletions child_lab_framework/core/algebra.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from enum import IntEnum
from typing import Literal

import numpy as np
from scipy.spatial.transform import Rotation

from ..typing.array import FloatArray1, FloatArray2, FloatArray3, FloatArray6
from .calibration import Calibration
Expand Down Expand Up @@ -31,6 +33,20 @@ def rotation_matrix(angle: float, axis: Axis) -> FloatArray2:
)


def euler_angles_from_rotation_matrix(
    rotation: FloatArray2,
) -> np.ndarray[tuple[Literal[3]], np.dtype[np.float32]]:
    """Convert a rotation matrix into Euler angles.

    The conversion is delegated to SciPy's `Rotation`, using the 'xyz'
    axis sequence (lowercase denotes extrinsic rotations in SciPy) and
    radians as the unit.

    Parameters
    ----------
    rotation
        Rotation matrix accepted by `Rotation.from_matrix`.

    Returns
    -------
    The Euler angles in radians, cast to `float32`.
    """

    euler_angles = Rotation.from_matrix(rotation).as_euler('xyz', degrees=False)
    return euler_angles.astype(np.float32)


def rotation_matrix_from_euler_angles(
    angles: np.ndarray[tuple[Literal[3]], np.dtype[np.float32]],
) -> FloatArray2:
    """Build a rotation matrix from Euler angles.

    The angles are interpreted by SciPy's `Rotation.from_euler` with the
    'xyz' axis sequence (lowercase denotes extrinsic rotations in SciPy),
    expressed in radians.

    Parameters
    ----------
    angles
        Three Euler angles in radians.

    Returns
    -------
    The rotation matrix produced by `Rotation.as_matrix`.
    """

    rotation = Rotation.from_euler('xyz', angles, degrees=False)
    return rotation.as_matrix()


def normalized(vecs: FloatArray2) -> FloatArray2:
    """Scale every row of `vecs` to unit Euclidean (L2) length.

    Parameters
    ----------
    vecs
        Two-dimensional array whose rows are the vectors to normalize.

    Returns
    -------
    An array of the same shape as `vecs` with each row divided by its own
    L2 norm. Rows with zero norm are not guarded against and yield
    non-finite values from the division.
    """

    # keepdims=True keeps the norms as an (n, 1) column, so the division
    # broadcasts row-wise. A flat (n,) result would either fail to broadcast
    # against (n, d) or, when n == d, divide columns by the wrong row norms.
    norm = np.linalg.norm(vecs, ord=2.0, axis=1, keepdims=True)
    return vecs / norm
Expand Down
8 changes: 8 additions & 0 deletions child_lab_framework/core/transformation/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
from .interface import Projectable, Transformable, Unprojectable

from .error import reprojection_error # isort: skip

from .transformation import (
EuclideanTransformation,
ProjectiveTransformation,
Expand All @@ -7,8 +11,12 @@
from .buffer import Buffer # isort: skip

__all__ = [
'Projectable',
'Transformable',
'Unprojectable',
'Buffer',
'Transformation',
'EuclideanTransformation',
'ProjectiveTransformation',
'reprojection_error',
]
Loading
Loading