Skip to content

Commit

Permalink
Integrated Buffer with heuristic camera transformation estimation
Browse files Browse the repository at this point in the history
Improved heuristic transformation estimation algorithms

Enhanced `demo_sequential` procedure with depth and reprojection visualizations
  • Loading branch information
integraledelebesgue committed Nov 23, 2024
1 parent 1860b13 commit 11164a2
Show file tree
Hide file tree
Showing 11 changed files with 443 additions and 237 deletions.
9 changes: 8 additions & 1 deletion child_lab_framework/_cli/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,11 +189,18 @@ def estimate_transformations(
required=False,
help='Torch device to use for tensor computations',
)
@click.option(
'--skip',
type=int,
required=False,
help='Seconds of videos to skip at the beginning',
)
# @click_trap()
def process(
workspace: Path,
videos: list[Path],
device: str | None,
skip: int | None,
) -> None:
video_dir = workspace / 'input'
calibration_dir = workspace / 'calibration'
Expand Down Expand Up @@ -227,7 +234,7 @@ def process(

click.echo(f'Processing {"video" if len(videos) == 1 else "videos"}...')

demo_sequential.main(inputs, device_handle, destination) # type: ignore
demo_sequential.main(inputs, device_handle, destination, skip) # type: ignore

click.echo('Done!')

Expand Down
147 changes: 123 additions & 24 deletions child_lab_framework/_procedure/demo_sequential.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,22 @@

import torch

from ..core import transformation
from ..core.video import Format, Input, Reader, Writer
from ..logging import Logger
from ..task import depth, face, gaze, pose
from ..task.camera import transformation
from ..task.camera.transformation import heuristic as heuristic_transformation
from ..task.visualization import Configuration as VisualizationConfiguration
from ..task.visualization import Visualizer

BATCH_SIZE = 32


def main(
inputs: tuple[Input, Input, Input], device: torch.device, output_directory: Path
inputs: tuple[Input, Input, Input],
device: torch.device,
output_directory: Path,
skip: int | None,
) -> None:
# ignore exceeded allocation limit on MPS and CUDA - very important!
os.environ['PYTORCH_MPS_HIGH_WATERMARK_RATIO'] = '0.0'
Expand All @@ -38,6 +42,7 @@ def main(
width=ceiling_properties.width,
fps=ceiling_properties.fps,
)
window_left_properties = window_left_reader.properties

window_right_reader = Reader(
window_right,
Expand All @@ -46,20 +51,32 @@ def main(
width=ceiling_properties.width,
fps=ceiling_properties.fps,
)
window_right_properties = window_right_reader.properties

depth_estimator = depth.Estimator(executor, device, input=ceiling_reader.properties)
depth_estimator = depth.Estimator(executor, device, input=ceiling_properties)

transformation_estimator = transformation.heuristic.Estimator(
transformation_buffer: transformation.Buffer[str] = transformation.Buffer()

window_left_to_ceiling_transformation_estimator = heuristic_transformation.Estimator(
executor,
transformation_buffer,
window_left_properties,
ceiling_properties,
keypoint_threshold=0.35,
)

window_right_to_ceiling_transformation_estimator = heuristic_transformation.Estimator(
executor,
window_left_reader.properties,
ceiling_reader.properties,
transformation_buffer,
window_right_properties,
ceiling_properties,
keypoint_threshold=0.35,
)

pose_estimator = pose.Estimator(
executor,
device,
input=ceiling_reader.properties,
input=ceiling_properties,
max_detections=2,
threshold=0.5,
)
Expand All @@ -69,69 +86,99 @@ def main(
# A workaround to use the model efficiently on both desktop and server.
# TODO: remove this as soon as it's possible to specify device per component via CLI/config file.
device if device == torch.device('cuda') else torch.device('cpu'),
input=ceiling_reader.properties,
input=ceiling_properties,
confidence_threshold=0.5,
suppression_threshold=0.1,
)

window_left_gaze_estimator = gaze.Estimator(
executor,
input=window_left_reader.properties,
input=window_left_properties,
)

window_right_gaze_estimator = gaze.Estimator(
executor,
input=window_right_reader.properties,
input=window_right_properties,
)

ceiling_gaze_estimator = gaze.ceiling_projection.Estimator(
executor,
ceiling_reader.properties,
window_left_reader.properties,
window_right_reader.properties,
ceiling_properties,
window_left_properties,
window_right_properties,
)

# social_distance_estimator = social_distance.Estimator(executor)
# social_distance_logger = social_distance.FileLogger('dev/output/distance.csv')

ceiling_visualizer = Visualizer(
executor,
properties=ceiling_reader.properties,
properties=ceiling_properties,
configuration=VisualizationConfiguration(),
)

window_left_visualizer = Visualizer(
executor,
properties=window_left_reader.properties,
properties=window_left_properties,
configuration=VisualizationConfiguration(),
)

window_right_visualizer = Visualizer(
executor,
properties=window_right_reader.properties,
properties=window_right_properties,
configuration=VisualizationConfiguration(),
)

ceiling_writer = Writer(
output_directory / (ceiling.name + '.mp4'),
ceiling_reader.properties,
ceiling_properties,
output_format=Format.MP4,
)

ceiling_projection_writer = Writer(
output_directory / (ceiling.name + '_projections.mp4'),
ceiling_properties,
output_format=Format.MP4,
)

ceiling_depth_writer = Writer(
output_directory / (ceiling.name + '_depth.mp4'),
ceiling_properties,
output_format=Format.MP4,
)

window_left_depth_writer = Writer(
output_directory / (window_left.name + '_depth.mp4'),
window_left_properties,
output_format=Format.MP4,
)

window_right_depth_writer = Writer(
output_directory / (window_right.name + '_depth.mp4'),
window_right_properties,
output_format=Format.MP4,
)

window_left_writer = Writer(
output_directory / (window_left.name + '.mp4'),
window_left_reader.properties,
window_left_properties,
output_format=Format.MP4,
)

window_right_writer = Writer(
output_directory / (window_right.name + '.mp4'),
window_right_reader.properties,
window_right_properties,
output_format=Format.MP4,
)

Logger.info('Components instantiated')

if skip is not None and skip > 0:
frames_to_skip = skip * ceiling_properties.fps
ceiling_reader.read_skipping(frames_to_skip)
window_left_reader.read_skipping(frames_to_skip)
window_right_reader.read_skipping(frames_to_skip)

while True:
ceiling_frames = ceiling_reader.read_batch()
if ceiling_frames is None:
Expand Down Expand Up @@ -163,34 +210,54 @@ def main(
Logger.error('window_right_poses == None')

Logger.info('Estimating depth...')
ceiling_depth = depth_estimator.predict(ceiling_frames[0])
ceiling_depth = depth_estimator.predict(
ceiling_frames[0],
ceiling_properties,
)
window_left_depth = depth_estimator.predict(
window_left_frames[0],
window_left_properties,
)
window_right_depth = depth_estimator.predict(
window_right_frames[0],
window_right_properties,
)

ceiling_depths = [ceiling_depth for _ in range(n_frames)]
window_left_depths = [window_left_depth for _ in range(n_frames)]
window_right_depths = [window_right_depth for _ in range(n_frames)]
Logger.info('Done!')

Logger.info('Estimating transformations...')
window_left_to_ceiling = (
transformation_estimator.predict_batch(
window_left_to_ceiling_transformation_estimator.predict_batch(
ceiling_poses,
window_left_poses,
ceiling_depths,
[None for _ in range(n_frames)], # type: ignore # safe to pass
window_left_depths,
)
if ceiling_poses is not None and window_left_poses is not None
else None
)

window_right_to_ceiling = (
transformation_estimator.predict_batch(
window_right_to_ceiling_transformation_estimator.predict_batch(
ceiling_poses,
window_right_poses,
ceiling_depths,
[None for _ in range(n_frames)], # type: ignore # safe to pass
window_right_depths,
)
if ceiling_poses is not None and window_right_poses is not None
else None
)
Logger.info('Done!')

if window_left_to_ceiling is None:
Logger.error('window_left_to_ceiling == None')

if window_right_to_ceiling is None:
Logger.error('window_right_to_ceiling == None')

Logger.info('Detecting faces...')
window_left_faces = (
face_estimator.predict_batch(window_left_frames, window_left_poses)
Expand Down Expand Up @@ -241,7 +308,29 @@ def main(
)
Logger.info('Done!')

if window_left_gazes is None:
Logger.error('window_left_gazes == None')

if window_right_gazes is None:
Logger.error('window_right_gazes == None')

Logger.info('Visualizing results...')
ceiling_projection_annotated_frames = ceiling_visualizer.annotate_batch(
ceiling_frames,
[
p.unproject(window_left_properties.calibration, ceiling_depth)
.transform(t.inverse)
.project(ceiling_properties.calibration)
for p, t in zip(window_left_poses or [], window_left_to_ceiling or [])
],
[
p.unproject(window_right_properties.calibration, ceiling_depth)
.transform(t.inverse)
.project(ceiling_properties.calibration)
for p, t in zip(window_right_poses or [], window_right_to_ceiling or [])
],
)

ceiling_annotated_frames = ceiling_visualizer.annotate_batch(
ceiling_frames,
ceiling_poses,
Expand All @@ -264,6 +353,16 @@ def main(
Logger.info('Done!')

Logger.info('Saving results...')
ceiling_projection_writer.write_batch(ceiling_projection_annotated_frames)

ceiling_depth_writer.write_batch([depth.to_frame(d) for d in ceiling_depths])
window_left_depth_writer.write_batch(
[depth.to_frame(d) for d in window_left_depths]
)
window_right_depth_writer.write_batch(
[depth.to_frame(d) for d in window_right_depths]
)

ceiling_writer.write_batch(ceiling_annotated_frames)
window_left_writer.write_batch(window_left_annotated_frames)
window_right_writer.write_batch(window_right_annotated_frames)
Expand Down
16 changes: 16 additions & 0 deletions child_lab_framework/core/algebra.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from enum import IntEnum
from typing import Literal

import numpy as np
from scipy.spatial.transform import Rotation

from ..typing.array import FloatArray1, FloatArray2, FloatArray3, FloatArray6
from .calibration import Calibration
Expand Down Expand Up @@ -31,6 +33,20 @@ def rotation_matrix(angle: float, axis: Axis) -> FloatArray2:
)


def euler_angles_from_rotation_matrix(
rotation: FloatArray2,
) -> np.ndarray[tuple[Literal[3]], np.dtype[np.float32]]:
return (
Rotation.from_matrix(rotation).as_euler('xyz', degrees=False).astype(np.float32)
)


def rotation_matrix_from_euler_angles(
angles: np.ndarray[tuple[Literal[3]], np.dtype[np.float32]],
) -> FloatArray2:
return Rotation.from_euler('xyz', angles, degrees=False).as_matrix()


def normalized(vecs: FloatArray2) -> FloatArray2:
norm = np.linalg.norm(vecs, ord=2.0, axis=1)
return vecs / norm
Expand Down
3 changes: 3 additions & 0 deletions child_lab_framework/core/video.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ class Format(Enum):

@dataclass(frozen=True, repr=False)
class Properties:
name: str
length: int
height: int
width: int
Expand Down Expand Up @@ -93,6 +94,7 @@ def __init__(
)

self.__input_properties = Properties(
input.name,
input_length,
input_height,
input_width,
Expand All @@ -102,6 +104,7 @@ def __init__(

# Output properties with maybe mimicked parameters
self.properties = Properties(
input.name,
input_length * self.__frame_repetitions,
mimicked_height,
mimicked_width,
Expand Down
Loading

0 comments on commit 11164a2

Please sign in to comment.