child-lab-uj · integraledelebesgue · Nov 23, 2024 · Nov 9, 2024 · Nov 11, 2024 · Nov 9, 2024
diff --git a/child_lab_framework/_cli/cli.py b/child_lab_framework/_cli/cli.py
@@ -189,11 +189,18 @@ def estimate_transformations(
     required=False,
     help='Torch device to use for tensor computations',
 )
+@click.option(
+    '--skip',
+    type=int,
+    required=False,
+    help='Seconds of videos to skip at the beginning',
+)
 # @click_trap()
 def process(
     workspace: Path,
     videos: list[Path],
     device: str | None,
+    skip: int | None,
 ) -> None:
     video_dir = workspace / 'input'
     calibration_dir = workspace / 'calibration'
@@ -227,7 +234,7 @@ def process(
 
     click.echo(f'Processing {"video" if len(videos) == 1 else "videos"}...')
 
-    demo_sequential.main(inputs, device_handle, destination)  # type: ignore
+    demo_sequential.main(inputs, device_handle, destination, skip)  # type: ignore
 
     click.echo('Done!')
 

diff --git a/child_lab_framework/_procedure/demo_sequential.py b/child_lab_framework/_procedure/demo_sequential.py
@@ -4,18 +4,22 @@
 
 import torch
 
+from ..core import transformation
 from ..core.video import Format, Input, Reader, Writer
 from ..logging import Logger
 from ..task import depth, face, gaze, pose
-from ..task.camera import transformation
+from ..task.camera.transformation import heuristic as heuristic_transformation
 from ..task.visualization import Configuration as VisualizationConfiguration
 from ..task.visualization import Visualizer
 
 BATCH_SIZE = 32
 
 
 def main(
-    inputs: tuple[Input, Input, Input], device: torch.device, output_directory: Path
+    inputs: tuple[Input, Input, Input],
+    device: torch.device,
+    output_directory: Path,
+    skip: int | None,
 ) -> None:
     # ignore exceeded allocation limit on MPS and CUDA - very important!
     os.environ['PYTORCH_MPS_HIGH_WATERMARK_RATIO'] = '0.0'
@@ -38,6 +42,7 @@ def main(
         width=ceiling_properties.width,
         fps=ceiling_properties.fps,
     )
+    window_left_properties = window_left_reader.properties
 
     window_right_reader = Reader(
         window_right,
@@ -46,20 +51,32 @@ def main(
         width=ceiling_properties.width,
         fps=ceiling_properties.fps,
     )
+    window_right_properties = window_right_reader.properties
 
-    depth_estimator = depth.Estimator(executor, device, input=ceiling_reader.properties)
+    depth_estimator = depth.Estimator(executor, device, input=ceiling_properties)
 
-    transformation_estimator = transformation.heuristic.Estimator(
+    transformation_buffer: transformation.Buffer[str] = transformation.Buffer()
+
+    window_left_to_ceiling_transformation_estimator = heuristic_transformation.Estimator(
+        executor,
+        transformation_buffer,
+        window_left_properties,
+        ceiling_properties,
+        keypoint_threshold=0.35,
+    )
+
+    window_right_to_ceiling_transformation_estimator = heuristic_transformation.Estimator(
         executor,
-        window_left_reader.properties,
-        ceiling_reader.properties,
+        transformation_buffer,
+        window_right_properties,
+        ceiling_properties,
         keypoint_threshold=0.35,
     )
 
     pose_estimator = pose.Estimator(
         executor,
         device,
-        input=ceiling_reader.properties,
+        input=ceiling_properties,
         max_detections=2,
         threshold=0.5,
     )
@@ -69,69 +86,99 @@ def main(
         # A workaround to use the model efficiently on both desktop and server.
         # TODO: remove this as soon as it's possible to specify device per component via CLI/config file.
         device if device == torch.device('cuda') else torch.device('cpu'),
-        input=ceiling_reader.properties,
+        input=ceiling_properties,
         confidence_threshold=0.5,
         suppression_threshold=0.1,
     )
 
     window_left_gaze_estimator = gaze.Estimator(
         executor,
-        input=window_left_reader.properties,
+        input=window_left_properties,
     )
 
     window_right_gaze_estimator = gaze.Estimator(
         executor,
-        input=window_right_reader.properties,
+        input=window_right_properties,
     )
 
     ceiling_gaze_estimator = gaze.ceiling_projection.Estimator(
         executor,
-        ceiling_reader.properties,
-        window_left_reader.properties,
-        window_right_reader.properties,
+        ceiling_properties,
+        window_left_properties,
+        window_right_properties,
     )
 
     # social_distance_estimator = social_distance.Estimator(executor)
     # social_distance_logger = social_distance.FileLogger('dev/output/distance.csv')
 
     ceiling_visualizer = Visualizer(
         executor,
-        properties=ceiling_reader.properties,
+        properties=ceiling_properties,
         configuration=VisualizationConfiguration(),
     )
 
     window_left_visualizer = Visualizer(
         executor,
-        properties=window_left_reader.properties,
+        properties=window_left_properties,
         configuration=VisualizationConfiguration(),
     )
 
     window_right_visualizer = Visualizer(
         executor,
-        properties=window_right_reader.properties,
+        properties=window_right_properties,
         configuration=VisualizationConfiguration(),
     )
 
     ceiling_writer = Writer(
         output_directory / (ceiling.name + '.mp4'),
-        ceiling_reader.properties,
+        ceiling_properties,
+        output_format=Format.MP4,
+    )
+
+    ceiling_projection_writer = Writer(
+        output_directory / (ceiling.name + '_projections.mp4'),
+        ceiling_properties,
+        output_format=Format.MP4,
+    )
+
+    ceiling_depth_writer = Writer(
+        output_directory / (ceiling.name + '_depth.mp4'),
+        ceiling_properties,
+        output_format=Format.MP4,
+    )
+
+    window_left_depth_writer = Writer(
+        output_directory / (window_left.name + '_depth.mp4'),
+        window_left_properties,
+        output_format=Format.MP4,
+    )
+
+    window_right_depth_writer = Writer(
+        output_directory / (window_right.name + '_depth.mp4'),
+        window_right_properties,
         output_format=Format.MP4,
     )
 
     window_left_writer = Writer(
         output_directory / (window_left.name + '.mp4'),
-        window_left_reader.properties,
+        window_left_properties,
         output_format=Format.MP4,
     )
 
     window_right_writer = Writer(
         output_directory / (window_right.name + '.mp4'),
-        window_right_reader.properties,
+        window_right_properties,
         output_format=Format.MP4,
     )
 
     Logger.info('Components instantiated')
 
+    if skip is not None and skip > 0:
+        frames_to_skip = skip * ceiling_properties.fps
+        ceiling_reader.read_skipping(frames_to_skip)
+        window_left_reader.read_skipping(frames_to_skip)
+        window_right_reader.read_skipping(frames_to_skip)
+
     while True:
         ceiling_frames = ceiling_reader.read_batch()
         if ceiling_frames is None:
@@ -163,34 +210,54 @@ def main(
             Logger.error('window_right_poses == None')
 
         Logger.info('Estimating depth...')
-        ceiling_depth = depth_estimator.predict(ceiling_frames[0])
+        ceiling_depth = depth_estimator.predict(
+            ceiling_frames[0],
+            ceiling_properties,
+        )
+        window_left_depth = depth_estimator.predict(
+            window_left_frames[0],
+            window_left_properties,
+        )
+        window_right_depth = depth_estimator.predict(
+            window_right_frames[0],
+            window_right_properties,
+        )
+
         ceiling_depths = [ceiling_depth for _ in range(n_frames)]
+        window_left_depths = [window_left_depth for _ in range(n_frames)]
+        window_right_depths = [window_right_depth for _ in range(n_frames)]
         Logger.info('Done!')
 
         Logger.info('Estimating transformations...')
         window_left_to_ceiling = (
-            transformation_estimator.predict_batch(
+            window_left_to_ceiling_transformation_estimator.predict_batch(
                 ceiling_poses,
                 window_left_poses,
                 ceiling_depths,
-                [None for _ in range(n_frames)],  # type: ignore  # safe to pass
+                window_left_depths,
             )
             if ceiling_poses is not None and window_left_poses is not None
             else None
         )
 
         window_right_to_ceiling = (
-            transformation_estimator.predict_batch(
+            window_right_to_ceiling_transformation_estimator.predict_batch(
                 ceiling_poses,
                 window_right_poses,
                 ceiling_depths,
-                [None for _ in range(n_frames)],  # type: ignore  # safe to pass
+                window_right_depths,
             )
             if ceiling_poses is not None and window_right_poses is not None
             else None
         )
         Logger.info('Done!')
 
+        if window_left_to_ceiling is None:
+            Logger.error('window_left_to_ceiling == None')
+
+        if window_right_to_ceiling is None:
+            Logger.error('window_right_to_ceiling == None')
+
         Logger.info('Detecting faces...')
         window_left_faces = (
             face_estimator.predict_batch(window_left_frames, window_left_poses)
@@ -241,7 +308,29 @@ def main(
         )
         Logger.info('Done!')
 
+        if window_left_gazes is None:
+            Logger.error('window_left_gazes == None')
+
+        if window_right_gazes is None:
+            Logger.error('window_right_gazes == None')
+
         Logger.info('Visualizing results...')
+        ceiling_projection_annotated_frames = ceiling_visualizer.annotate_batch(
+            ceiling_frames,
+            [
+                p.unproject(window_left_properties.calibration, ceiling_depth)
+                .transform(t.inverse)
+                .project(ceiling_properties.calibration)
+                for p, t in zip(window_left_poses or [], window_left_to_ceiling or [])
+            ],
+            [
+                p.unproject(window_right_properties.calibration, ceiling_depth)
+                .transform(t.inverse)
+                .project(ceiling_properties.calibration)
+                for p, t in zip(window_right_poses or [], window_right_to_ceiling or [])
+            ],
+        )
+
         ceiling_annotated_frames = ceiling_visualizer.annotate_batch(
             ceiling_frames,
             ceiling_poses,
@@ -264,6 +353,16 @@ def main(
         Logger.info('Done!')
 
         Logger.info('Saving results...')
+        ceiling_projection_writer.write_batch(ceiling_projection_annotated_frames)
+
+        ceiling_depth_writer.write_batch([depth.to_frame(d) for d in ceiling_depths])
+        window_left_depth_writer.write_batch(
+            [depth.to_frame(d) for d in window_left_depths]
+        )
+        window_right_depth_writer.write_batch(
+            [depth.to_frame(d) for d in window_right_depths]
+        )
+
         ceiling_writer.write_batch(ceiling_annotated_frames)
         window_left_writer.write_batch(window_left_annotated_frames)
         window_right_writer.write_batch(window_right_annotated_frames)

diff --git a/child_lab_framework/core/algebra.py b/child_lab_framework/core/algebra.py
@@ -1,6 +1,8 @@
 from enum import IntEnum
+from typing import Literal
 
 import numpy as np
+from scipy.spatial.transform import Rotation
 
 from ..typing.array import FloatArray1, FloatArray2, FloatArray3, FloatArray6
 from .calibration import Calibration
@@ -31,6 +33,20 @@ def rotation_matrix(angle: float, axis: Axis) -> FloatArray2:
             )
 
 
+def euler_angles_from_rotation_matrix(
+    rotation: FloatArray2,
+) -> np.ndarray[tuple[Literal[3]], np.dtype[np.float32]]:
+    return (
+        Rotation.from_matrix(rotation).as_euler('xyz', degrees=False).astype(np.float32)
+    )
+
+
+def rotation_matrix_from_euler_angles(
+    angles: np.ndarray[tuple[Literal[3]], np.dtype[np.float32]],
+) -> FloatArray2:
+    return Rotation.from_euler('xyz', angles, degrees=False).as_matrix()
+
+
 def normalized(vecs: FloatArray2) -> FloatArray2:
     norm = np.linalg.norm(vecs, ord=2.0, axis=1)
     return vecs / norm

diff --git a/child_lab_framework/core/transformation/__init__.py b/child_lab_framework/core/transformation/__init__.py
@@ -1,3 +1,7 @@
+from .interface import Projectable, Transformable, Unprojectable
+
+from .error import reprojection_error  # isort: skip
+
 from .transformation import (
     EuclideanTransformation,
     ProjectiveTransformation,
@@ -7,8 +11,12 @@
 from .buffer import Buffer  # isort: skip
 
 __all__ = [
+    'Projectable',
+    'Transformable',
+    'Unprojectable',
     'Buffer',
     'Transformation',
     'EuclideanTransformation',
     'ProjectiveTransformation',
+    'reprojection_error',
 ]