From dacccbdbbcaaa5f1d9a2c2853559d20bcfeb8a7c Mon Sep 17 00:00:00 2001
From: Jon Nordby <jononor@gmail.com>
Date: Mon, 30 Aug 2021 13:02:57 +0200
Subject: [PATCH] get_timestamp_embeddings: Log durations

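Log how long the NumPy conversion, the embedding computation and the
TensorFlow conversion each take, to see whether the NumPy/TensorFlow
conversions are a performance problem.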
---
 openl3_hear/hear2021.py | 31 ++++++++++++++++++++++++++++---
 requirements.txt        |  1 +
 2 files changed, 29 insertions(+), 3 deletions(-)

diff --git a/openl3_hear/hear2021.py b/openl3_hear/hear2021.py
index b8e1339..395cfc0 100644
--- a/openl3_hear/hear2021.py
+++ b/openl3_hear/hear2021.py
@@ -9,8 +9,11 @@
 HOP_SIZE_TIMESTAMPS = 0.050 # <50 ms recommended
 HOP_SIZE_SCENE = 0.5
 
+import time
+
 import openl3
 import numpy
+import structlog
 import tensorflow as tf
 
 #import tensorflow_datasets
@@ -19,6 +22,9 @@
 from typing import NewType, Tuple
 Tensor = NewType('Tensor', object)
 
+log = structlog.get_logger()
+
+
 class Model(tf.Module):
     def __init__(self, model, sample_rate=48000, embedding_size=512):
         self.sample_rate = sample_rate
@@ -75,16 +81,27 @@ def get_embedding(samples):
     # Compute embeddings for each clip
     embeddings = []
     timestamps = []
+
+    # convert the whole batch to NumPy once, before the per-clip loop
+    pre_convert_start = time.time()
+    samples = numpy.array(audio)
+    pre_convert_end = time.time()
+
+    compute_start = time.time()
     for sound_no in range(audio.shape[0]):
-        samples = numpy.array(audio[sound_no, :])
-        emb, ts = get_embedding(samples)
+        emb, ts = get_embedding(samples[sound_no, :])
         embeddings.append(emb)
         timestamps.append(ts)
+    compute_end = time.time()
+
+    # stack the per-clip results and convert them to TensorFlow tensors
+    post_convert_start = time.time()
     emb = numpy.stack(embeddings)
     ts = numpy.stack(timestamps)
     emb = tf.convert_to_tensor(emb)
     ts = tf.convert_to_tensor(ts)
-    
+    post_convert_end = time.time()
+
     # post-conditions
     assert len(ts.shape) == 2 
     assert len(ts) >= 1
@@ -96,6 +113,14 @@ def get_embedding(samples):
     if len(ts) >= 2:
         assert ts[0,1] == ts[0,0] + hop_size
 
+    log.debug('get-timestamp-embeddings',
+        n_samples=audio.shape[0],
+        sample_length=audio.shape[1]/model.sample_rate,
+        pre_convert_duration=pre_convert_end-pre_convert_start,
+        post_convert_duration=post_convert_end-post_convert_start,
+        compute_duration=compute_end-compute_start,
+    )
+
     # XXX: are timestampes centered?
     # first results seems to be 0.0, which would indicate that window
     # starts at -window/2 ?
diff --git a/requirements.txt b/requirements.txt
index 1f23d5e..664210a 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,3 +2,4 @@ openl3==0.4.1
 tensorflow-datasets==4.3.0
 tensorflow==2.4.2
 hearvalidator==2021.0.2
+structlog==21.1.0