From dacccbdbbcaaa5f1d9a2c2853559d20bcfeb8a7c Mon Sep 17 00:00:00 2001 From: Jon Nordby Date: Mon, 30 Aug 2021 13:02:57 +0200 Subject: [PATCH] get_timestamp_embeddings: Log durations To see if numpy/tensorflow conversions are problematic --- openl3_hear/hear2021.py | 31 ++++++++++++++++++++++++++++--- requirements.txt | 1 + 2 files changed, 29 insertions(+), 3 deletions(-) diff --git a/openl3_hear/hear2021.py b/openl3_hear/hear2021.py index b8e1339..395cfc0 100644 --- a/openl3_hear/hear2021.py +++ b/openl3_hear/hear2021.py @@ -9,8 +9,11 @@ HOP_SIZE_TIMESTAMPS = 0.050 # <50 ms recommended HOP_SIZE_SCENE = 0.5 +import time + import openl3 import numpy +import structlog import tensorflow as tf #import tensorflow_datasets @@ -19,6 +22,9 @@ from typing import NewType, Tuple Tensor = NewType('Tensor', object) +log = structlog.get_logger() + + class Model(tf.Module): def __init__(self, model, sample_rate=48000, embedding_size=512): self.sample_rate = sample_rate @@ -75,16 +81,27 @@ def get_embedding(samples): # Compute embeddings for each clip embeddings = [] timestamps = [] + + # convert to Numpy + pre_convert_start = time.time() + samples = numpy.array(audio) + pre_convert_end = time.time() + + compute_start = time.time() for sound_no in range(audio.shape[0]): - samples = numpy.array(audio[sound_no, :]) - emb, ts = get_embedding(samples) + emb, ts = get_embedding(samples[sound_no, :]) embeddings.append(emb) timestamps.append(ts) + compute_end = time.time() + + # convert to Tensorflow + post_convert_start = time.time() emb = numpy.stack(embeddings) ts = numpy.stack(timestamps) emb = tf.convert_to_tensor(emb) ts = tf.convert_to_tensor(ts) - + post_convert_end = time.time() + # post-conditions assert len(ts.shape) == 2 assert len(ts) >= 1 @@ -96,6 +113,14 @@ def get_embedding(samples): if len(ts) >= 2: assert ts[0,1] == ts[0,0] + hop_size + log.debug('get-timestamp-embeddings', + n_samples=audio.shape[0], + sample_length=audio.shape[1]/model.sample_rate, + pre_convert_duration=pre_convert_end-pre_convert_start, + post_convert_duration=post_convert_end-post_convert_start, + compute_duration=compute_end-compute_start, + ) + # XXX: are timestampes centered? # first results seems to be 0.0, which would indicate that window # starts at -window/2 ? diff --git a/requirements.txt b/requirements.txt index 1f23d5e..664210a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,3 +2,4 @@ openl3==0.4.1 tensorflow-datasets==4.3.0 tensorflow==2.4.2 hearvalidator==2021.0.2 +structlog==21.1.0