Trim events using SoundFile. (#128)

* Trim on read via SoundFile.
* Updating regression data.
* Removing import of match_sample_length in core.py.
* Updating changelog.

Co-authored-by: pseeth <prem@descript.com>
justinsalamon · Sep 23, 2020 · 1cbfa8a · 1cbfa8a
1 parent 8ee6a0d
commit 1cbfa8a
Show file tree

Hide file tree

Showing 21 changed files with 489 additions and 481 deletions.
diff --git a/docs/changes.rst b/docs/changes.rst
@@ -2,6 +2,12 @@
 
 Changelog
 ---------
+v1.6.1
+~~~~~~
+- Trimming now happens on read, rather than after read. This prevents the entire file from being loaded into memory. This is helpful for long source audio files.
+- Since the audio processing pipeline has changed, this version will generate marginally different audio data compared to previous versions: the change is not perceptible, but np.allclose() tests on audio from previous versions of Scaper may fail.
+- This change updates the regression data for Scaper's regression tests.
+
 v1.6.0
 ~~~~~~
 - Uses soxbindings when installing on Linux or MacOS, which results in better performance.

diff --git a/scaper/core.py b/scaper/core.py
@@ -30,7 +30,6 @@
 from .util import polyphony_gini
 from .util import is_real_number, is_real_array
 from .audio import get_integrated_lufs
-from .audio import match_sample_length
 from .version import version as scaper_version
 
 SUPPORTED_DIST = {"const": _sample_const,
@@ -1799,10 +1798,6 @@ def _generate_audio(self, audio_path, ann, reverb=None,
                         rate=self.sr,
                         channels=self.n_channels
                     )
-                    # Then trim the duration of the background event
-                    tfm.trim(e.value['source_time'],
-                                e.value['source_time'] +
-                                e.value['event_duration'])
 
                     # PROCESS BEFORE COMPUTING LUFS
                     tmpfiles_internal = []
@@ -1811,11 +1806,17 @@ def _generate_audio(self, audio_path, ann, reverb=None,
                         tmpfiles_internal.append(
                             tempfile.NamedTemporaryFile(
                                 suffix='.wav', delete=False))
-                        # read in background off disk
+                        # read in background off disk, using start and stop 
+                        # to only read the necessary audio
+                        event_sr = soundfile.info(e.value['source_file']).samplerate
+                        start = int(e.value['source_time'] * event_sr)
+                        stop = int((e.value['source_time'] + e.value['event_duration']) * event_sr)
                         event_audio, event_sr = soundfile.read(
-                            e.value['source_file'], always_2d=True)
+                            e.value['source_file'], always_2d=True,
+                            start=start, stop=stop)
                         # tile the background along the appropriate dimensions
                         event_audio = np.tile(event_audio, (ntiles, 1))
+                        event_audio = event_audio[:stop]
                         event_audio = tfm.build_array(
                             input_array=event_audio,
                             sample_rate_in=event_sr
@@ -1853,10 +1854,6 @@ def _generate_audio(self, audio_path, ann, reverb=None,
                         rate=self.sr,
                         channels=self.n_channels
                     )
-                    # Trim
-                    tfm.trim(e.value['source_time'],
-                                e.value['source_time'] +
-                                e.value['event_duration'])
 
                     # Pitch shift
                     if e.value['pitch_shift'] is not None:
@@ -1875,9 +1872,14 @@ def _generate_audio(self, audio_path, ann, reverb=None,
                             tempfile.NamedTemporaryFile(
                                 suffix='.wav', delete=False))
 
-                        # synthesize edited foreground sound event
+                        # synthesize edited foreground sound event, 
+                        # doing the trim via soundfile
+                        event_sr = soundfile.info(e.value['source_file']).samplerate
+                        start = int(e.value['source_time'] * event_sr)
+                        stop = int((e.value['source_time'] + e.value['event_duration']) * event_sr)
                         event_audio, event_sr = soundfile.read(
-                            e.value['source_file'], always_2d=True)
+                            e.value['source_file'], always_2d=True,
+                            start=start, stop=stop)
                         event_audio = tfm.build_array(
                             input_array=event_audio,
                             sample_rate_in=event_sr

diff --git a/scaper/version.py b/scaper/version.py
@@ -3,4 +3,4 @@
 """Version info"""
 
 short_version = '1.6'
-version = '1.6.0'
+version = '1.6.1'
diff --git a/tests/data/regression/bgonly_soundscape_20200501_22050.jams b/tests/data/regression/bgonly_soundscape_20200501_22050.jams
@@ -1,19 +1,24 @@
 {
-  "file_metadata": {
-    "artist": "",
-    "jams_version": "0.3.3",
-    "identifiers": {},
-    "duration": 10.0,
-    "title": "",
-    "release": ""
-  },
   "annotations": [
     {
+      "annotation_metadata": {
+        "curator": {
+          "name": "",
+          "email": ""
+        },
+        "annotator": {},
+        "version": "",
+        "corpus": "",
+        "annotation_tools": "",
+        "annotation_rules": "",
+        "validation": "",
+        "data_source": ""
+      },
+      "namespace": "scaper",
       "data": [
         {
-          "confidence": 1.0,
-          "duration": 10.0,
           "time": 0.0,
+          "duration": 10.0,
           "value": {
             "label": "park",
             "source_file": "tests/data/audio/background/park/268903__yonts__city-park-tel-aviv-israel.wav",
@@ -24,45 +29,17 @@
             "role": "background",
             "pitch_shift": null,
             "time_stretch": null
-          }
+          },
+          "confidence": 1.0
         }
       ],
-      "time": 0,
-      "annotation_metadata": {
-        "corpus": "",
-        "validation": "",
-        "annotation_rules": "",
-        "curator": {
-          "name": "",
-          "email": ""
-        },
-        "annotation_tools": "",
-        "data_source": "",
-        "version": "",
-        "annotator": {}
-      },
-      "duration": 10.0,
       "sandbox": {
         "scaper": {
-          "allow_repeated_source": true,
-          "fade_out_len": 0.01,
-          "bg_path": "tests/data/audio/background",
+          "duration": 10.0,
           "original_duration": 10.0,
-          "reverb": 0.2,
-          "polyphony_max": 0,
-          "polyphony_gini": 0,
-          "fg_labels": [
-            "car_horn",
-            "human_voice",
-            "siren"
-          ],
-          "n_events": 0,
-          "scaper_version": "1.3.6",
-          "fade_in_len": 0.01,
-          "n_channels": 1,
           "fg_path": "tests/data/audio/foreground",
-          "protected_labels": [],
-          "ref_db": -20,
+          "bg_path": "tests/data/audio/background",
+          "fg_spec": [],
           "bg_spec": [
             [
               [
@@ -94,21 +71,44 @@
               null
             ]
           ],
-          "soundscape_audio_path": "tests/data/regression/bgonly_soundscape_20200501_22050.wav",
-          "fg_spec": [],
-          "sr": 22050,
-          "allow_repeated_label": true,
+          "fg_labels": [
+            "car_horn",
+            "human_voice",
+            "siren"
+          ],
           "bg_labels": [
             "park",
             "restaurant",
             "street"
           ],
-          "duration": 10.0,
+          "protected_labels": [],
+          "sr": 22050,
+          "ref_db": -20,
+          "n_channels": 1,
+          "fade_in_len": 0.01,
+          "fade_out_len": 0.01,
+          "n_events": 0,
+          "polyphony_max": 0,
+          "polyphony_gini": 0,
+          "allow_repeated_label": true,
+          "allow_repeated_source": true,
+          "reverb": 0.2,
+          "scaper_version": "1.6.1",
+          "soundscape_audio_path": "tests/data/regression/bgonly_soundscape_20200501_22050.wav",
           "isolated_events_audio_path": []
         }
       },
-      "namespace": "scaper"
+      "time": 0,
+      "duration": 10.0
     }
   ],
+  "file_metadata": {
+    "title": "",
+    "artist": "",
+    "release": "",
+    "duration": 10.0,
+    "identifiers": {},
+    "jams_version": "0.3.4"
+  },
   "sandbox": {}
 }
diff --git a/tests/data/regression/bgonly_soundscape_20200501_22050.wav b/tests/data/regression/bgonly_soundscape_20200501_22050.wav
diff --git a/tests/data/regression/bgonly_soundscape_20200501_44100.jams b/tests/data/regression/bgonly_soundscape_20200501_44100.jams
@@ -1,19 +1,24 @@
 {
-  "file_metadata": {
-    "artist": "",
-    "jams_version": "0.3.3",
-    "identifiers": {},
-    "duration": 10.0,
-    "title": "",
-    "release": ""
-  },
   "annotations": [
     {
+      "annotation_metadata": {
+        "curator": {
+          "name": "",
+          "email": ""
+        },
+        "annotator": {},
+        "version": "",
+        "corpus": "",
+        "annotation_tools": "",
+        "annotation_rules": "",
+        "validation": "",
+        "data_source": ""
+      },
+      "namespace": "scaper",
       "data": [
         {
-          "confidence": 1.0,
-          "duration": 10.0,
           "time": 0.0,
+          "duration": 10.0,
           "value": {
             "label": "park",
             "source_file": "tests/data/audio/background/park/268903__yonts__city-park-tel-aviv-israel.wav",
@@ -24,45 +29,17 @@
             "role": "background",
             "pitch_shift": null,
             "time_stretch": null
-          }
+          },
+          "confidence": 1.0
         }
       ],
-      "time": 0,
-      "annotation_metadata": {
-        "corpus": "",
-        "validation": "",
-        "annotation_rules": "",
-        "curator": {
-          "name": "",
-          "email": ""
-        },
-        "annotation_tools": "",
-        "data_source": "",
-        "version": "",
-        "annotator": {}
-      },
-      "duration": 10.0,
       "sandbox": {
         "scaper": {
-          "allow_repeated_source": true,
-          "fade_out_len": 0.01,
-          "bg_path": "tests/data/audio/background",
+          "duration": 10.0,
           "original_duration": 10.0,
-          "reverb": 0.2,
-          "polyphony_max": 0,
-          "polyphony_gini": 0,
-          "fg_labels": [
-            "car_horn",
-            "human_voice",
-            "siren"
-          ],
-          "n_events": 0,
-          "scaper_version": "1.3.6",
-          "fade_in_len": 0.01,
-          "n_channels": 1,
           "fg_path": "tests/data/audio/foreground",
-          "protected_labels": [],
-          "ref_db": -20,
+          "bg_path": "tests/data/audio/background",
+          "fg_spec": [],
           "bg_spec": [
             [
               [
@@ -94,21 +71,44 @@
               null
             ]
           ],
-          "soundscape_audio_path": "tests/data/regression/bgonly_soundscape_20200501_44100.wav",
-          "fg_spec": [],
-          "sr": 44100,
-          "allow_repeated_label": true,
+          "fg_labels": [
+            "car_horn",
+            "human_voice",
+            "siren"
+          ],
           "bg_labels": [
             "park",
             "restaurant",
             "street"
           ],
-          "duration": 10.0,
+          "protected_labels": [],
+          "sr": 44100,
+          "ref_db": -20,
+          "n_channels": 1,
+          "fade_in_len": 0.01,
+          "fade_out_len": 0.01,
+          "n_events": 0,
+          "polyphony_max": 0,
+          "polyphony_gini": 0,
+          "allow_repeated_label": true,
+          "allow_repeated_source": true,
+          "reverb": 0.2,
+          "scaper_version": "1.6.1",
+          "soundscape_audio_path": "tests/data/regression/bgonly_soundscape_20200501_44100.wav",
           "isolated_events_audio_path": []
         }
       },
-      "namespace": "scaper"
+      "time": 0,
+      "duration": 10.0
     }
   ],
+  "file_metadata": {
+    "title": "",
+    "artist": "",
+    "release": "",
+    "duration": 10.0,
+    "identifiers": {},
+    "jams_version": "0.3.4"
+  },
   "sandbox": {}
 }
diff --git a/tests/data/regression/bgonly_soundscape_20200501_44100.wav b/tests/data/regression/bgonly_soundscape_20200501_44100.wav