Speeding up Scaper (part one) (#116)

* Adds a profiling script. (#106) Adding a profiling script to Scaper that we can use to keep track of performance. The script generates 100 audio samples and records info about the run. * adding some requirements needed for tests and profiling, adding a profile script * updating a few variables * adding profiling script and csv for logging results * expanding details computer by profile script * adjusting script again Co-authored-by: pseeth <[email protected]> * Replacing sox.file_info calls with soundfile.info(...).duration calls (#110) * switching to soundfile for checking duration speeds things up * pysoundfile has moved to soundfile on pip, updating setup.py * install only one version of soundfile... * Using Soundfile I/O operations + build_array functions from PySox (#111) This changes the mechanisms within the `_generate_audio` function to use SoundFile for I/O and to use the build_array function from `sox` 1.4.0b0. * Switching to using "build_array" + soundfile operations. * Using soundfile ops + build_array gives a performance boost. * np.pad in python 2.7 requires mode as a positional argument. * source -> event in _generate_audio for structures holding numpy arrays * cleaning up a stray comment * adding a comment about needing both convert and set_output_format. * adding back in a line for cleaner git diff * Changing nomenclature, cleaning some stuff up. * Had a duplicate line. Fixed. * Removing context manager that was used for an easier CR. * Changing tfm.fade operation to a numpy operation. (#117) Fade operation happens directly on the numpy arrays containing the audio samples now, rather than going through sox. * Changing fade to equiv numpy op * Switching to using fade in-memory gives slightly more speed. * Fixing some multichannel bugs when switching to in-memory ops. (#118) * Fixing some multichannel bugs. * Updating the test case, sidestepping multi-channel regression data for now. * Pinning pyristent to support Python 2.7, 3.4. 
* Pinning jsonschema instead. * Pinning pyristent to a working version, hopefully. * pyristent -> pyrsistent *facepalm* * Fix for Issue #113, trimming with isolated events. (#115) Fixes a bug that happens when you trim an event, then generate from the trimmed JAMS file, with saving of isolated sources enabled. * Updating test for trimming soundscapes to take into account saving isolated sources * Pushing the actual fix to generate_from_jams. * Updating changelog and bumping version * Using TemporaryDirectory in the test case now, rather than relying on the whims of tempfile * Raising atol in a test from 1e-5 to 1e-4. * Adding subtype to soundfile.write to get rid of precision issues. Co-authored-by: pseeth <[email protected]> * Bumping to 1.4.0 and changing sox dependency. * Updating changelog again. * Updating profiler results. Co-authored-by: pseeth <[email protected]>
justinsalamon · Sep 19, 2020 · 69778e7 · 69778e7
1 parent 520ae60
commit 69778e7
Show file tree

Hide file tree

Showing 8 changed files with 418 additions and 223 deletions.
diff --git a/docs/changes.rst b/docs/changes.rst
@@ -2,6 +2,11 @@
 
 Changelog
 ---------
+v1.4.0
+~~~~~~
+- Operations on all files happen in-memory now, via new PySox features (build_array) and numpy operations for applying fades.
+- Scaper is faster now due to the in-memory changes.
+
 v1.3.9
 ~~~~~~
 - Fixed a bug where trim before generating soundscapes from a JAMS file with saving of isolated events resulted in incorrect soundscape audio.

diff --git a/scaper/audio.py b/scaper/audio.py
@@ -80,7 +80,7 @@ def get_integrated_lufs(filepath, min_duration=0.5):
 
     """
     try:
-        duration = sox.file_info.duration(filepath)
+        duration = soundfile.info(filepath).duration
     except Exception as e:
         raise ScaperError(
             'Unable to obtain LUFS for {:s}, error message:\n{:s}'.format(

diff --git a/scaper/core.py b/scaper/core.py
diff --git a/scaper/version.py b/scaper/version.py
@@ -2,5 +2,5 @@
 # -*- coding: utf-8 -*-
 """Version info"""
 
-short_version = '1.3'
-version = '1.3.9'
+short_version = '1.4'
+version = '1.4.0'
diff --git a/setup.py b/setup.py
@@ -36,17 +36,18 @@
             "Programming Language :: Python :: 3.6",
         ],
     install_requires=[
-        'sox>=1.3.3',
+        'sox==1.4.0',
+        'pyrsistent==0.15.4',
         'jams>=0.3.2',
         'numpy>=1.13.3',
-        'pysoundfile'
+        'soundfile',
     ],
     extras_require={
         'docs': [
                 'sphinx',  # autodoc was broken in 1.3.1
                 'sphinx_rtd_theme',
                 'sphinx_issues',
             ],
-        'tests': ['backports.tempfile', 'pysoundfile']
+        'tests': ['backports.tempfile', 'pytest', 'pytest-cov', 'tqdm']
     }
 )
diff --git a/tests/profile_results.csv b/tests/profile_results.csv
@@ -0,0 +1,6 @@
+time_of_run,scaper_version,python_version,system,machine,processor,n_cpu,n_workers,memory,n_soundscapes,execution_time,git_commit_hash
+2020-07-17 14:13:46.982171,1.3.8,3.7.7,Darwin,x86_64,i386,8,1,16.0 GB,100,149.7468,e0c08d4f6eb10bc0b337a9d47f86b3b110ed0836
+2020-07-17 14:59:33.707885,1.3.8,3.7.7,Darwin,x86_64,i386,8,1,16.0 GB,100,135.1724,c780d270b0ea0c691e1cc1dbf725d1c4b35e5299
+2020-07-20 14:39:41.950552,1.3.8,3.7.7,Darwin,x86_64,i386,8,1,16.0 GB,100,118.7033,8c0cfe3c14e06bf46bcd6480f6be5991b0fba077
+2020-07-21 11:04:32.178729,1.3.8,3.7.7,Darwin,x86_64,i386,8,1,16.0 GB,100,114.4563,e903933594cb86b187c6c79066f969a9653d1897
+2020-09-18 16:57:39.359920,1.4.0,3.8.3,Darwin,x86_64,i386,8,1,16.0 GB,100,121.6504,d9346f71a7992e7a2578186a7ad9238d93aa3e66
diff --git a/tests/profile_speed.py b/tests/profile_speed.py
@@ -0,0 +1,168 @@
+"""
+This is a profiling script to check the performance of
+Scaper. It generates 100 soundscapes in sequence 
+(no parallelization). Running it on a 2019 MacBook Pro
+currently takes 158.68 seconds (02:38).
+"""
+
+import scaper
+import numpy as np
+import tempfile
+import os
+import tqdm
+import zipfile
+import subprocess
+import time
+import csv
+import platform
+import psutil
+import datetime
+import math
+import multiprocessing
+
+# Root directory of the test fixture data (relative to the working directory)
+FIX_DIR = 'tests/data/'
+
+def get_git_commit_hash():
+    process = subprocess.Popen(
+        ['git', 'rev-parse', 'HEAD'], shell=False, stdout=subprocess.PIPE)
+    git_head_hash = process.communicate()[0].strip().decode('utf-8')
+    return git_head_hash
+
+def convert_size(size_bytes):
+   if size_bytes == 0:
+       return "0B"
+   size_name = ("B", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB")
+   i = int(math.floor(math.log(size_bytes, 1024)))
+   p = math.pow(1024, i)
+   s = round(size_bytes / p, 2)
+   return "%s %s" % (s, size_name[i])
+
+with tempfile.TemporaryDirectory() as tmpdir:  
+    path_to_audio = os.path.join(FIX_DIR, 'audio/')
+    # OUTPUT FOLDER
+    outfolder = tmpdir
+
+    # SCAPER SETTINGS
+    fg_folder = os.path.join(path_to_audio, 'foreground')
+    bg_folder = os.path.join(path_to_audio, 'background')
+
+    # If we parallelize this script, change this accordingly
+    n_workers = 1
+
+    n_soundscapes = 100
+    ref_db = -50
+    duration = 10.0
+
+    min_events = 1
+    max_events = 9
+
+    event_time_dist = 'truncnorm'
+    event_time_mean = 5.0
+    event_time_std = 2.0
+    event_time_min = 0.0
+    event_time_max = 10.0
+
+    source_time_dist = 'const'
+    source_time = 0.0
+
+    event_duration_dist = 'uniform'
+    event_duration_min = 0.5
+    event_duration_max = 4.0
+
+    snr_dist = 'uniform'
+    snr_min = 6
+    snr_max = 30
+
+    pitch_dist = 'uniform'
+    pitch_min = -3.0
+    pitch_max = 3.0
+
+    time_stretch_dist = 'uniform'
+    time_stretch_min = 0.8
+    time_stretch_max = 1.2
+
+    # fixed random seed passed to the Scaper object created below
+    seed = 123
+
+    # create a scaper that will be used below
+    sc = scaper.Scaper(duration, fg_folder, bg_folder, random_state=seed)
+    sc.protected_labels = []
+    sc.ref_db = ref_db
+
+    # Generate 100 soundscapes using a truncated normal distribution of start times
+    start_time = time.time()
+
+    for n in tqdm.trange(n_soundscapes):
+        print('Generating soundscape: {:d}/{:d}'.format(n+1, n_soundscapes))
+
+        # reset the event specifications for foreground and background at the 
+        # beginning of each loop to clear all previously added events
+        sc.reset_bg_event_spec()
+        sc.reset_fg_event_spec()
+
+        # add background
+        sc.add_background(label=('choose', []),
+                            source_file=('choose', []),
+                            source_time=('const', 0))
+        sc.fade_in_len = 0.01
+        sc.fade_out_len = 0.01
+
+        # add random number of foreground events
+        n_events = np.random.randint(min_events, max_events+1)
+        for _ in range(n_events):
+            sc.add_event(label=('choose', []),
+                            source_file=('choose', []),
+                            source_time=(source_time_dist, source_time),
+                            event_time=(event_time_dist, event_time_mean, event_time_std, event_time_min, event_time_max),
+                            event_duration=(event_duration_dist, event_duration_min, event_duration_max),
+                            snr=(snr_dist, snr_min, snr_max),
+                            pitch_shift=(pitch_dist, pitch_min, pitch_max),
+                            time_stretch=(time_stretch_dist, time_stretch_min, time_stretch_max)
+            )
+        # generate
+        audiofile = os.path.join(outfolder, "soundscape_unimodal{:d}.wav".format(n))
+        jamsfile = os.path.join(outfolder, "soundscape_unimodal{:d}.jams".format(n))
+        txtfile = os.path.join(outfolder, "soundscape_unimodal{:d}.txt".format(n))
+
+        sc.generate(audiofile, jamsfile,
+                    allow_repeated_label=True,
+                    allow_repeated_source=True,
+                    reverb=0.1,
+                    disable_sox_warnings=True,
+                    no_audio=False,
+                    txt_path=txtfile)
+
+    time_taken = time.time() - start_time
+    uname = platform.uname()
+
+    row = {
+        'time_of_run': str(datetime.datetime.now()),
+        'scaper_version': scaper.__version__,
+        'python_version': platform.python_version(),
+        'system': uname.system,
+        'machine': uname.machine,
+        'processor': uname.processor,
+        'n_cpu': multiprocessing.cpu_count(),
+        'n_workers': n_workers,
+        'memory': convert_size(psutil.virtual_memory().total),
+        'n_soundscapes': n_soundscapes,        
+        'execution_time': np.round(time_taken, 4),
+        'git_commit_hash': get_git_commit_hash(),
+    }
+
+    fieldnames = list(row.keys())
+
+    results_path = 'tests/profile_results.csv'
+    write_header = not os.path.exists(results_path)
+
+    with open(results_path, 'a') as f:
+        writer = csv.DictWriter(f, fieldnames=fieldnames)
+        if write_header:
+            writer.writeheader()
+        writer.writerow(row)
+
+    with open(results_path, 'r') as f:
+        csv_f = csv.reader(f)
+        for row in csv_f:
+            print('{:<30}  {:<15}  {:<15}  {:<10} {:<10} {:<10} {:<5} {:<10} {:<10} {:<15} {:<10} {:}'.format(*row))
diff --git a/tests/test_core.py b/tests/test_core.py
@@ -217,7 +217,7 @@ def _validate_soundscape_and_event_audio(orig_wav_file, gen_wav_file,
             # Trim does not currently support trimming isolated events, but if/when
             # we add that functionality, this test should be updated to test that
             # as well, using the files in orig_events_path (currently unused).
-            assert np.allclose(gen_wav, sum(gen_audio), atol=atol, rtol=rtol)
+            assert np.allclose(gen_wav, sum(gen_audio), atol=1e-8, rtol=rtol)
 
         # generate, then generate from the jams and compare audio files
         # repeat 5 times
@@ -1000,7 +1000,7 @@ def test_scaper_instantiate_event():
     instantiated_event = sc._instantiate_event(
         fg_event10, disable_instantiation_warnings=True)
     assert instantiated_event.source_time == 0
-    assert instantiated_event.event_duration == 0.806236
+    assert np.allclose(instantiated_event.event_duration, 0.806236, atol=1e-5)
 
     # repeated label when not allowed throws error
     sc = scaper.Scaper(10.0, fg_path=FG_PATH, bg_path=BG_PATH)
@@ -1317,21 +1317,23 @@ def _create_scaper_with_random_seed(seed):
 
 def test_generate_audio():
     for sr in SAMPLE_RATES:
-        REG_WAV_PATH = TEST_PATHS[sr]['REG'].wav
-        REG_BGONLY_WAV_PATH = TEST_PATHS[sr]['REG_BGONLY'].wav
-        REG_REVERB_WAV_PATH = TEST_PATHS[sr]['REG_REVERB'].wav
-        _test_generate_audio(sr, REG_WAV_PATH, REG_BGONLY_WAV_PATH, REG_REVERB_WAV_PATH)
+        for n_ch in range(1, 3):
+            REG_WAV_PATH = TEST_PATHS[sr]['REG'].wav
+            REG_BGONLY_WAV_PATH = TEST_PATHS[sr]['REG_BGONLY'].wav
+            REG_REVERB_WAV_PATH = TEST_PATHS[sr]['REG_REVERB'].wav
+            _test_generate_audio(sr, REG_WAV_PATH, REG_BGONLY_WAV_PATH, REG_REVERB_WAV_PATH, n_ch)
 
 
-def _test_generate_audio(SR, REG_WAV_PATH, REG_BGONLY_WAV_PATH, REG_REVERB_WAV_PATH, atol=1e-4, rtol=1e-8):
+def _test_generate_audio(SR, REG_WAV_PATH, REG_BGONLY_WAV_PATH, REG_REVERB_WAV_PATH, N_CHANNELS, atol=1e-4, rtol=1e-8):
     # Regression test: same spec, same audio (note: this will fail if we update
     # any of the audio processing techniques used, e.g. change the time
     # stretching algorithm).
     sc = scaper.Scaper(10.0, fg_path=FG_PATH, bg_path=BG_PATH)
     sc.ref_db = -50
     sc.sr = SR
+    sc.n_channels = N_CHANNELS
 
-    print("TEST SR: {}".format(SR))
+    print("TEST SR: {}, # OF CHANNELS: {}".format(SR, N_CHANNELS))
 
     # background
     sc.add_background(
@@ -1389,24 +1391,30 @@ def _test_generate_audio(SR, REG_WAV_PATH, REG_BGONLY_WAV_PATH, REG_REVERB_WAV_P
         sc._generate_audio(wav_file.name, jam.annotations[0])
 
         # validate audio
-        wav, sr = soundfile.read(wav_file.name)
-        regwav, sr = soundfile.read(REG_WAV_PATH)
-        assert np.allclose(wav, regwav, atol=atol, rtol=rtol)
+        wav, sr = soundfile.read(wav_file.name, always_2d=True)
+        regwav, sr = soundfile.read(REG_WAV_PATH, always_2d=True)
+        # TODO: Add multi-channel regression data.
+        if N_CHANNELS == 1:
+            assert np.allclose(wav, regwav, atol=atol, rtol=rtol)
 
         # with reverb
         sc._generate_audio(wav_file.name, jam.annotations[0], reverb=0.2)
         # validate audio
-        wav, sr = soundfile.read(wav_file.name)
-        regwav, sr = soundfile.read(REG_REVERB_WAV_PATH)
-        assert np.allclose(wav, regwav, atol=atol, rtol=rtol)
+        wav, sr = soundfile.read(wav_file.name, always_2d=True)
+        regwav, sr = soundfile.read(REG_REVERB_WAV_PATH, always_2d=True)
+        # TODO: Add multi-channel regression data.
+        if N_CHANNELS == 1:
+            assert np.allclose(wav, regwav, atol=atol, rtol=rtol)
 
         # Don't disable sox warnings (just to cover line)
         sc._generate_audio(wav_file.name, jam.annotations[0],
                            disable_sox_warnings=False)
         # validate audio
-        wav, sr = soundfile.read(wav_file.name)
-        regwav, sr = soundfile.read(REG_WAV_PATH)
-        assert np.allclose(wav, regwav, atol=atol, rtol=rtol)
+        wav, sr = soundfile.read(wav_file.name, always_2d=True)
+        regwav, sr = soundfile.read(REG_WAV_PATH, always_2d=True)
+        # TODO: Add multi-channel regression data.
+        if N_CHANNELS == 1:
+            assert np.allclose(wav, regwav, atol=atol, rtol=rtol)
 
         # namespace must be scaper
         jam.annotations[0].namespace = 'tag_open'
@@ -1442,9 +1450,11 @@ def _test_generate_audio(SR, REG_WAV_PATH, REG_BGONLY_WAV_PATH, REG_REVERB_WAV_P
         jam = sc._instantiate(disable_instantiation_warnings=True, reverb=reverb)
         sc._generate_audio(wav_file.name, jam.annotations[0], reverb=reverb)
         # validate audio
-        wav, sr = soundfile.read(wav_file.name)
-        regwav, sr = soundfile.read(REG_BGONLY_WAV_PATH)
-        assert np.allclose(wav, regwav, atol=atol, rtol=rtol)
+        wav, sr = soundfile.read(wav_file.name, always_2d=True)
+        regwav, sr = soundfile.read(REG_BGONLY_WAV_PATH, always_2d=True)
+        # TODO: Add multi-channel regression data.
+        if N_CHANNELS == 1:
+            assert np.allclose(wav, regwav, atol=atol, rtol=rtol)
 
 
 def create_scaper_scene_without_random_seed():
@@ -1567,7 +1577,7 @@ def _delete_files(mix_file, directory):
             isolated_audio.append(_isolated_sandbox_audio)
 
         # the sum of the isolated audio should sum to the soundscape
-        assert np.allclose(sum(isolated_audio), soundscape_audio, atol=1e-4, rtol=1e-8)
+        assert np.allclose(sum(isolated_audio), soundscape_audio, atol=1e-8, rtol=1e-8)
 
         jam = sc._instantiate(disable_instantiation_warnings=True)