From 8cbf94b985f2fb6efa5b5709e940d8ad436fff19 Mon Sep 17 00:00:00 2001 From: pseeth Date: Mon, 28 Sep 2020 15:02:17 -0700 Subject: [PATCH] Adds an option for using quick time stretching and pitch shifting. (#137) * Adding quick_pitch_time option to generate, and saving to JAMS. Updated profile script to include command line options. Updated profile results. Bumped version. Failing tests due to mismatch in keys with regression data. * Updating regression data to include quick_pitch_time=False. * Bumping version in profile results. Co-authored-by: pseeth --- docs/changes.rst | 5 ++ scaper/core.py | 25 ++++++- scaper/version.py | 2 +- .../bgonly_soundscape_20200501_22050.jams | 5 +- .../bgonly_soundscape_20200501_44100.jams | 5 +- .../reverb_soundscape_20200501_22050.jams | 5 +- .../reverb_soundscape_20200501_44100.jams | 5 +- .../regression/soundscape_20200501_22050.jams | 5 +- .../regression/soundscape_20200501_44100.jams | 5 +- tests/profile_results.csv | 21 +++--- tests/profile_speed.py | 11 +++ tests/test_core.py | 74 ++++++++++++++++++- 12 files changed, 140 insertions(+), 28 deletions(-) diff --git a/docs/changes.rst b/docs/changes.rst index 0dd360e..d63c7df 100644 --- a/docs/changes.rst +++ b/docs/changes.rst @@ -2,6 +2,11 @@ Changelog --------- +v1.6.4 +~~~~~~ +- Scaper.generate now accepts a new argument for controlling trade-off between speed and quality in pitch shifting and time stretching: + - quick_pitch_time: if True, both time stretching and pitch shifting will be applied in quick mode, which is much faster but has lower quality. 
+ v1.6.3 ~~~~~~ - Scaper.generate now accepts two new optional arguments for controlling audio clipping and normalization: diff --git a/scaper/core.py b/scaper/core.py index 9b2721c..fe9c3b7 100644 --- a/scaper/core.py +++ b/scaper/core.py @@ -223,6 +223,11 @@ def generate_from_jams(jams_infile, else: peak_normalization = False + if 'quick_pitch_time' in ann.sandbox.scaper.keys(): + quick_pitch_time = ann.sandbox.scaper['quick_pitch_time'] + else: + quick_pitch_time = False + # Cast ann.sandbox.scaper to a Sandbox object ann.sandbox.scaper = jams.Sandbox(**ann.sandbox.scaper) @@ -233,6 +238,7 @@ def generate_from_jams(jams_infile, reverb=reverb, fix_clipping=fix_clipping, peak_normalization=peak_normalization, + quick_pitch_time=quick_pitch_time, save_isolated_events=save_isolated_events, isolated_events_path=isolated_events_path, disable_sox_warnings=disable_sox_warnings) @@ -242,6 +248,7 @@ def generate_from_jams(jams_infile, ann.sandbox.scaper.reverb = reverb ann.sandbox.scaper.fix_clipping = fix_clipping ann.sandbox.scaper.peak_normalization = peak_normalization + ann.sandbox.scaper.quick_pitch_time = quick_pitch_time ann.sandbox.scaper.save_isolated_events = save_isolated_events ann.sandbox.scaper.isolated_events_path = isolated_events_path ann.sandbox.scaper.disable_sox_warnings = disable_sox_warnings @@ -1746,6 +1753,7 @@ def _generate_audio(self, reverb=None, fix_clipping=False, peak_normalization=False, + quick_pitch_time=False, save_isolated_events=False, isolated_events_path=None, disable_sox_warnings=True): @@ -1776,6 +1784,10 @@ def _generate_audio(self, each isolated event is also scaled accordingly. Note: this will change the actual value of `ref_db` in the generated audio. The scaling factor that was used is returned. + quick_pitch_time : bool + When True (default=False), time stretching and pitch shifting will be + applied with `quick=True`. This is much faster but the resultant + audio is generally of lower audio quality. 
save_isolated_events : bool If True, this will save the isolated foreground events and backgrounds in a directory adjacent to the generated soundscape @@ -1922,12 +1934,12 @@ def _generate_audio(self, # Pitch shift if e.value['pitch_shift'] is not None: - tfm.pitch(e.value['pitch_shift']) + tfm.pitch(e.value['pitch_shift'], quick=quick_pitch_time) # Time stretch if e.value['time_stretch'] is not None: factor = 1.0 / float(e.value['time_stretch']) - tfm.tempo(factor, audio_type='s', quick=False) + tfm.tempo(factor, audio_type='s', quick=quick_pitch_time) # PROCESS BEFORE COMPUTING LUFS tmpfiles_internal = [] @@ -2102,6 +2114,7 @@ def generate(self, reverb=None, fix_clipping=False, peak_normalization=False, + quick_pitch_time=False, save_isolated_events=False, isolated_events_path=None, disable_sox_warnings=True, @@ -2154,6 +2167,10 @@ def generate(self, `ref_db` value will be stored in the JAMS annotation. The SNR of foreground events with respect to the background is unaffected except when extreme scaling is required to achieve peak normalization. + quick_pitch_time : bool + When True (default=False), time stretching and pitch shifting will be + applied with `quick=True`. This is much faster but the resultant + audio is generally of lower audio quality. save_isolated_events : bool If True, this will save the isolated foreground events and backgrounds in a directory adjacent to the generated soundscape @@ -2253,7 +2270,8 @@ def generate(self, isolated_events_path=isolated_events_path, disable_sox_warnings=disable_sox_warnings, fix_clipping=fix_clipping, - peak_normalization=peak_normalization) + peak_normalization=peak_normalization, + quick_pitch_time=quick_pitch_time) # TODO: Stick to heavy handed overwriting for now, in the future we # should consolidate this with what happens inside _instantiate(). 
@@ -2264,6 +2282,7 @@ def generate(self, ann.sandbox.scaper.reverb = reverb ann.sandbox.scaper.fix_clipping = fix_clipping ann.sandbox.scaper.peak_normalization = peak_normalization + ann.sandbox.scaper.quick_pitch_time = quick_pitch_time ann.sandbox.scaper.save_isolated_events = save_isolated_events ann.sandbox.scaper.isolated_events_path = isolated_events_path ann.sandbox.scaper.disable_sox_warnings = disable_sox_warnings diff --git a/scaper/version.py b/scaper/version.py index c65a965..98e03ab 100644 --- a/scaper/version.py +++ b/scaper/version.py @@ -3,4 +3,4 @@ """Version info""" short_version = '1.6' -version = '1.6.3' +version = '1.6.4' diff --git a/tests/data/regression/bgonly_soundscape_20200501_22050.jams b/tests/data/regression/bgonly_soundscape_20200501_22050.jams index 4dd6f1c..df080c9 100644 --- a/tests/data/regression/bgonly_soundscape_20200501_22050.jams +++ b/tests/data/regression/bgonly_soundscape_20200501_22050.jams @@ -93,7 +93,7 @@ "allow_repeated_label": true, "allow_repeated_source": true, "reverb": 0.2, - "scaper_version": "1.6.3", + "scaper_version": "1.6.4", "soundscape_audio_path": "tests/data/regression/bgonly_soundscape_20200501_22050.wav", "isolated_events_audio_path": [], "audio_path": "tests/data/regression/bgonly_soundscape_20200501_22050.wav", @@ -109,7 +109,8 @@ "disable_instantiation_warnings": true, "peak_normalization_scale_factor": 1.0, "ref_db_change": 0, - "ref_db_generated": -20 + "ref_db_generated": -20, + "quick_pitch_time": false } }, "time": 0, diff --git a/tests/data/regression/bgonly_soundscape_20200501_44100.jams b/tests/data/regression/bgonly_soundscape_20200501_44100.jams index d1cf2cc..a1d2a1f 100644 --- a/tests/data/regression/bgonly_soundscape_20200501_44100.jams +++ b/tests/data/regression/bgonly_soundscape_20200501_44100.jams @@ -93,7 +93,7 @@ "allow_repeated_label": true, "allow_repeated_source": true, "reverb": 0.2, - "scaper_version": "1.6.3", + "scaper_version": "1.6.4", "soundscape_audio_path": 
"tests/data/regression/bgonly_soundscape_20200501_44100.wav", "isolated_events_audio_path": [], "audio_path": "tests/data/regression/bgonly_soundscape_20200501_44100.wav", @@ -109,7 +109,8 @@ "disable_instantiation_warnings": true, "peak_normalization_scale_factor": 1.0, "ref_db_change": 0, - "ref_db_generated": -20 + "ref_db_generated": -20, + "quick_pitch_time": false } }, "time": 0, diff --git a/tests/data/regression/reverb_soundscape_20200501_22050.jams b/tests/data/regression/reverb_soundscape_20200501_22050.jams index 38dbb8e..c74462e 100644 --- a/tests/data/regression/reverb_soundscape_20200501_22050.jams +++ b/tests/data/regression/reverb_soundscape_20200501_22050.jams @@ -235,7 +235,7 @@ "allow_repeated_label": true, "allow_repeated_source": true, "reverb": 0.2, - "scaper_version": "1.6.3", + "scaper_version": "1.6.4", "soundscape_audio_path": "tests/data/regression/reverb_soundscape_20200501_22050.wav", "isolated_events_audio_path": [], "audio_path": "tests/data/regression/reverb_soundscape_20200501_22050.wav", @@ -251,7 +251,8 @@ "disable_instantiation_warnings": true, "peak_normalization_scale_factor": 1.0, "ref_db_change": 0, - "ref_db_generated": -50 + "ref_db_generated": -50, + "quick_pitch_time": false } }, "time": 0, diff --git a/tests/data/regression/reverb_soundscape_20200501_44100.jams b/tests/data/regression/reverb_soundscape_20200501_44100.jams index cad1151..66bdb33 100644 --- a/tests/data/regression/reverb_soundscape_20200501_44100.jams +++ b/tests/data/regression/reverb_soundscape_20200501_44100.jams @@ -235,7 +235,7 @@ "allow_repeated_label": true, "allow_repeated_source": true, "reverb": 0.2, - "scaper_version": "1.6.3", + "scaper_version": "1.6.4", "soundscape_audio_path": "tests/data/regression/reverb_soundscape_20200501_44100.wav", "isolated_events_audio_path": [], "audio_path": "tests/data/regression/reverb_soundscape_20200501_44100.wav", @@ -251,7 +251,8 @@ "disable_instantiation_warnings": true, "peak_normalization_scale_factor": 
1.0, "ref_db_change": 0, - "ref_db_generated": -50 + "ref_db_generated": -50, + "quick_pitch_time": false } }, "time": 0, diff --git a/tests/data/regression/soundscape_20200501_22050.jams b/tests/data/regression/soundscape_20200501_22050.jams index 508cab9..5d132d4 100644 --- a/tests/data/regression/soundscape_20200501_22050.jams +++ b/tests/data/regression/soundscape_20200501_22050.jams @@ -235,7 +235,7 @@ "allow_repeated_label": true, "allow_repeated_source": true, "reverb": null, - "scaper_version": "1.6.3", + "scaper_version": "1.6.4", "soundscape_audio_path": "tests/data/regression/soundscape_20200501_22050.wav", "isolated_events_audio_path": [], "audio_path": "tests/data/regression/soundscape_20200501_22050.wav", @@ -251,7 +251,8 @@ "disable_instantiation_warnings": true, "peak_normalization_scale_factor": 1.0, "ref_db_change": 0, - "ref_db_generated": -50 + "ref_db_generated": -50, + "quick_pitch_time": false } }, "time": 0, diff --git a/tests/data/regression/soundscape_20200501_44100.jams b/tests/data/regression/soundscape_20200501_44100.jams index 06b1496..927e9b8 100644 --- a/tests/data/regression/soundscape_20200501_44100.jams +++ b/tests/data/regression/soundscape_20200501_44100.jams @@ -235,7 +235,7 @@ "allow_repeated_label": true, "allow_repeated_source": true, "reverb": null, - "scaper_version": "1.6.3", + "scaper_version": "1.6.4", "soundscape_audio_path": "tests/data/regression/soundscape_20200501_44100.wav", "isolated_events_audio_path": [], "audio_path": "tests/data/regression/soundscape_20200501_44100.wav", @@ -251,7 +251,8 @@ "disable_instantiation_warnings": true, "peak_normalization_scale_factor": 1.0, "ref_db_change": 0, - "ref_db_generated": -50 + "ref_db_generated": -50, + "quick_pitch_time": false } }, "time": 0, diff --git a/tests/profile_results.csv b/tests/profile_results.csv index 5e596ab..3d9c492 100644 --- a/tests/profile_results.csv +++ b/tests/profile_results.csv @@ -1,10 +1,11 @@ 
-time_of_run,scaper_version,python_version,system,machine,processor,n_cpu,n_workers,memory,n_soundscapes,execution_time,git_commit_hash -2020-07-17 14:13:46.982171,1.3.8,3.7.7,Darwin,x86_64,i386,8,1,16.0 GB,100,149.7468,e0c08d4f6eb10bc0b337a9d47f86b3b110ed0836 -2020-07-17 14:59:33.707885,1.3.8,3.7.7,Darwin,x86_64,i386,8,1,16.0 GB,100,135.1724,c780d270b0ea0c691e1cc1dbf725d1c4b35e5299 -2020-07-20 14:39:41.950552,1.3.8,3.7.7,Darwin,x86_64,i386,8,1,16.0 GB,100,118.7033,8c0cfe3c14e06bf46bcd6480f6be5991b0fba077 -2020-07-21 11:04:32.178729,1.3.8,3.7.7,Darwin,x86_64,i386,8,1,16.0 GB,100,114.4563,e903933594cb86b187c6c79066f969a9653d1897 -2020-09-18 16:57:39.359920,1.4.0,3.8.3,Darwin,x86_64,i386,8,1,16.0 GB,100,121.6504,d9346f71a7992e7a2578186a7ad9238d93aa3e66 -2020-09-22 18:59:10.570372,1.6.0,3.8.3,Darwin,x86_64,i386,8,1,16.0 GB,100,98.4973,edfe1de5c6e46206f64d6b8218b490d074871d24 -2020-09-23 12:50:07.721451,1.6.1,3.8.3,Darwin,x86_64,i386,8,1,16.0 GB,100,97.5089,8ee6a0ddfadde9b1b7cc1fd96a3a8e513c8256e5 -2020-09-23 15:06:59.663871,1.6.2,3.8.3,Darwin,x86_64,i386,8,1,16.0 GB,100,62.1766,e0107122b3cadaa713e119ce68b843876024ee63 -2020-09-28 12:39:24.620412,1.6.3,3.8.3,Darwin,x86_64,i386,8,1,16.0 GB,100,62.6309,d779ef3d328e17d55239a893211774f9052d2168 +command,time_of_run,scaper_version,python_version,system,machine,processor,n_cpu,n_workers,memory,n_soundscapes,execution_time,git_commit_hash +python tests/profile_speed.py,2020-07-17 14:13:46.982171,1.3.8,3.7.7,Darwin,x86_64,i386,8,1,16.0 GB,100,149.7468,e0c08d4f6eb10bc0b337a9d47f86b3b110ed0836 +python tests/profile_speed.py,2020-07-17 14:59:33.707885,1.3.8,3.7.7,Darwin,x86_64,i386,8,1,16.0 GB,100,135.1724,c780d270b0ea0c691e1cc1dbf725d1c4b35e5299 +python tests/profile_speed.py,2020-07-20 14:39:41.950552,1.3.8,3.7.7,Darwin,x86_64,i386,8,1,16.0 GB,100,118.7033,8c0cfe3c14e06bf46bcd6480f6be5991b0fba077 +python tests/profile_speed.py,2020-07-21 11:04:32.178729,1.3.8,3.7.7,Darwin,x86_64,i386,8,1,16.0 
GB,100,114.4563,e903933594cb86b187c6c79066f969a9653d1897 +python tests/profile_speed.py,2020-09-18 16:57:39.359920,1.4.0,3.8.3,Darwin,x86_64,i386,8,1,16.0 GB,100,121.6504,d9346f71a7992e7a2578186a7ad9238d93aa3e66 +python tests/profile_speed.py,2020-09-22 18:59:10.570372,1.6.0,3.8.3,Darwin,x86_64,i386,8,1,16.0 GB,100,98.4973,edfe1de5c6e46206f64d6b8218b490d074871d24 +python tests/profile_speed.py,2020-09-23 12:50:07.721451,1.6.1,3.8.3,Darwin,x86_64,i386,8,1,16.0 GB,100,97.5089,8ee6a0ddfadde9b1b7cc1fd96a3a8e513c8256e5 +python tests/profile_speed.py,2020-09-23 15:06:59.663871,1.6.2,3.8.3,Darwin,x86_64,i386,8,1,16.0 GB,100,62.1766,e0107122b3cadaa713e119ce68b843876024ee63 +python tests/profile_speed.py,2020-09-28 12:39:24.620412,1.6.3,3.8.3,Darwin,x86_64,i386,8,1,16.0 GB,100,62.6309,d779ef3d328e17d55239a893211774f9052d2168 +python tests/profile_speed.py --quick,2020-09-28 13:58:39.939934,1.6.4,3.8.3,Darwin,x86_64,i386,8,1,16.0 GB,100,17.6575,c688a0f28bb19cfd49ec339fd43c1d168262e07f diff --git a/tests/profile_speed.py b/tests/profile_speed.py index 9f74e8a..3a1e26e 100644 --- a/tests/profile_speed.py +++ b/tests/profile_speed.py @@ -19,9 +19,18 @@ import datetime import math import multiprocessing +import argparse +import sys + +parser = argparse.ArgumentParser() +parser.add_argument('--quick', action='store_true') +args = parser.parse_args() +cmd_line = ' '.join(sys.argv) +cmd_line = 'python ' + cmd_line # Download the audio automatically FIX_DIR = 'tests/data/' +QUICK_PITCH_TIME = args.quick def get_git_commit_hash(): process = subprocess.Popen( @@ -130,6 +139,7 @@ def convert_size(size_bytes): allow_repeated_source=True, reverb=0.1, disable_sox_warnings=True, + quick_pitch_time=QUICK_PITCH_TIME, no_audio=False, txt_path=txtfile) @@ -137,6 +147,7 @@ def convert_size(size_bytes): uname = platform.uname() row = { + 'command': cmd_line, 'time_of_run': str(datetime.datetime.now()), 'scaper_version': scaper.__version__, 'python_version': platform.python_version(), diff --git 
a/tests/test_core.py b/tests/test_core.py index 0933b67..1222deb 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -232,7 +232,7 @@ def test_generate_from_jams(atol=1e-5, rtol=1e-8): assert np.allclose(orig_wav, fj_soundscape_audio) regjam = jams.load(TEST_PATHS[44100]['REG'].jams) - sandbox_exclude = ['fix_clipping', 'peak_normalization'] + sandbox_exclude = ['fix_clipping', 'peak_normalization', 'quick_pitch_time'] _compare_scaper_jams( regjam, fj_soundscape_jam, exclude_additional_scaper_sandbox_keys=sandbox_exclude) @@ -247,7 +247,7 @@ def test_generate_from_jams(atol=1e-5, rtol=1e-8): assert np.allclose(gen_wav, orig_wav, atol=atol, rtol=rtol) # validate jams - sandbox_exclude = ['fix_clipping', 'peak_normalization'] + sandbox_exclude = ['fix_clipping', 'peak_normalization', 'quick_pitch_time'] gen_jam = jams.load(gen_jam_file.name) _compare_scaper_jams( regjam, gen_jam, @@ -390,6 +390,7 @@ def _validate_soundscape_and_event_audio(orig_wav_file, assert ann.sandbox.scaper.jams_path == orig_jam_file.name assert ann.sandbox.scaper.fix_clipping is False assert ann.sandbox.scaper.peak_normalization is False + assert ann.sandbox.scaper.quick_pitch_time is False assert ann.sandbox.scaper.save_isolated_events is False assert ann.sandbox.scaper.isolated_events_path is None assert ann.sandbox.scaper.disable_sox_warnings is True @@ -434,6 +435,7 @@ def _validate_soundscape_and_event_audio(orig_wav_file, assert ann.sandbox.scaper.jams_path == orig_jam_file.name assert ann.sandbox.scaper.fix_clipping is True assert ann.sandbox.scaper.peak_normalization is False + assert ann.sandbox.scaper.quick_pitch_time is False assert ann.sandbox.scaper.save_isolated_events is False assert ann.sandbox.scaper.isolated_events_path is None assert ann.sandbox.scaper.disable_sox_warnings is True @@ -463,6 +465,7 @@ def _validate_soundscape_and_event_audio(orig_wav_file, # assert ann.sandbox.scaper.jams_path == gen_jam_file.name assert ann.sandbox.scaper.fix_clipping is True assert 
ann.sandbox.scaper.peak_normalization is False + assert ann.sandbox.scaper.quick_pitch_time is False assert ann.sandbox.scaper.save_isolated_events is False assert ann.sandbox.scaper.isolated_events_path is None assert ann.sandbox.scaper.disable_sox_warnings is True @@ -474,6 +477,72 @@ def _validate_soundscape_and_event_audio(orig_wav_file, assert ann.sandbox.scaper.ref_db_change != 0 assert ann.sandbox.scaper.ref_db_generated != \ ann.sandbox.scaper.ref_db + + # Test when we generate ONLY a JAMS file, and then generate audio from the JAMS + # Case 3: WITH quick_pitch_time=True, no clipping + for _ in range(5): + (soundscape_audio, soundscape_jam, annotation_list, event_audio_list) = \ + sc.generate(audio_path=orig_wav_file.name, + jams_path=orig_jam_file.name, + txt_path=orig_txt_file.name, + no_audio=True, + fix_clipping=True, + quick_pitch_time=True, + disable_instantiation_warnings=True) + + assert soundscape_audio is None + assert event_audio_list is None + assert soundscape_jam is not None + assert annotation_list is not None + + ann = soundscape_jam.annotations.search(namespace='scaper')[0] + + assert ann.sandbox.scaper.audio_path == orig_wav_file.name + assert ann.sandbox.scaper.jams_path == orig_jam_file.name + assert ann.sandbox.scaper.fix_clipping is True + assert ann.sandbox.scaper.peak_normalization is False + assert ann.sandbox.scaper.quick_pitch_time is True + assert ann.sandbox.scaper.save_isolated_events is False + assert ann.sandbox.scaper.isolated_events_path is None + assert ann.sandbox.scaper.disable_sox_warnings is True + assert ann.sandbox.scaper.no_audio is True + assert ann.sandbox.scaper.txt_path == orig_txt_file.name + assert ann.sandbox.scaper.txt_sep == '\t' + assert ann.sandbox.scaper.disable_instantiation_warnings is True + assert ann.sandbox.scaper.peak_normalization_scale_factor == 1.0 + assert ann.sandbox.scaper.ref_db_change == 0 + assert ann.sandbox.scaper.ref_db_generated == \ + ann.sandbox.scaper.ref_db + + 
(fj_soundscape_audio, fj_soundscape_jam, fj_annotation_list, fj_event_audio_list) = \ + scaper.generate_from_jams(orig_jam_file.name, + audio_outfile=gen_wav_file.name, + jams_outfile=gen_jam_file.name, + txt_path=gen_txt_file.name) + + assert fj_soundscape_audio is not None + assert fj_event_audio_list is not None + assert fj_soundscape_jam is not None + assert fj_annotation_list is not None + + ann = fj_soundscape_jam.annotations.search(namespace='scaper')[0] + + # assert ann.sandbox.scaper.audio_path == gen_wav_file.name + # assert ann.sandbox.scaper.jams_path == gen_jam_file.name + assert ann.sandbox.scaper.fix_clipping is True + assert ann.sandbox.scaper.peak_normalization is False + assert ann.sandbox.scaper.quick_pitch_time is True + assert ann.sandbox.scaper.save_isolated_events is False + assert ann.sandbox.scaper.isolated_events_path is None + assert ann.sandbox.scaper.disable_sox_warnings is True + assert ann.sandbox.scaper.no_audio is True # TODO + # assert ann.sandbox.scaper.txt_path == gen_txt_file.name + assert ann.sandbox.scaper.txt_sep == '\t' + assert ann.sandbox.scaper.disable_instantiation_warnings is True + assert ann.sandbox.scaper.peak_normalization_scale_factor == 1.0 + assert ann.sandbox.scaper.ref_db_change == 0 + assert ann.sandbox.scaper.ref_db_generated == \ + ann.sandbox.scaper.ref_db # validate return API # _compare_scaper_jams(soundscape_jam, fj_soundscape_jam) @@ -1489,6 +1558,7 @@ def test_scaper_instantiate(): 'fix_clipping', 'peak_normalization', 'peak_normalization_scale_factor', + 'quick_pitch_time', 'ref_db_change', 'ref_db_generated', 'txt_sep',