diff --git a/docs/changes.rst b/docs/changes.rst index 0dd360e..d63c7df 100644 --- a/docs/changes.rst +++ b/docs/changes.rst @@ -2,6 +2,11 @@ Changelog --------- +v1.6.4 +~~~~~~ +- Scaper.generate now accepts a new argument for controlling trade-off between speed and quality in pitch shifting and time stretching: + - quick_pitch_time: if True, both time stretching and pitch shifting will be applied in quick mode, which is much faster but has lower quality. + v1.6.3 ~~~~~~ - Scaper.generate now accepts two new optional arguments for controlling audio clipping and normalization: diff --git a/scaper/core.py b/scaper/core.py index 9b2721c..fe9c3b7 100644 --- a/scaper/core.py +++ b/scaper/core.py @@ -223,6 +223,11 @@ def generate_from_jams(jams_infile, else: peak_normalization = False + if 'quick_pitch_time' in ann.sandbox.scaper.keys(): + quick_pitch_time = ann.sandbox.scaper['quick_pitch_time'] + else: + quick_pitch_time = False + # Cast ann.sandbox.scaper to a Sandbox object ann.sandbox.scaper = jams.Sandbox(**ann.sandbox.scaper) @@ -233,6 +238,7 @@ def generate_from_jams(jams_infile, reverb=reverb, fix_clipping=fix_clipping, peak_normalization=peak_normalization, + quick_pitch_time=quick_pitch_time, save_isolated_events=save_isolated_events, isolated_events_path=isolated_events_path, disable_sox_warnings=disable_sox_warnings) @@ -242,6 +248,7 @@ def generate_from_jams(jams_infile, ann.sandbox.scaper.reverb = reverb ann.sandbox.scaper.fix_clipping = fix_clipping ann.sandbox.scaper.peak_normalization = peak_normalization + ann.sandbox.scaper.quick_pitch_time = quick_pitch_time ann.sandbox.scaper.save_isolated_events = save_isolated_events ann.sandbox.scaper.isolated_events_path = isolated_events_path ann.sandbox.scaper.disable_sox_warnings = disable_sox_warnings @@ -1746,6 +1753,7 @@ def _generate_audio(self, reverb=None, fix_clipping=False, peak_normalization=False, + quick_pitch_time=False, save_isolated_events=False, isolated_events_path=None, disable_sox_warnings=True): @@ -1776,6 +1784,10 @@ def _generate_audio(self, each isolated event is also scaled accordingly. Note: this will change the actual value of `ref_db` in the generated audio. The scaling factor that was used is returned. + quick_pitch_time : bool + When True (default=False), time stretching and pitch shifting will be + applied with `quick=True`. This is much faster but the resultant + audio is generally of lower audio quality. save_isolated_events : bool If True, this will save the isolated foreground events and backgrounds in a directory adjacent to the generated soundscape @@ -1922,12 +1934,12 @@ def _generate_audio(self, # Pitch shift if e.value['pitch_shift'] is not None: - tfm.pitch(e.value['pitch_shift']) + tfm.pitch(e.value['pitch_shift'], quick=quick_pitch_time) # Time stretch if e.value['time_stretch'] is not None: factor = 1.0 / float(e.value['time_stretch']) - tfm.tempo(factor, audio_type='s', quick=False) + tfm.tempo(factor, audio_type='s', quick=quick_pitch_time) # PROCESS BEFORE COMPUTING LUFS tmpfiles_internal = [] @@ -2102,6 +2114,7 @@ def generate(self, reverb=None, fix_clipping=False, peak_normalization=False, + quick_pitch_time=False, save_isolated_events=False, isolated_events_path=None, disable_sox_warnings=True, @@ -2154,6 +2167,10 @@ def generate(self, `ref_db` value will be stored in the JAMS annotation. The SNR of foreground events with respect to the background is unaffected except when extreme scaling is required to achieve peak normalization. + quick_pitch_time : bool + When True (default=False), time stretching and pitch shifting will be + applied with `quick=True`. This is much faster but the resultant + audio is generally of lower audio quality. save_isolated_events : bool If True, this will save the isolated foreground events and backgrounds in a directory adjacent to the generated soundscape @@ -2253,7 +2270,8 @@ def generate(self, isolated_events_path=isolated_events_path, disable_sox_warnings=disable_sox_warnings, fix_clipping=fix_clipping, - peak_normalization=peak_normalization) + peak_normalization=peak_normalization, + quick_pitch_time=quick_pitch_time) # TODO: Stick to heavy handed overwriting for now, in the future we # should consolidate this with what happens inside _instantiate(). @@ -2264,6 +2282,7 @@ def generate(self, ann.sandbox.scaper.reverb = reverb ann.sandbox.scaper.fix_clipping = fix_clipping ann.sandbox.scaper.peak_normalization = peak_normalization + ann.sandbox.scaper.quick_pitch_time = quick_pitch_time ann.sandbox.scaper.save_isolated_events = save_isolated_events ann.sandbox.scaper.isolated_events_path = isolated_events_path ann.sandbox.scaper.disable_sox_warnings = disable_sox_warnings diff --git a/scaper/version.py b/scaper/version.py index c65a965..98e03ab 100644 --- a/scaper/version.py +++ b/scaper/version.py @@ -3,4 +3,4 @@ """Version info""" short_version = '1.6' -version = '1.6.3' +version = '1.6.4' diff --git a/tests/data/regression/bgonly_soundscape_20200501_22050.jams b/tests/data/regression/bgonly_soundscape_20200501_22050.jams index 4dd6f1c..df080c9 100644 --- a/tests/data/regression/bgonly_soundscape_20200501_22050.jams +++ b/tests/data/regression/bgonly_soundscape_20200501_22050.jams @@ -93,7 +93,7 @@ "allow_repeated_label": true, "allow_repeated_source": true, "reverb": 0.2, - "scaper_version": "1.6.3", + "scaper_version": "1.6.4", "soundscape_audio_path": "tests/data/regression/bgonly_soundscape_20200501_22050.wav", "isolated_events_audio_path": [], "audio_path": "tests/data/regression/bgonly_soundscape_20200501_22050.wav", @@ -109,7 +109,8 @@ "disable_instantiation_warnings": true, "peak_normalization_scale_factor": 1.0, "ref_db_change": 0, - "ref_db_generated": -20 + "ref_db_generated": -20, + "quick_pitch_time": false } }, "time": 0, diff --git a/tests/data/regression/bgonly_soundscape_20200501_44100.jams b/tests/data/regression/bgonly_soundscape_20200501_44100.jams index d1cf2cc..a1d2a1f 100644 --- a/tests/data/regression/bgonly_soundscape_20200501_44100.jams +++ b/tests/data/regression/bgonly_soundscape_20200501_44100.jams @@ -93,7 +93,7 @@ "allow_repeated_label": true, "allow_repeated_source": true, "reverb": 0.2, - "scaper_version": "1.6.3", + "scaper_version": "1.6.4", "soundscape_audio_path": "tests/data/regression/bgonly_soundscape_20200501_44100.wav", "isolated_events_audio_path": [], "audio_path": "tests/data/regression/bgonly_soundscape_20200501_44100.wav", @@ -109,7 +109,8 @@ "disable_instantiation_warnings": true, "peak_normalization_scale_factor": 1.0, "ref_db_change": 0, - "ref_db_generated": -20 + "ref_db_generated": -20, + "quick_pitch_time": false } }, "time": 0, diff --git a/tests/data/regression/reverb_soundscape_20200501_22050.jams b/tests/data/regression/reverb_soundscape_20200501_22050.jams index 38dbb8e..c74462e 100644 --- a/tests/data/regression/reverb_soundscape_20200501_22050.jams +++ b/tests/data/regression/reverb_soundscape_20200501_22050.jams @@ -235,7 +235,7 @@ "allow_repeated_label": true, "allow_repeated_source": true, "reverb": 0.2, - "scaper_version": "1.6.3", + "scaper_version": "1.6.4", "soundscape_audio_path": "tests/data/regression/reverb_soundscape_20200501_22050.wav", "isolated_events_audio_path": [], "audio_path": "tests/data/regression/reverb_soundscape_20200501_22050.wav", @@ -251,7 +251,8 @@ "disable_instantiation_warnings": true, "peak_normalization_scale_factor": 1.0, "ref_db_change": 0, - "ref_db_generated": -50 + "ref_db_generated": -50, + "quick_pitch_time": false } }, "time": 0, diff --git a/tests/data/regression/reverb_soundscape_20200501_44100.jams b/tests/data/regression/reverb_soundscape_20200501_44100.jams index cad1151..66bdb33 100644 --- a/tests/data/regression/reverb_soundscape_20200501_44100.jams +++ b/tests/data/regression/reverb_soundscape_20200501_44100.jams @@ -235,7 +235,7 @@ "allow_repeated_label": true, "allow_repeated_source": true, "reverb": 0.2, - "scaper_version": "1.6.3", + "scaper_version": "1.6.4", "soundscape_audio_path": "tests/data/regression/reverb_soundscape_20200501_44100.wav", "isolated_events_audio_path": [], "audio_path": "tests/data/regression/reverb_soundscape_20200501_44100.wav", @@ -251,7 +251,8 @@ "disable_instantiation_warnings": true, "peak_normalization_scale_factor": 1.0, "ref_db_change": 0, - "ref_db_generated": -50 + "ref_db_generated": -50, + "quick_pitch_time": false } }, "time": 0, diff --git a/tests/data/regression/soundscape_20200501_22050.jams b/tests/data/regression/soundscape_20200501_22050.jams index 508cab9..5d132d4 100644 --- a/tests/data/regression/soundscape_20200501_22050.jams +++ b/tests/data/regression/soundscape_20200501_22050.jams @@ -235,7 +235,7 @@ "allow_repeated_label": true, "allow_repeated_source": true, "reverb": null, - "scaper_version": "1.6.3", + "scaper_version": "1.6.4", "soundscape_audio_path": "tests/data/regression/soundscape_20200501_22050.wav", "isolated_events_audio_path": [], "audio_path": "tests/data/regression/soundscape_20200501_22050.wav", @@ -251,7 +251,8 @@ "disable_instantiation_warnings": true, "peak_normalization_scale_factor": 1.0, "ref_db_change": 0, - "ref_db_generated": -50 + "ref_db_generated": -50, + "quick_pitch_time": false } }, "time": 0, diff --git a/tests/data/regression/soundscape_20200501_44100.jams b/tests/data/regression/soundscape_20200501_44100.jams index 06b1496..927e9b8 100644 --- a/tests/data/regression/soundscape_20200501_44100.jams +++ b/tests/data/regression/soundscape_20200501_44100.jams @@ -235,7 +235,7 @@ "allow_repeated_label": true, "allow_repeated_source": true, "reverb": null, - "scaper_version": "1.6.3", + "scaper_version": "1.6.4", "soundscape_audio_path": "tests/data/regression/soundscape_20200501_44100.wav", "isolated_events_audio_path": [], "audio_path": "tests/data/regression/soundscape_20200501_44100.wav", @@ -251,7 +251,8 @@ "disable_instantiation_warnings": true, "peak_normalization_scale_factor": 1.0, "ref_db_change": 0, - "ref_db_generated": -50 + "ref_db_generated": -50, + "quick_pitch_time": false } }, "time": 0, diff --git a/tests/profile_results.csv b/tests/profile_results.csv index 5e596ab..3d9c492 100644 --- a/tests/profile_results.csv +++ b/tests/profile_results.csv @@ -1,10 +1,11 @@ -time_of_run,scaper_version,python_version,system,machine,processor,n_cpu,n_workers,memory,n_soundscapes,execution_time,git_commit_hash -2020-07-17 14:13:46.982171,1.3.8,3.7.7,Darwin,x86_64,i386,8,1,16.0 GB,100,149.7468,e0c08d4f6eb10bc0b337a9d47f86b3b110ed0836 -2020-07-17 14:59:33.707885,1.3.8,3.7.7,Darwin,x86_64,i386,8,1,16.0 GB,100,135.1724,c780d270b0ea0c691e1cc1dbf725d1c4b35e5299 -2020-07-20 14:39:41.950552,1.3.8,3.7.7,Darwin,x86_64,i386,8,1,16.0 GB,100,118.7033,8c0cfe3c14e06bf46bcd6480f6be5991b0fba077 -2020-07-21 11:04:32.178729,1.3.8,3.7.7,Darwin,x86_64,i386,8,1,16.0 GB,100,114.4563,e903933594cb86b187c6c79066f969a9653d1897 -2020-09-18 16:57:39.359920,1.4.0,3.8.3,Darwin,x86_64,i386,8,1,16.0 GB,100,121.6504,d9346f71a7992e7a2578186a7ad9238d93aa3e66 -2020-09-22 18:59:10.570372,1.6.0,3.8.3,Darwin,x86_64,i386,8,1,16.0 GB,100,98.4973,edfe1de5c6e46206f64d6b8218b490d074871d24 -2020-09-23 12:50:07.721451,1.6.1,3.8.3,Darwin,x86_64,i386,8,1,16.0 GB,100,97.5089,8ee6a0ddfadde9b1b7cc1fd96a3a8e513c8256e5 -2020-09-23 15:06:59.663871,1.6.2,3.8.3,Darwin,x86_64,i386,8,1,16.0 GB,100,62.1766,e0107122b3cadaa713e119ce68b843876024ee63 -2020-09-28 12:39:24.620412,1.6.3,3.8.3,Darwin,x86_64,i386,8,1,16.0 GB,100,62.6309,d779ef3d328e17d55239a893211774f9052d2168 +command,time_of_run,scaper_version,python_version,system,machine,processor,n_cpu,n_workers,memory,n_soundscapes,execution_time,git_commit_hash +python tests/profile_speed.py,2020-07-17 14:13:46.982171,1.3.8,3.7.7,Darwin,x86_64,i386,8,1,16.0 GB,100,149.7468,e0c08d4f6eb10bc0b337a9d47f86b3b110ed0836 +python tests/profile_speed.py,2020-07-17 14:59:33.707885,1.3.8,3.7.7,Darwin,x86_64,i386,8,1,16.0 GB,100,135.1724,c780d270b0ea0c691e1cc1dbf725d1c4b35e5299 +python tests/profile_speed.py,2020-07-20 14:39:41.950552,1.3.8,3.7.7,Darwin,x86_64,i386,8,1,16.0 GB,100,118.7033,8c0cfe3c14e06bf46bcd6480f6be5991b0fba077 +python tests/profile_speed.py,2020-07-21 11:04:32.178729,1.3.8,3.7.7,Darwin,x86_64,i386,8,1,16.0 GB,100,114.4563,e903933594cb86b187c6c79066f969a9653d1897 +python tests/profile_speed.py,2020-09-18 16:57:39.359920,1.4.0,3.8.3,Darwin,x86_64,i386,8,1,16.0 GB,100,121.6504,d9346f71a7992e7a2578186a7ad9238d93aa3e66 +python tests/profile_speed.py,2020-09-22 18:59:10.570372,1.6.0,3.8.3,Darwin,x86_64,i386,8,1,16.0 GB,100,98.4973,edfe1de5c6e46206f64d6b8218b490d074871d24 +python tests/profile_speed.py,2020-09-23 12:50:07.721451,1.6.1,3.8.3,Darwin,x86_64,i386,8,1,16.0 GB,100,97.5089,8ee6a0ddfadde9b1b7cc1fd96a3a8e513c8256e5 +python tests/profile_speed.py,2020-09-23 15:06:59.663871,1.6.2,3.8.3,Darwin,x86_64,i386,8,1,16.0 GB,100,62.1766,e0107122b3cadaa713e119ce68b843876024ee63 +python tests/profile_speed.py,2020-09-28 12:39:24.620412,1.6.3,3.8.3,Darwin,x86_64,i386,8,1,16.0 GB,100,62.6309,d779ef3d328e17d55239a893211774f9052d2168 +python tests/profile_speed.py --quick,2020-09-28 13:58:39.939934,1.6.4,3.8.3,Darwin,x86_64,i386,8,1,16.0 GB,100,17.6575,c688a0f28bb19cfd49ec339fd43c1d168262e07f diff --git a/tests/profile_speed.py b/tests/profile_speed.py index 9f74e8a..3a1e26e 100644 --- a/tests/profile_speed.py +++ b/tests/profile_speed.py @@ -19,9 +19,18 @@ import datetime import math import multiprocessing +import argparse +import sys + +parser = argparse.ArgumentParser() +parser.add_argument('--quick', action='store_true') +args = parser.parse_args() +cmd_line = ' '.join(sys.argv) +cmd_line = 'python ' + cmd_line # Download the audio automatically FIX_DIR = 'tests/data/' +QUICK_PITCH_TIME = args.quick def get_git_commit_hash(): process = subprocess.Popen( @@ -130,6 +139,7 @@ def convert_size(size_bytes): allow_repeated_source=True, reverb=0.1, disable_sox_warnings=True, + quick_pitch_time=QUICK_PITCH_TIME, no_audio=False, txt_path=txtfile) @@ -137,6 +147,7 @@ def convert_size(size_bytes): uname = platform.uname() row = { + 'command': cmd_line, 'time_of_run': str(datetime.datetime.now()), 'scaper_version': scaper.__version__, 'python_version': platform.python_version(), diff --git a/tests/test_core.py b/tests/test_core.py index 0933b67..1222deb 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -232,7 +232,7 @@ def test_generate_from_jams(atol=1e-5, rtol=1e-8): assert np.allclose(orig_wav, fj_soundscape_audio) regjam = jams.load(TEST_PATHS[44100]['REG'].jams) - sandbox_exclude = ['fix_clipping', 'peak_normalization'] + sandbox_exclude = ['fix_clipping', 'peak_normalization', 'quick_pitch_time'] _compare_scaper_jams( regjam, fj_soundscape_jam, exclude_additional_scaper_sandbox_keys=sandbox_exclude) @@ -247,7 +247,7 @@ def test_generate_from_jams(atol=1e-5, rtol=1e-8): assert np.allclose(gen_wav, orig_wav, atol=atol, rtol=rtol) # validate jams - sandbox_exclude = ['fix_clipping', 'peak_normalization'] + sandbox_exclude = ['fix_clipping', 'peak_normalization', 'quick_pitch_time'] gen_jam = jams.load(gen_jam_file.name) _compare_scaper_jams( regjam, gen_jam, @@ -390,6 +390,7 @@ def _validate_soundscape_and_event_audio(orig_wav_file, assert ann.sandbox.scaper.jams_path == orig_jam_file.name assert ann.sandbox.scaper.fix_clipping is False assert ann.sandbox.scaper.peak_normalization is False + assert ann.sandbox.scaper.quick_pitch_time is False assert ann.sandbox.scaper.save_isolated_events is False assert ann.sandbox.scaper.isolated_events_path is None assert ann.sandbox.scaper.disable_sox_warnings is True @@ -434,6 +435,7 @@ def _validate_soundscape_and_event_audio(orig_wav_file, assert ann.sandbox.scaper.jams_path == orig_jam_file.name assert ann.sandbox.scaper.fix_clipping is True assert ann.sandbox.scaper.peak_normalization is False + assert ann.sandbox.scaper.quick_pitch_time is False assert ann.sandbox.scaper.save_isolated_events is False assert ann.sandbox.scaper.isolated_events_path is None assert ann.sandbox.scaper.disable_sox_warnings is True @@ -463,6 +465,7 @@ def _validate_soundscape_and_event_audio(orig_wav_file, # assert ann.sandbox.scaper.jams_path == gen_jam_file.name assert ann.sandbox.scaper.fix_clipping is True assert ann.sandbox.scaper.peak_normalization is False + assert ann.sandbox.scaper.quick_pitch_time is False assert ann.sandbox.scaper.save_isolated_events is False assert ann.sandbox.scaper.isolated_events_path is None assert ann.sandbox.scaper.disable_sox_warnings is True @@ -474,6 +477,72 @@ def _validate_soundscape_and_event_audio(orig_wav_file, assert ann.sandbox.scaper.ref_db_change != 0 assert ann.sandbox.scaper.ref_db_generated != \ ann.sandbox.scaper.ref_db + + # Test when we generate ONLY a JAMS file, and then generate audio from the JAMS + # Case 3: WITH quick_pitch_time=True, no clipping + for _ in range(5): + (soundscape_audio, soundscape_jam, annotation_list, event_audio_list) = \ + sc.generate(audio_path=orig_wav_file.name, + jams_path=orig_jam_file.name, + txt_path=orig_txt_file.name, + no_audio=True, + fix_clipping=True, + quick_pitch_time=True, + disable_instantiation_warnings=True) + + assert soundscape_audio is None + assert event_audio_list is None + assert soundscape_jam is not None + assert annotation_list is not None + + ann = soundscape_jam.annotations.search(namespace='scaper')[0] + + assert ann.sandbox.scaper.audio_path == orig_wav_file.name + assert ann.sandbox.scaper.jams_path == orig_jam_file.name + assert ann.sandbox.scaper.fix_clipping is True + assert ann.sandbox.scaper.peak_normalization is False + assert ann.sandbox.scaper.quick_pitch_time is True + assert ann.sandbox.scaper.save_isolated_events is False + assert ann.sandbox.scaper.isolated_events_path is None + assert ann.sandbox.scaper.disable_sox_warnings is True + assert ann.sandbox.scaper.no_audio is True + assert ann.sandbox.scaper.txt_path == orig_txt_file.name + assert ann.sandbox.scaper.txt_sep is '\t' + assert ann.sandbox.scaper.disable_instantiation_warnings is True + assert ann.sandbox.scaper.peak_normalization_scale_factor == 1.0 + assert ann.sandbox.scaper.ref_db_change == 0 + assert ann.sandbox.scaper.ref_db_generated == \ + ann.sandbox.scaper.ref_db + + (fj_soundscape_audio, fj_soundscape_jam, fj_annotation_list, fj_event_audio_list) = \ + scaper.generate_from_jams(orig_jam_file.name, + audio_outfile=gen_wav_file.name, + jams_outfile=gen_jam_file.name, + txt_path=gen_txt_file.name) + + assert fj_soundscape_audio is not None + assert fj_event_audio_list is not None + assert fj_soundscape_jam is not None + assert fj_annotation_list is not None + + ann = fj_soundscape_jam.annotations.search(namespace='scaper')[0] + + # assert ann.sandbox.scaper.audio_path == gen_wav_file.name + # assert ann.sandbox.scaper.jams_path == gen_jam_file.name + assert ann.sandbox.scaper.fix_clipping is True + assert ann.sandbox.scaper.peak_normalization is False + assert ann.sandbox.scaper.quick_pitch_time is True + assert ann.sandbox.scaper.save_isolated_events is False + assert ann.sandbox.scaper.isolated_events_path is None + assert ann.sandbox.scaper.disable_sox_warnings is True + assert ann.sandbox.scaper.no_audio is True # TODO + # assert ann.sandbox.scaper.txt_path == gen_txt_file.name + assert ann.sandbox.scaper.txt_sep is '\t' + assert ann.sandbox.scaper.disable_instantiation_warnings is True + assert ann.sandbox.scaper.peak_normalization_scale_factor == 1.0 + assert ann.sandbox.scaper.ref_db_change == 0 + assert ann.sandbox.scaper.ref_db_generated == \ + ann.sandbox.scaper.ref_db # validate return API # _compare_scaper_jams(soundscape_jam, fj_soundscape_jam) @@ -1489,6 +1558,7 @@ def test_scaper_instantiate(): 'fix_clipping', 'peak_normalization', 'peak_normalization_scale_factor', + 'quick_pitch_time', 'ref_db_change', 'ref_db_generated', 'txt_sep',