Skip to content

Commit

Permalink
Trim events using SoundFile. (#128)
Browse files Browse the repository at this point in the history
* Trim on read via SoundFile.

* Updating regression data.

* Removing import of match_sample_length in core.py.

* Updating changelog.

Co-authored-by: pseeth <prem@descript.com>
  • Loading branch information
pseeth and pseeth authored Sep 23, 2020
1 parent 8ee6a0d commit 1cbfa8a
Show file tree
Hide file tree
Showing 21 changed files with 489 additions and 481 deletions.
6 changes: 6 additions & 0 deletions docs/changes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,12 @@

Changelog
---------
v1.6.1
~~~~~~
- Trimming now happens on read, rather than after read, so only the necessary portion of each source file is loaded into memory instead of the entire file. This is helpful for long source audio files.
- Since the audio processing pipeline has changed, this version will generate marginally different audio data compared to previous versions: the change is not perceptible, but np.allclose() tests on audio from previous versions of Scaper may fail.
- This change updates the regression data for Scaper's regression tests.

v1.6.0
~~~~~~
- Uses soxbindings when installing on Linux or MacOS, which results in better performance.
Expand Down
28 changes: 15 additions & 13 deletions scaper/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@
from .util import polyphony_gini
from .util import is_real_number, is_real_array
from .audio import get_integrated_lufs
from .audio import match_sample_length
from .version import version as scaper_version

SUPPORTED_DIST = {"const": _sample_const,
Expand Down Expand Up @@ -1799,10 +1798,6 @@ def _generate_audio(self, audio_path, ann, reverb=None,
rate=self.sr,
channels=self.n_channels
)
# Then trim the duration of the background event
tfm.trim(e.value['source_time'],
e.value['source_time'] +
e.value['event_duration'])

# PROCESS BEFORE COMPUTING LUFS
tmpfiles_internal = []
Expand All @@ -1811,11 +1806,17 @@ def _generate_audio(self, audio_path, ann, reverb=None,
tmpfiles_internal.append(
tempfile.NamedTemporaryFile(
suffix='.wav', delete=False))
# read in background off disk
# read in background off disk, using start and stop
# to only read the necessary audio
event_sr = soundfile.info(e.value['source_file']).samplerate
start = int(e.value['source_time'] * event_sr)
stop = int((e.value['source_time'] + e.value['event_duration']) * event_sr)
event_audio, event_sr = soundfile.read(
e.value['source_file'], always_2d=True)
e.value['source_file'], always_2d=True,
start=start, stop=stop)
# tile the background along the appropriate dimensions
event_audio = np.tile(event_audio, (ntiles, 1))
event_audio = event_audio[:stop]
event_audio = tfm.build_array(
input_array=event_audio,
sample_rate_in=event_sr
Expand Down Expand Up @@ -1853,10 +1854,6 @@ def _generate_audio(self, audio_path, ann, reverb=None,
rate=self.sr,
channels=self.n_channels
)
# Trim
tfm.trim(e.value['source_time'],
e.value['source_time'] +
e.value['event_duration'])

# Pitch shift
if e.value['pitch_shift'] is not None:
Expand All @@ -1875,9 +1872,14 @@ def _generate_audio(self, audio_path, ann, reverb=None,
tempfile.NamedTemporaryFile(
suffix='.wav', delete=False))

# synthesize edited foreground sound event
# synthesize edited foreground sound event,
# doing the trim via soundfile
event_sr = soundfile.info(e.value['source_file']).samplerate
start = int(e.value['source_time'] * event_sr)
stop = int((e.value['source_time'] + e.value['event_duration']) * event_sr)
event_audio, event_sr = soundfile.read(
e.value['source_file'], always_2d=True)
e.value['source_file'], always_2d=True,
start=start, stop=stop)
event_audio = tfm.build_array(
input_array=event_audio,
sample_rate_in=event_sr
Expand Down
2 changes: 1 addition & 1 deletion scaper/version.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@
"""Version info"""

short_version = '1.6'
version = '1.6.0'
version = '1.6.1'
98 changes: 49 additions & 49 deletions tests/data/regression/bgonly_soundscape_20200501_22050.jams
Original file line number Diff line number Diff line change
@@ -1,19 +1,24 @@
{
"file_metadata": {
"artist": "",
"jams_version": "0.3.3",
"identifiers": {},
"duration": 10.0,
"title": "",
"release": ""
},
"annotations": [
{
"annotation_metadata": {
"curator": {
"name": "",
"email": ""
},
"annotator": {},
"version": "",
"corpus": "",
"annotation_tools": "",
"annotation_rules": "",
"validation": "",
"data_source": ""
},
"namespace": "scaper",
"data": [
{
"confidence": 1.0,
"duration": 10.0,
"time": 0.0,
"duration": 10.0,
"value": {
"label": "park",
"source_file": "tests/data/audio/background/park/268903__yonts__city-park-tel-aviv-israel.wav",
Expand All @@ -24,45 +29,17 @@
"role": "background",
"pitch_shift": null,
"time_stretch": null
}
},
"confidence": 1.0
}
],
"time": 0,
"annotation_metadata": {
"corpus": "",
"validation": "",
"annotation_rules": "",
"curator": {
"name": "",
"email": ""
},
"annotation_tools": "",
"data_source": "",
"version": "",
"annotator": {}
},
"duration": 10.0,
"sandbox": {
"scaper": {
"allow_repeated_source": true,
"fade_out_len": 0.01,
"bg_path": "tests/data/audio/background",
"duration": 10.0,
"original_duration": 10.0,
"reverb": 0.2,
"polyphony_max": 0,
"polyphony_gini": 0,
"fg_labels": [
"car_horn",
"human_voice",
"siren"
],
"n_events": 0,
"scaper_version": "1.3.6",
"fade_in_len": 0.01,
"n_channels": 1,
"fg_path": "tests/data/audio/foreground",
"protected_labels": [],
"ref_db": -20,
"bg_path": "tests/data/audio/background",
"fg_spec": [],
"bg_spec": [
[
[
Expand Down Expand Up @@ -94,21 +71,44 @@
null
]
],
"soundscape_audio_path": "tests/data/regression/bgonly_soundscape_20200501_22050.wav",
"fg_spec": [],
"sr": 22050,
"allow_repeated_label": true,
"fg_labels": [
"car_horn",
"human_voice",
"siren"
],
"bg_labels": [
"park",
"restaurant",
"street"
],
"duration": 10.0,
"protected_labels": [],
"sr": 22050,
"ref_db": -20,
"n_channels": 1,
"fade_in_len": 0.01,
"fade_out_len": 0.01,
"n_events": 0,
"polyphony_max": 0,
"polyphony_gini": 0,
"allow_repeated_label": true,
"allow_repeated_source": true,
"reverb": 0.2,
"scaper_version": "1.6.1",
"soundscape_audio_path": "tests/data/regression/bgonly_soundscape_20200501_22050.wav",
"isolated_events_audio_path": []
}
},
"namespace": "scaper"
"time": 0,
"duration": 10.0
}
],
"file_metadata": {
"title": "",
"artist": "",
"release": "",
"duration": 10.0,
"identifiers": {},
"jams_version": "0.3.4"
},
"sandbox": {}
}
Binary file modified tests/data/regression/bgonly_soundscape_20200501_22050.wav
Binary file not shown.
98 changes: 49 additions & 49 deletions tests/data/regression/bgonly_soundscape_20200501_44100.jams
Original file line number Diff line number Diff line change
@@ -1,19 +1,24 @@
{
"file_metadata": {
"artist": "",
"jams_version": "0.3.3",
"identifiers": {},
"duration": 10.0,
"title": "",
"release": ""
},
"annotations": [
{
"annotation_metadata": {
"curator": {
"name": "",
"email": ""
},
"annotator": {},
"version": "",
"corpus": "",
"annotation_tools": "",
"annotation_rules": "",
"validation": "",
"data_source": ""
},
"namespace": "scaper",
"data": [
{
"confidence": 1.0,
"duration": 10.0,
"time": 0.0,
"duration": 10.0,
"value": {
"label": "park",
"source_file": "tests/data/audio/background/park/268903__yonts__city-park-tel-aviv-israel.wav",
Expand All @@ -24,45 +29,17 @@
"role": "background",
"pitch_shift": null,
"time_stretch": null
}
},
"confidence": 1.0
}
],
"time": 0,
"annotation_metadata": {
"corpus": "",
"validation": "",
"annotation_rules": "",
"curator": {
"name": "",
"email": ""
},
"annotation_tools": "",
"data_source": "",
"version": "",
"annotator": {}
},
"duration": 10.0,
"sandbox": {
"scaper": {
"allow_repeated_source": true,
"fade_out_len": 0.01,
"bg_path": "tests/data/audio/background",
"duration": 10.0,
"original_duration": 10.0,
"reverb": 0.2,
"polyphony_max": 0,
"polyphony_gini": 0,
"fg_labels": [
"car_horn",
"human_voice",
"siren"
],
"n_events": 0,
"scaper_version": "1.3.6",
"fade_in_len": 0.01,
"n_channels": 1,
"fg_path": "tests/data/audio/foreground",
"protected_labels": [],
"ref_db": -20,
"bg_path": "tests/data/audio/background",
"fg_spec": [],
"bg_spec": [
[
[
Expand Down Expand Up @@ -94,21 +71,44 @@
null
]
],
"soundscape_audio_path": "tests/data/regression/bgonly_soundscape_20200501_44100.wav",
"fg_spec": [],
"sr": 44100,
"allow_repeated_label": true,
"fg_labels": [
"car_horn",
"human_voice",
"siren"
],
"bg_labels": [
"park",
"restaurant",
"street"
],
"duration": 10.0,
"protected_labels": [],
"sr": 44100,
"ref_db": -20,
"n_channels": 1,
"fade_in_len": 0.01,
"fade_out_len": 0.01,
"n_events": 0,
"polyphony_max": 0,
"polyphony_gini": 0,
"allow_repeated_label": true,
"allow_repeated_source": true,
"reverb": 0.2,
"scaper_version": "1.6.1",
"soundscape_audio_path": "tests/data/regression/bgonly_soundscape_20200501_44100.wav",
"isolated_events_audio_path": []
}
},
"namespace": "scaper"
"time": 0,
"duration": 10.0
}
],
"file_metadata": {
"title": "",
"artist": "",
"release": "",
"duration": 10.0,
"identifiers": {},
"jams_version": "0.3.4"
},
"sandbox": {}
}
Binary file modified tests/data/regression/bgonly_soundscape_20200501_44100.wav
Binary file not shown.
Loading

0 comments on commit 1cbfa8a

Please sign in to comment.