From 182894efa1561097c377a7f053786e93ed3471ea Mon Sep 17 00:00:00 2001 From: Brian McFee Date: Tue, 9 May 2017 17:28:26 -0400 Subject: [PATCH 01/31] working on alternate AnnotationData backend --- jams/core.py | 154 +++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 131 insertions(+), 23 deletions(-) diff --git a/jams/core.py b/jams/core.py index 85bfe6fd..501f7c47 100644 --- a/jams/core.py +++ b/jams/core.py @@ -26,7 +26,8 @@ AnnotationMetadata Curator Annotation - JamsFrame + AnnotationData + Observation Sandbox JObject @@ -34,6 +35,8 @@ import json import jsonschema +from collections import namedtuple +from sortedcontainers import SortedListWithKey import numpy as np import pandas as pd @@ -53,7 +56,8 @@ __all__ = ['load', 'JObject', 'Sandbox', 'JamsFrame', 'Annotation', 'Curator', 'AnnotationMetadata', - 'FileMetadata', 'AnnotationArray', 'JAMS'] + 'FileMetadata', 'AnnotationArray', 'JAMS', + 'AnnotationData', 'Observation'] @contextlib.contextmanager @@ -723,6 +727,113 @@ def __deepcopy__(self, memo): return jf +Observation = namedtuple('Observation', + ['time', 'duration', 'value', 'confidence']) +'''Core observation type: (time, duration, value, confidence).''' + + +class AnnotationData(object): + + __dense = False + + def __init__(self): + self.obs = SortedListWithKey(key=self._key) + + @classmethod + def _key(cls, obs): + return obs.time + + @property + def dense(self): + '''Boolean to determine whether the encoding is dense or sparse. + + Returns + ------- + dense : bool + `True` if the data should be encoded densely + `False` otherwise + ''' + return self.__dense + + @dense.setter + def dense(self, value): + '''Setter for dense''' + self.__dense = value + + def add_observation(self, time=None, duration=None, value=None, + confidence=None): + idx = self.obs.bisect_key(time) + self.obs.insert(idx, Observation(time=time, + duration=duration, + value=value, + confidence=confidence)) + + def append_records(self, records): + + for obs in records: + self.add_observation(**obs) + + def append_columns(self, columns): + + self.append_records(six.moves.zip(columns['time'], + columns['duration'], + columns['value'], + columns['confidence'])) + + def to_interval_values(self): + '''Extract observation data in a `mir_eval`-friendly format. + + Returns + ------- + intervals : np.ndarray [shape=(n, 2), dtype=float] + Start- and end-times of all valued intervals + + `intervals[i, :] = [time[i], time[i] + duration[i]]` + + labels : list + List view of value field. + ''' + ints, vals = [], [] + for obs in self.obs: + ints.append([obs.time, obs.time + obs.duration]) + vals.append(obs.value) + + return np.array(ints), vals + + @property + def __json__(self): + '''JSON encoding attribute''' + + if self.dense: + times, durations, values, confidences = [], [], [], [] + for (t, d, v, c) in self.obs: + times.append(t) + durations.append(d) + values.append(v) + confidences.append(c) + + return dict(time=times, + duration=durations, + value=values, + confidence=confidences) + else: + return [dict(time=o.time, + duration=o.duration, + value=o.value, + confidence=o.confidence) for o in self.obs] + + def __len__(self): + return len(self.obs) + + def __eq__(self, other): + return (isinstance(other, self.__class__) and + self.obs == other.obs) + + def __repr__(self): + return '<{}: {:d} observations>'.format(self.__class__.__name__, + len(self)) + + class Annotation(JObject): """Annotation base class.""" @@ -762,21 +873,24 @@ def __init__(self, namespace, data=None, annotation_metadata=None, self.annotation_metadata = AnnotationMetadata(**annotation_metadata) - if data is None: - self.data = JamsFrame() - else: - self.data = JamsFrame.from_dict(data) + self.namespace = namespace + + self.data = AnnotationData() + + # Set the data export coding to match the namespace + self.data.dense = schema.is_dense(self.namespace) + + if data is not None: + if isinstance(data, dict): + self.data.append_columns(data) + else: + self.data.append_records(data) if sandbox is None: sandbox = Sandbox() self.sandbox = Sandbox(**sandbox) - self.namespace = namespace - - # Set the data export coding to match the namespace - self.data.dense = schema.is_dense(self.namespace) - self.time = time self.duration = duration @@ -820,13 +934,7 @@ def __eq__(self, other): return False for key in self.__dict__: - value = True - if key == 'data': - value = self.__dict__[key].equals(other.__dict__[key]) - else: - value = self.__dict__[key] == other.__dict__[key] - - if not value: + if self.__dict__[key] != other.__dict__[key]: return False return True @@ -1016,10 +1124,10 @@ def trim(self, start_time, end_time, strict=False): # We do this rather than copying and directly manipulating the # annotation' data frame (which might be faster) since this way trim is # independent of the internal data representation. - for idx, obs in self.data.iterrows(): + for obs in self.data.obs: - obs_start = obs['time'].total_seconds() - obs_end = obs_start + obs['duration'].total_seconds() + obs_start = obs.time + obs_end = obs_start + obs.duration if obs_start < trim_end and obs_end > trim_start: @@ -1031,8 +1139,8 @@ def trim(self, start_time, end_time, strict=False): (new_start == obs_start and new_end == obs_end)): ann_trimmed.append(time=new_start, duration=new_duration, - value=obs['value'], - confidence=obs['confidence']) + value=obs.value, + confidence=obs.confidence) if 'trim' not in ann_trimmed.sandbox.keys(): ann_trimmed.sandbox.update( From d0196f835c775706de2786282b3f4a1a1cbec7e9 Mon Sep 17 00:00:00 2001 From: Brian McFee Date: Wed, 10 May 2017 09:33:50 -0400 Subject: [PATCH 02/31] fixing up tests --- jams/core.py | 12 ++++++++---- tests/test_convert.py | 16 ++++++++-------- tests/util_test.py | 13 +++++++------ 3 files changed, 23 insertions(+), 18 deletions(-) diff --git a/jams/core.py b/jams/core.py index 501f7c47..1fde0153 100644 --- a/jams/core.py +++ b/jams/core.py @@ -775,10 +775,11 @@ def append_records(self, records): def append_columns(self, columns): - self.append_records(six.moves.zip(columns['time'], - columns['duration'], - columns['value'], - columns['confidence'])) + self.append_records([dict(time=t, duration=d, value=v, confidence=c) + for t,d,v,c in six.moves.zip(columns['time'], + columns['duration'], + columns['value'], + columns['confidence'])]) def to_interval_values(self): '''Extract observation data in a `mir_eval`-friendly format. @@ -833,6 +834,9 @@ def __repr__(self): return '<{}: {:d} observations>'.format(self.__class__.__name__, len(self)) + def __iter__(self): + return iter(self.obs) + class Annotation(JObject): """Annotation base class.""" diff --git a/tests/test_convert.py b/tests/test_convert.py index 2d187997..2bc96d4b 100644 --- a/tests/test_convert.py +++ b/tests/test_convert.py @@ -181,7 +181,7 @@ def test_segment_open(): eq_(ann2.namespace, 'segment_open') # Check all else is equal - pdt.assert_frame_equal(ann.data, ann2.data) + eq_(ann.data, ann2.data) def test_tag_open(): @@ -197,7 +197,7 @@ def test_tag_open(): eq_(ann2.namespace, 'tag_open') # Check all else is equal - pdt.assert_frame_equal(ann.data, ann2.data) + eq_(ann.data, ann2.data) def test_chord(): @@ -213,7 +213,7 @@ def test_chord(): eq_(ann2.namespace, 'chord', ann2) # Check all else is equal - pdt.assert_frame_equal(ann.data, ann2.data) + assert ann.data == ann2.data def test_beat_position(): @@ -236,12 +236,12 @@ def test_beat_position(): # Check the namespace eq_(ann2.namespace, 'beat') - npt.assert_allclose(ann2.data.value.values, np.arange(1, 5)) - # Check all else is equal - pdt.assert_series_equal(ann.data.time, ann2.data.time) - pdt.assert_series_equal(ann.data.duration, ann2.data.duration) - pdt.assert_series_equal(ann.data.confidence, ann2.data.confidence) + eq_(len(ann), len(ann2)) + for obs1, obs2 in zip(ann.data, ann2.data): + eq_(obs1.time, obs2.time) + eq_(obs1.duration, obs2.duration) + eq_(obs1.confidence, obs2.confidence) def test_can_convert_equal(): diff --git a/tests/util_test.py b/tests/util_test.py index 941af424..5fd4bc84 100644 --- a/tests/util_test.py +++ b/tests/util_test.py @@ -45,12 +45,13 @@ def __test(ns, lab, ints, y, infer_duration): _, ann = util.import_lab(ns, six.StringIO(lab), infer_duration=infer_duration) - assert np.allclose(core.timedelta_to_float(ann.data['time'].values), - ints[:, 0]) - assert np.allclose(core.timedelta_to_float(ann.data['duration'].values), - ints[:, 1] - ints[:, 0]) - for y1, y2 in zip(list(ann.data['value'].values), y): - eq_(y1, y2) + eq_(len(ints), len(ann.data)) + eq_(len(y), len(ann.data)) + + for yi, ival, obs in zip(y, ints, ann.data): + eq_(obs.time, ival[0]) + eq_(obs.duration, ival[1] - ival[0]) + eq_(obs.value, yi) for ns, lab, ints, y, inf in zip(namespace, labs, intervals, labels, durations): yield __test, ns, lab, ints, y, inf From 67a74f17a8ef9ec7e408c1c597cf27cfb471a95b Mon Sep 17 00:00:00 2001 From: Brian McFee Date: Wed, 10 May 2017 09:56:18 -0400 Subject: [PATCH 03/31] ported over nsconvert --- jams/core.py | 4 +-- jams/nsconvert.py | 74 ++++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 69 insertions(+), 9 deletions(-) diff --git a/jams/core.py b/jams/core.py index 1fde0153..f1d5442f 100644 --- a/jams/core.py +++ b/jams/core.py @@ -810,7 +810,7 @@ def __json__(self): for (t, d, v, c) in self.obs: times.append(t) durations.append(d) - values.append(v) + values.append(serialize_obj(v)) confidences.append(c) return dict(time=times, @@ -820,7 +820,7 @@ def __json__(self): else: return [dict(time=o.time, duration=o.duration, - value=o.value, + value=serialize_obj(o.value), confidence=o.confidence) for o in self.obs] def __len__(self): diff --git a/jams/nsconvert.py b/jams/nsconvert.py index 0d48413d..71643b04 100644 --- a/jams/nsconvert.py +++ b/jams/nsconvert.py @@ -16,6 +16,8 @@ from copy import deepcopy from collections import defaultdict +from .core import AnnotationData + from .exceptions import NamespaceError @@ -24,6 +26,7 @@ __all__ = ['convert', 'can_convert'] + def _conversion(target, source): '''A decorator to register namespace conversions. @@ -137,12 +140,38 @@ def can_convert(annotation, target_namespace): return False +def pop_data(annotation): + '''Replace an annotation's observation data with a fresh AnnotationData + object. + + Parameters + ---------- + annotation : jams.Annotation + + Returns + ------- + annotation_data : jams.AnnotationData + The original annotation data object + ''' + + data = annotation.data + annotation.data = AnnotationData() + annotation.data.dense = data.dense + return data + + @_conversion('pitch_contour', 'pitch_hz') def pitch_hz_to_contour(annotation): '''Convert a pitch_hz annotation to a contour''' annotation.namespace = 'pitch_contour' - annotation.data.value = [dict(index=0, frequency=np.abs(f), voiced=f > 0) - for f in annotation.data.value] + data = pop_data(annotation) + + for obs in data: + annotation.append(time=obs.time, duration=obs.duration, + confidence=obs.confidence, + value=dict(index=0, + frequency=np.abs(obs.value), + voiced=obs.value > 0)) return annotation @@ -158,7 +187,13 @@ def note_midi_to_hz(annotation): '''Convert a pitch_midi annotation to pitch_hz''' annotation.namespace = 'note_hz' - annotation.data.value = 440 * (2.0 ** ((annotation.data.value - 69.0)/12.0)) + data = pop_data(annotation) + + for obs in data: + annotation.append(time=obs.time, duration=obs.duration, + confidence=obs.confidence, + value=440 * (2.0**((obs.value - 69.0)/12.0))) + return annotation @@ -167,7 +202,14 @@ def note_hz_to_midi(annotation): '''Convert a pitch_hz annotation to pitch_midi''' annotation.namespace = 'note_midi' - annotation.data.value = 12 * (np.log2(annotation.data.value) - np.log2(440.0)) + 69 + + data = pop_data(annotation) + + for obs in data: + annotation.append(time=obs.time, duration=obs.duration, + confidence=obs.confidence, + value=12 * (np.log2(obs.value) - np.log2(440.0)) + 69) + return annotation @@ -176,7 +218,14 @@ def pitch_midi_to_hz(annotation): '''Convert a pitch_midi annotation to pitch_hz''' annotation.namespace = 'pitch_hz' - annotation.data.value = 440 * (2.0 ** ((annotation.data.value - 69.0)/12.0)) + + data = pop_data(annotation) + + for obs in data: + annotation.append(time=obs.time, duration=obs.duration, + confidence=obs.confidence, + value=440 * (2.0**((obs.value - 69.0)/12.0))) + return annotation @@ -185,7 +234,12 @@ def pitch_hz_to_midi(annotation): '''Convert a pitch_hz annotation to pitch_midi''' annotation.namespace = 'pitch_midi' - annotation.data.value = 12 * (np.log2(annotation.data.value) - np.log2(440.0)) + 69 + data = pop_data(annotation) + + for obs in data: + annotation.append(time=obs.time, duration=obs.duration, + confidence=obs.confidence, + value=12 * (np.log2(obs.value) - np.log2(440.0)) + 69) return annotation @@ -210,9 +264,15 @@ def beat_position(annotation): '''Convert beat_position to beat''' annotation.namespace = 'beat' - annotation.data.value = annotation.data.value.apply(lambda x: x['position']) + data = pop_data(annotation) + for obs in data: + annotation.append(time=obs.time, duration=obs.duration, + confidence=obs.confidence, + value=obs.value['position']) + return annotation + @_conversion('chord', 'chord_harte') def chordh_to_chord(annotation): '''Convert Harte annotation to chord''' From 19aeb2ac299aa9c08c77d2afa0f33c5921c51142 Mon Sep 17 00:00:00 2001 From: Brian McFee Date: Wed, 10 May 2017 12:40:12 -0400 Subject: [PATCH 04/31] fixed slicing --- jams/core.py | 19 ++++++++--- jams/eval.py | 11 +++--- tests/jams_test.py | 72 ++++++++++++++++++++-------------------- tests/namespace_tests.py | 13 +++----- tests/test_convert.py | 65 ++++++++++++++++++++++-------------- 5 files changed, 102 insertions(+), 78 deletions(-) diff --git a/jams/core.py b/jams/core.py index f1d5442f..3b67d7d4 100644 --- a/jams/core.py +++ b/jams/core.py @@ -1234,16 +1234,25 @@ def slice(self, start_time, end_time, strict=False): ''' # start by trimming the annotation sliced_ann = self.trim(start_time, end_time, strict=strict) + raw_data = sliced_ann.data + sliced_ann.data = AnnotationData() + sliced_ann.data.dense = raw_data.dense # now adjust the start time of the annotation and the observations it # contains. - ref_time = sliced_ann.time - sliced_ann.time = max(0, sliced_ann.time - start_time) - adjustment = ref_time - sliced_ann.time - sliced_ann.data['time'] = sliced_ann.data['time'].apply( - lambda x: x - pd.to_timedelta(adjustment, unit='s')) + for obs in raw_data: + new_time = max(0, obs.time - start_time) + # if obs.time > start_time, + # duration doesn't change + # if obs.time < start_time, + # duration shrinks by start_time - obs.time + sliced_ann.append(time=new_time, + duration=obs.duration, + value=obs.value, + confidence=obs.confidence) + ref_time = sliced_ann.time slice_start = ref_time slice_end = ref_time + sliced_ann.duration diff --git a/jams/eval.py b/jams/eval.py index bcbed4eb..dfb487e0 100644 --- a/jams/eval.py +++ b/jams/eval.py @@ -26,7 +26,9 @@ from .nsconvert import convert -__all__ = ['beat', 'chord', 'melody', 'onset', 'segment', 'hierarchy', 'tempo', 'pattern', 'transcription'] +__all__ = ['beat', 'chord', 'melody', 'onset', + 'segment', 'hierarchy', 'tempo', + 'pattern', 'transcription'] def coerce_annotation(ann, namespace): @@ -350,12 +352,13 @@ def tempo(ref, est, **kwargs): ref = coerce_annotation(ref, 'tempo') est = coerce_annotation(est, 'tempo') - ref_tempi = ref.data['value'].values - ref_weight = ref.data['confidence'][0] - est_tempi = est.data['value'].values + ref_tempi = np.asarray([o.value for o in ref.data]) + ref_weight = ref.data.obs[0].confidence + est_tempi = np.asarray([o.value for o in est.data]) return mir_eval.tempo.evaluate(ref_tempi, ref_weight, est_tempi, **kwargs) + # melody def melody(ref, est, **kwargs): r'''Melody extraction evaluation diff --git a/tests/jams_test.py b/tests/jams_test.py index a29dde51..78a0d9fa 100644 --- a/tests/jams_test.py +++ b/tests/jams_test.py @@ -13,7 +13,7 @@ import numpy as np import pandas as pd -from nose.tools import raises, eq_ +from nose.tools import raises, eq_, nottest import jams @@ -128,12 +128,13 @@ def test_sandbox_contains(): # JamsFrame - +@nottest def test_jamsframe_fields(): eq_(jams.JamsFrame.fields(), ['time', 'duration', 'value', 'confidence']) +@nottest def test_jamsframe_from_df(): df = pd.DataFrame(data=[[0.0, 1.0, 'a', 0.0], @@ -162,6 +163,7 @@ def test_jamsframe_from_df(): eq_(list(jf['confidence']), [0.0, 0.0]) +@nottest def test_jamsframe_add_observation(): df = pd.DataFrame(data=[[0.0, 1.0, 'a', 0.0], [1.0, 2.0, 'b', 0.0]], @@ -179,6 +181,7 @@ def test_jamsframe_add_observation(): eq_(list(jf['confidence']), [0.0, 0.0, 0.0]) +@nottest def test_jamsframe_add_observation_fail(): @raises(jams.ParameterError) @@ -199,6 +202,7 @@ def __test(ann, time, duration, value, confidence): yield __test, ann, -1, 1.0, 'foo', 1 +@nottest def test_jamsframe_interval_values(): df = pd.DataFrame(data=[[0.0, 1.0, 'a', 0.0], @@ -213,6 +217,7 @@ def test_jamsframe_interval_values(): eq_(values, ['a', 'b']) +@nottest def test_jamsframe_serialize(): def __test(dense, data): @@ -304,14 +309,15 @@ def __test(namespace, data, amd, sandbox): eq_(dict(sandbox), dict(ann.sandbox)) if data is not None: - assert ann.data.equals(jams.JamsFrame.from_dict(data)) + eq_(len(ann.data.obs), len(data)) + for obs1, obs2 in zip(ann.data.obs, data): + eq_(obs1._asdict(), obs2) real_sandbox = jams.Sandbox(description='none') real_amd = jams.AnnotationMetadata(corpus='test collection') - real_data = dict(time=[0.0, 1.0], - duration=[0.5, 0.5], - value=['one', 'two'], - confidence=[0.9, 0.9]) + + real_data = [dict(time=0, duration=0.5, value='one', confidence=0.9), + dict(time=1.0, duration=0.5, value='two', confidence=0.9)] namespace = 'tag_open' @@ -323,10 +329,8 @@ def __test(namespace, data, amd, sandbox): def test_annotation_append(): - data = dict(time=[0.0, 1.0], - duration=[0.5, 0.5], - value=['one', 'two'], - confidence=[0.9, 0.9]) + data = [dict(time=0, duration=0.5, value='one', confidence=0.9), + dict(time=1.0, duration=0.5, value='two', confidence=0.9)] namespace = 'tag_open' @@ -336,10 +340,7 @@ def test_annotation_append(): ann.append(**update) - jf = jams.JamsFrame.from_dict(data) - jf.add_observation(**update) - - assert ann.data.equals(jf) + eq_(ann.data.obs[-1]._asdict(), update) def test_annotation_eq(): @@ -403,7 +404,7 @@ def test_annotation_array_data(): eq_(len(arr), 3) for t_ann in arr: - assert ann.data.equals(t_ann.data) + eq_(ann.data, t_ann.data) def test_annotation_array_serialize(): @@ -785,7 +786,7 @@ def test_annotation_trim_no_duration(): confidence=[None]) expected_ann = jams.Annotation(namespace, data=expected_data, time=5.0, duration=3.0) - assert ann_trim.data.equals(expected_ann.data) + eq_(ann_trim.data, expected_ann.data) def test_annotation_trim_no_overlap(): @@ -807,7 +808,7 @@ def test_annotation_trim_no_overlap(): assert out[0].category is UserWarning assert 'does not intersect' in str(out[0].message).lower() - assert ann_trim.data.empty + assert len(ann_trim.data) == 0 assert ann_trim.time == ann.time assert ann_trim.duration == 0 @@ -839,7 +840,7 @@ def test_annotation_trim_complete_overlap(): confidence=[0.9, 0.9]) expected_ann = jams.Annotation(namespace, data=expected_data, time=8.0, duration=4.0) - assert ann_trim.data.equals(expected_ann.data) + eq_(ann_trim.data, expected_ann.data) # with strict=True ann_trim = ann.trim(8, 12, strict=True) @@ -854,7 +855,7 @@ def test_annotation_trim_complete_overlap(): expected_data = None expected_ann = jams.Annotation(namespace, data=expected_data, time=8.0, duration=4.0) - assert ann_trim.data.equals(expected_ann.data) + eq_(ann_trim.data, expected_ann.data) def test_annotation_trim_partial_overlap_beginning(): @@ -883,7 +884,7 @@ def test_annotation_trim_partial_overlap_beginning(): confidence=[0.9, 0.9]) expected_ann = jams.Annotation(namespace, data=expected_data, time=5.0, duration=3.0) - assert ann_trim.data.equals(expected_ann.data) + eq_(ann_trim.data, expected_ann.data) # strict=True ann_trim = ann.trim(0, 8, strict=True) @@ -901,7 +902,7 @@ def test_annotation_trim_partial_overlap_beginning(): confidence=[0.9]) expected_ann = jams.Annotation(namespace, data=expected_data, time=5.0, duration=3.0) - assert ann_trim.data.equals(expected_ann.data) + eq_(ann_trim.data, expected_ann.data) def test_annotation_trim_partial_overlap_end(): @@ -930,7 +931,7 @@ def test_annotation_trim_partial_overlap_end(): confidence=[0.9, 0.9]) expected_ann = jams.Annotation(namespace, data=expected_data, time=8.0, duration=7.0) - assert ann_trim.data.equals(expected_ann.data) + eq_(ann_trim.data, expected_ann.data) # strict=True ann_trim = ann.trim(8, 20, strict=True) @@ -948,7 +949,7 @@ def test_annotation_trim_partial_overlap_end(): confidence=[0.9]) expected_ann = jams.Annotation(namespace, data=expected_data, time=8.0, duration=7.0) - assert ann_trim.data.equals(expected_ann.data) + eq_(ann_trim.data, expected_ann.data) def test_annotation_trim_multiple(): @@ -977,7 +978,7 @@ def test_annotation_trim_multiple(): expected_ann = jams.Annotation(namespace, data=expected_data, time=8.0, duration=2.0) - assert ann_trim.data.equals(expected_ann.data) + eq_(ann_trim.data, expected_ann.data) # strict=True ann_trim = ann.trim(0, 10, strict=True).trim(8, 20, strict=True) @@ -993,7 +994,7 @@ def test_annotation_trim_multiple(): expected_data = None expected_ann = jams.Annotation(namespace, data=expected_data, time=8.0, duration=2.0) - assert ann_trim.data.equals(expected_ann.data) + eq_(ann_trim.data, expected_ann.data) def test_jams_trim_no_duration(): @@ -1043,7 +1044,7 @@ def test_jams_trim_valid(): jam_trim = jam.trim(0, 10, strict=False) for ann in jam_trim.annotations: - assert ann.data.equals(ann_trim.data) + eq_(ann.data, ann_trim.data) assert jam_trim.file_metadata.duration == jam.file_metadata.duration assert jam_trim.sandbox.trim == [{'start_time': 0, 'end_time': 10}] @@ -1053,7 +1054,7 @@ def test_jams_trim_valid(): ann_trim = ann_copy.trim(0, 10).trim(8, 10) for ann in jam_trim.annotations: - assert ann.data.equals(ann_trim.data) + eq_(ann.data, ann_trim.data) assert jam_trim.sandbox.trim == ( [{'start_time': 0, 'end_time': 10}, {'start_time': 8, 'end_time': 10}]) @@ -1081,8 +1082,8 @@ def test_annotation_slice(): expected_ann = jams.Annotation(namespace, data=expected_data, time=0, duration=2.0) - assert ann_slice.data.equals(expected_ann.data) - assert ann_slice.sandbox.slice == ( + eq_(ann_slice.data, expected_ann.data) + eq_(ann_slice.sandbox.slice, [{'start_time': 8, 'end_time': 10, 'slice_start': 8, 'slice_end': 10}]) # Slice out range that's partially inside the time range spanned by the @@ -1095,7 +1096,7 @@ def test_annotation_slice(): expected_ann = jams.Annotation(namespace, data=expected_data, time=2.0, duration=5.0) - assert ann_slice.data.equals(expected_ann.data) + eq_(ann_slice.data, expected_ann.data) assert ann_slice.sandbox.slice == ( [{'start_time': 3, 'end_time': 10, 'slice_start': 5, 'slice_end': 10}]) @@ -1109,7 +1110,7 @@ def test_annotation_slice(): expected_ann = jams.Annotation(namespace, data=expected_data, time=0, duration=2.0) - assert ann_slice.data.equals(expected_ann.data) + eq_(ann_slice.data, expected_ann.data) assert ann_slice.sandbox.slice == ( [{'start_time': 8, 'end_time': 20, 'slice_start': 8, 'slice_end': 15}]) @@ -1122,8 +1123,7 @@ def test_annotation_slice(): expected_ann = jams.Annotation(namespace, data=expected_data, time=0, duration=2.0) - assert ann_slice.data.equals(expected_ann.data) - print(ann_slice.sandbox.slice) + eq_(ann_slice.data, expected_ann.data) assert ann_slice.sandbox.slice == ( [{'start_time': 0, 'end_time': 10, 'slice_start': 5, 'slice_end': 10}, {'start_time': 8, 'end_time': 10, 'slice_start': 8, 'slice_end': 10}]) @@ -1162,7 +1162,7 @@ def __test_error(jam, start_time, end_time, strict=False): jam_slice = jam.slice(0, 10, strict=False) for ann in jam_slice.annotations: - assert ann.data.equals(ann_slice.data) + eq_(ann.data, ann_slice.data) assert jam_slice.file_metadata.duration == 10 assert jam_slice.sandbox.slice == [{'start_time': 0, 'end_time': 10}] @@ -1172,7 +1172,7 @@ def __test_error(jam, start_time, end_time, strict=False): ann_slice = ann_copy.slice(0, 10).slice(8, 10) for ann in jam_slice.annotations: - assert ann.data.equals(ann_slice.data) + eq_(ann.data, ann_slice.data) assert jam_slice.sandbox.slice == ( [{'start_time': 0, 'end_time': 10}, {'start_time': 8, 'end_time': 10}]) diff --git a/tests/namespace_tests.py b/tests/namespace_tests.py index b421b565..fd039d83 100644 --- a/tests/namespace_tests.py +++ b/tests/namespace_tests.py @@ -8,8 +8,7 @@ from nose.tools import raises from jams import SchemaError -from jams import Annotation -import pandas as pd +from jams import Annotation, Observation from util_test import srand @@ -30,12 +29,10 @@ def test_ns_time_invalid(): def __test(data): ann = Annotation(namespace='onset') - # Bypass the safety chceks in add_observation - ann.data.loc[0] = {'time': pd.to_timedelta(data['time'], unit='s'), - 'duration': pd.to_timedelta(data['duration'], - unit='s'), - 'value': None, - 'confdence': None} + # Bypass the safety checks in add_observation + ann.data.obs.insert(0, Observation(time=data['time'], + duration=data['duration'], + value=None, confidence=None)) ann.validate() diff --git a/tests/test_convert.py b/tests/test_convert.py index 2bc96d4b..add26c99 100644 --- a/tests/test_convert.py +++ b/tests/test_convert.py @@ -41,6 +41,7 @@ def __test(ns): for ns in jams.schema.__NAMESPACE__: yield __test, ns + def test_pitch_hz_to_contour(): ann = jams.Annotation(namespace='pitch_hz') @@ -59,16 +60,17 @@ def test_pitch_hz_to_contour(): eq_(ann2.namespace, 'pitch_contour') # Check index values - eq_(ann2.data.value.iloc[0]['index'], 0) - eq_(ann2.data.value.iloc[-1]['index'], 0) + eq_(ann2.data.obs[0].value['index'], 0) + eq_(ann2.data.obs[-1].value['index'], 0) # Check frequency - eq_(np.abs(ann2.data.value.iloc[0]['frequency']), np.abs(values[0])) - eq_(np.abs(ann2.data.value.iloc[-1]['frequency']), np.abs(values[-1])) + eq_(np.abs(ann2.data.obs[0].value['frequency']), np.abs(values[0])) + eq_(np.abs(ann2.data.obs[-1].value['frequency']), np.abs(values[-1])) # Check voicings - assert not ann2.data.value.iloc[0]['voiced'] - assert ann2.data.value.iloc[-1]['voiced'] + assert not ann2.data.obs[0].value['voiced'] + assert ann2.data.obs[-1].value['voiced'] + def test_pitch_midi_to_contour(): @@ -86,11 +88,11 @@ def test_pitch_midi_to_contour(): eq_(ann2.namespace, 'pitch_contour') # Check index values - eq_(ann2.data.value.iloc[0]['index'], 0) - eq_(ann2.data.value.iloc[-1]['index'], 0) + eq_(ann2.data.obs[0].value['index'], 0) + eq_(ann2.data.obs[-1].value['index'], 0) # Check voicings - assert ann2.data.value.iloc[-1]['voiced'] + assert ann2.data.obs[-1].value['voiced'] def test_pitch_midi_to_hz(): @@ -104,12 +106,15 @@ def test_pitch_midi_to_hz(): # Check the namespace eq_(ann2.namespace, 'pitch_hz') # midi 69 = 440.0 Hz - eq_(ann2.data.value.loc[0], 440.0) + eq_(ann2.data.obs[0].value, 440.0) # Check all else is equal - pdt.assert_series_equal(ann.data.time, ann2.data.time) - pdt.assert_series_equal(ann.data.duration, ann2.data.duration) - pdt.assert_series_equal(ann.data.confidence, ann2.data.confidence) + eq_(len(ann.data), len(ann2.data)) + + for obs1, obs2 in zip(ann.data, ann2.data): + eq_(obs1.time, obs2.time) + eq_(obs1.duration, obs2.duration) + eq_(obs1.confidence, obs2.confidence) def test_pitch_hz_to_midi(): @@ -123,12 +128,15 @@ def test_pitch_hz_to_midi(): # Check the namespace eq_(ann2.namespace, 'pitch_midi') # midi 69 = 440.0 Hz - eq_(ann2.data.value.loc[0], 69) + eq_(ann2.data.obs[0].value, 69) # Check all else is equal - pdt.assert_series_equal(ann.data.time, ann2.data.time) - pdt.assert_series_equal(ann.data.duration, ann2.data.duration) - pdt.assert_series_equal(ann.data.confidence, ann2.data.confidence) + eq_(len(ann.data), len(ann2.data)) + + for obs1, obs2 in zip(ann.data, ann2.data): + eq_(obs1.time, obs2.time) + eq_(obs1.duration, obs2.duration) + eq_(obs1.confidence, obs2.confidence) def test_note_midi_to_hz(): @@ -142,12 +150,15 @@ def test_note_midi_to_hz(): # Check the namespace eq_(ann2.namespace, 'note_hz') # midi 69 = 440.0 Hz - eq_(ann2.data.value.loc[0], 440.0) + eq_(ann2.data.obs[0].value, 440.0) # Check all else is equal - pdt.assert_series_equal(ann.data.time, ann2.data.time) - pdt.assert_series_equal(ann.data.duration, ann2.data.duration) - pdt.assert_series_equal(ann.data.confidence, ann2.data.confidence) + eq_(len(ann.data), len(ann2.data)) + + for obs1, obs2 in zip(ann.data, ann2.data): + eq_(obs1.time, obs2.time) + eq_(obs1.duration, obs2.duration) + eq_(obs1.confidence, obs2.confidence) def test_note_hz_to_midi(): @@ -161,12 +172,16 @@ def test_note_hz_to_midi(): # Check the namespace eq_(ann2.namespace, 'note_midi') # midi 69 = 440.0 Hz - eq_(ann2.data.value.loc[0], 69) + eq_(ann2.data.obs[0].value, 69) # Check all else is equal - pdt.assert_series_equal(ann.data.time, ann2.data.time) - pdt.assert_series_equal(ann.data.duration, ann2.data.duration) - pdt.assert_series_equal(ann.data.confidence, ann2.data.confidence) + eq_(len(ann.data), len(ann2.data)) + + for obs1, obs2 in zip(ann.data, ann2.data): + eq_(obs1.time, obs2.time) + eq_(obs1.duration, obs2.duration) + eq_(obs1.confidence, obs2.confidence) + def test_segment_open(): From cc5b4f2563ba3147fb645257f7fa1313763f7b77 Mon Sep 17 00:00:00 2001 From: Brian McFee Date: Wed, 10 May 2017 12:47:17 -0400 Subject: [PATCH 05/31] fixed display and sonification --- jams/display.py | 2 +- jams/sonify.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/jams/display.py b/jams/display.py index 0ada9a45..4ec12d42 100644 --- a/jams/display.py +++ b/jams/display.py @@ -88,7 +88,7 @@ def pitch_contour(annotation, **kwargs): indices = np.unique([v['index'] for v in values]) for idx in indices: - rows = annotation.data.value.apply(lambda x: x['index'] == idx).nonzero()[0] + rows = [i for (i, v) in enumerate(values) if v['index'] == idx] freqs = np.asarray([values[r]['frequency'] for r in rows]) unvoiced = ~np.asarray([values[r]['voiced'] for r in rows]) freqs[unvoiced] *= -1 diff --git a/jams/sonify.py b/jams/sonify.py index 530eaee4..079702bb 100644 --- a/jams/sonify.py +++ b/jams/sonify.py @@ -133,7 +133,7 @@ def pitch_contour(annotation, sr=22050, length=None, **kwargs): y_out = 0.0 for ix in indices: - rows = annotation.data.value.apply(lambda x: x['index'] == ix).nonzero()[0] + rows = [i for (i, v) in enumerate(values) if v['index'] == ix] freqs = np.asarray([values[r]['frequency'] for r in rows]) unv = ~np.asarray([values[r]['voiced'] for r in rows]) From e3ec170ef5b226e84c8053938548ddacbbb5f94e Mon Sep 17 00:00:00 2001 From: Brian McFee Date: Wed, 10 May 2017 14:06:36 -0400 Subject: [PATCH 06/31] allow importing annotationdata as Observations as well as dicts --- jams/core.py | 14 +++++++++----- tests/jams_test.py | 1 + 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/jams/core.py b/jams/core.py index 3b67d7d4..3051ffa2 100644 --- a/jams/core.py +++ b/jams/core.py @@ -771,15 +771,19 @@ def add_observation(self, time=None, duration=None, value=None, def append_records(self, records): for obs in records: - self.add_observation(**obs) + if isinstance(obs, Observation): + self.add_observation(**obs._asdict()) + else: + self.add_observation(**obs) def append_columns(self, columns): self.append_records([dict(time=t, duration=d, value=v, confidence=c) - for t,d,v,c in six.moves.zip(columns['time'], - columns['duration'], - columns['value'], - columns['confidence'])]) + for (t, d, v, c) + in six.moves.zip(columns['time'], + columns['duration'], + columns['value'], + columns['confidence'])]) def to_interval_values(self): '''Extract observation data in a `mir_eval`-friendly format. diff --git a/tests/jams_test.py b/tests/jams_test.py index 78a0d9fa..36b0b011 100644 --- a/tests/jams_test.py +++ b/tests/jams_test.py @@ -549,6 +549,7 @@ def __test(ext): for ext in ['jams', 'jamz']: yield __test, ext + def test_jams_add(): def __test(): From 52a970b683e06f6d4f43ec2a9664182c28035862 Mon Sep 17 00:00:00 2001 From: Brian McFee Date: Wed, 10 May 2017 14:12:42 -0400 Subject: [PATCH 07/31] purged jamsframe --- jams/core.py | 254 +----------------------------------------- setup.py | 2 + tests/jams_test.py | 124 +-------------------- tests/test_convert.py | 2 - 4 files changed, 9 insertions(+), 373 deletions(-) diff --git a/jams/core.py b/jams/core.py index 3051ffa2..23fff4ce 100644 --- a/jams/core.py +++ b/jams/core.py @@ -39,14 +39,12 @@ from sortedcontainers import SortedListWithKey import numpy as np -import pandas as pd import os import re import six import warnings import contextlib import gzip -import copy from .version import version as __VERSION__ from . import schema @@ -54,7 +52,7 @@ __all__ = ['load', - 'JObject', 'Sandbox', 'JamsFrame', + 'JObject', 'Sandbox', 'Annotation', 'Curator', 'AnnotationMetadata', 'FileMetadata', 'AnnotationArray', 'JAMS', 'AnnotationData', 'Observation'] @@ -500,233 +498,6 @@ class Sandbox(JObject): pass -class JamsFrame(pd.DataFrame): - '''A data-frame class for JAMS. - - This automates certain niceties, such as timestamp - conversion and serialization. - ''' - - __dense = False - - def __init__(self, data=None, index=None, columns=None, dtype=None): - '''Construct a new JamsFrame object. - - Parameters - ---------- - data - Optional data for the new JamsFrame, in any format supported - by `pandas.DataFrame.__init__`. - - Fields must be `['time', 'duration', 'value', 'confidence']`. - - `time` and `duration` fields must be floating point types, - measured in seconds. - - index - Optional index on `data`. - - columns - dtype - These parameters are ignored by JamsFrame, but are allowed - for API compatibility with `pandas.DataFrame`. - - See Also - -------- - from_dict - from_dataframe - pandas.DataFrame.__init__ - - ''' - super(JamsFrame, self).__init__(data=data, index=index, - columns=self.fields()) - - self.time = pd.to_timedelta(self.time, unit='s') - self.duration = pd.to_timedelta(self.duration, unit='s') - - @property - def dense(self): - '''Boolean to determine whether the encoding is dense or sparse. - - Returns - ------- - dense : bool - `True` if the data should be encoded densely - `False` otherwise - ''' - return self.__dense - - @dense.setter - def dense(self, value): - '''Setter for dense''' - self.__dense = value - - @classmethod - def fields(cls): - '''Fields of a JamsFrame: (time, duration, value, confidence) - - Returns - ------- - fields : list - The only permissible fields for a JamsFrame: - `time`, `duration`, `value`, and `confidence` - ''' - return ['time', 'duration', 'value', 'confidence'] - - @classmethod - def from_dict(cls, *args, **kwargs): - '''Construct a new JamsFrame from a dictionary or list of dictionaries. - - This is analogous to pd.DataFrame.from_dict, except the returned object - has the type `JamsFrame`. - - See Also - -------- - pandas.DataFrame.from_dict - from_dataframe - ''' - new_frame = super(JamsFrame, cls).from_dict(*args, **kwargs) - - return cls.from_dataframe(new_frame) - - @classmethod - def from_dataframe(cls, frame): - '''Convert a pandas DataFrame into a JamsFrame. - - Note: this operation is destructive, in that the input - DataFrame will have its type and data altered. - - Parameters - ---------- - frame : pandas.DataFrame - The input DataFrame. Must have the appropriate JamsFrame fields: - 'time', 'duration', 'value', and 'confidence'. - - 'time' and 'duration' fields should be of type `float` and measured - in seconds. - - Returns - ------- - jams_frame : JamsFrame - The input `frame` modified to form a JamsFrame. - - See Also - -------- - from_dict - ''' - # Encode time properly - frame.time = pd.to_timedelta(frame.time, unit='s') - frame.duration = pd.to_timedelta(frame.duration, unit='s') - - # Properly order the columns - frame = frame[cls.fields()] - - # Clobber the class attribute - frame.__class__ = cls - return frame - - @property - def __json__(self): - '''JSON encoding attribute''' - - def __recursive_simplify(D): - '''A simplifier for nested dictionary structures''' - if isinstance(D, list): - return [__recursive_simplify(Di) for Di in D] - - dict_out = {} - for key, value in six.iteritems(D): - if isinstance(value, dict): - dict_out[key] = __recursive_simplify(value) - else: - dict_out[key] = serialize_obj(value) - return dict_out - - # By default, we'll output a record for each row - # But, if the dense flag is set, we'll output the entire - # table as one object - - orient = 'records' - if self.dense: - orient = 'list' - - return __recursive_simplify(self.to_dict(orient=orient)) - - def add_observation(self, time=None, duration=None, - value=None, confidence=None): - '''Add a single observation event to an existing frame. - - New observations are appended to the end of the frame. - - Parameters - ---------- - time : float - The time of the new observation, in seconds - - duration : float - The duration of the new observation, in seconds - - value - confidence - The value and confidence fields of the new observation. - This should conform to the corresponding `namespace` of the - containing `Annotation` object. - - Examples - -------- - >>> frame = jams.JamsFrame() - >>> frame.add_observation(time=3, duration=1.5, value='C#') - >>> frame.add_observation(time=5, duration=.5, value='C#:min', confidence=.8) - >>> frame - time duration value confidence - 0 00:00:03 00:00:01.500000 C# NaN - 1 00:00:05 00:00:00.500000 C#:min 0.8 - ''' - - if time is None or not (time >= 0.0): - raise ParameterError('time={} must be a non-negative number'.format(time)) - - if duration is None or not (duration >= 0.0): - raise ParameterError('duration={} must be a non-negative number'.format(duration)) - - n = len(self) - self.loc[n] = {'time': pd.to_timedelta(time, unit='s'), - 'duration': pd.to_timedelta(duration, unit='s'), - 'value': value, - 'confidence': confidence} - - def to_interval_values(self): - '''Extract observation data in a `mir_eval`-friendly format. - - Returns - ------- - intervals : np.ndarray [shape=(n, 2), dtype=float] - Start- and end-times of all valued intervals - - `intervals[i, :] = [time[i], time[i] + duration[i]]` - - labels : list - List view of value field. - ''' - - times = timedelta_to_float(self.time.values) - duration = timedelta_to_float(self.duration.values) - - return np.vstack([times, times + duration]).T, list(self.value) - - def __deepcopy__(self, memo): - '''Explicit deep-copy implementation''' - jf = JamsFrame() - for field in self.fields(): - if len(self[field]): - jf[field] = copy.deepcopy(self[field]) - else: - jf[field] = [] - - jf.dense = copy.deepcopy(self.dense) - return jf - - Observation = namedtuple('Observation', ['time', 'duration', 'value', 'confidence']) '''Core observation type: (time, duration, value, confidence).''' @@ -858,8 +629,8 @@ def __init__(self, namespace, data=None, annotation_metadata=None, namespace : str The namespace for this annotation - data : dict or list-of-dict - Data for the new annotation in a format supported by `JamsFrame.from_dict` + data : dict of lists, list of dicts, or list of Observations + Data for the new annotation annotation_metadata : AnnotationMetadata (or dict), default=None. Metadata corresponding to this Annotation. @@ -919,7 +690,7 @@ def append(self, **kwargs): See Also -------- - JamsFrame.add_observation + AnnotationData.add_observation Examples -------- @@ -936,17 +707,6 @@ def append(self, **kwargs): self.data.add_observation(**kwargs) - def __eq__(self, other): - '''Override JObject equality to handle JamsFrames specially''' - if not isinstance(other, self.__class__): - return False - - for key in self.__dict__: - if self.__dict__[key] != other.__dict__[key]: - return False - - return True - def validate(self, strict=True): '''Validate this annotation object against the JAMS schema, and its data against the namespace schema. @@ -1969,15 +1729,11 @@ def match_query(string, query): def serialize_obj(obj): '''Custom serialization functionality for working with advanced data types. - - Timedelta objects are converted to floats (in seconds) - numpy arrays are converted to lists - lists are recursively serialized element-wise ''' - if isinstance(obj, pd.tslib.Timedelta): - return obj.total_seconds() - - elif isinstance(obj, np.ndarray): + if isinstance(obj, np.ndarray): return obj.tolist() elif isinstance(obj, list): diff --git a/setup.py b/setup.py index bcdc1f7e..28dc2fe6 100644 --- a/setup.py +++ b/setup.py @@ -27,11 +27,13 @@ "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.4", "Programming Language :: Python :: 3.5", + "Programming Language :: Python :: 3.6", ], keywords='audio music json', license='ISC', install_requires=[ 'pandas', + 'sortedcontainers', 'jsonschema', 'numpy>=1.8.0', 'six', diff --git a/tests/jams_test.py b/tests/jams_test.py index 36b0b011..5b821f3a 100644 --- a/tests/jams_test.py +++ b/tests/jams_test.py @@ -6,19 +6,16 @@ import os import tempfile import json -import jsonschema import six import sys import warnings -import numpy as np -import pandas as pd -from nose.tools import raises, eq_, nottest +from nose.tools import raises, eq_ import jams -## Borrowed from sklearn +# Borrowed from sklearn def clean_warning_registry(): """Safe way to reset warnings """ warnings.resetwarnings() @@ -127,123 +124,6 @@ def test_sandbox_contains(): assert key in S -# JamsFrame -@nottest -def test_jamsframe_fields(): - - eq_(jams.JamsFrame.fields(), ['time', 'duration', 'value', 'confidence']) - - -@nottest -def test_jamsframe_from_df(): - - df = pd.DataFrame(data=[[0.0, 1.0, 'a', 0.0], - [1.0, 2.0, 'b', 0.0]], - columns=['time', 'duration', 'value', 'confidence']) - - jf = jams.JamsFrame.from_dataframe(df) - - # 1. type check - assert isinstance(jf, jams.JamsFrame) - - # 2. check field order - eq_(list(jf.keys().values), - jams.JamsFrame.fields()) - - # 3. check field types - assert jf['time'].dtype == np.dtype(' Date: Wed, 10 May 2017 15:14:16 -0400 Subject: [PATCH 08/31] moved pop_data to annotation --- jams/core.py | 20 +++++++++++++++++--- jams/nsconvert.py | 32 ++++++-------------------------- 2 files changed, 23 insertions(+), 29 deletions(-) diff --git a/jams/core.py b/jams/core.py index 23fff4ce..e237017e 100644 --- a/jams/core.py +++ b/jams/core.py @@ -613,6 +613,7 @@ def __iter__(self): return iter(self.obs) + class Annotation(JObject): """Annotation base class.""" @@ -998,9 +999,7 @@ def slice(self, start_time, end_time, strict=False): ''' # start by trimming the annotation sliced_ann = self.trim(start_time, end_time, strict=strict) - raw_data = sliced_ann.data - sliced_ann.data = AnnotationData() - sliced_ann.data.dense = raw_data.dense + raw_data = sliced_ann.pop_data() # now adjust the start time of the annotation and the observations it # contains. @@ -1031,6 +1030,21 @@ def slice(self, start_time, end_time, strict=False): return sliced_ann + def pop_data(self): + '''Replace this observation's data with a fresh AnnotationData + object. + + Returns + ------- + annotation_data : jams.AnnotationData + The original annotation data object + ''' + + data = self.data + self.data = AnnotationData() + self.data.dense = data.dense + return data + class Curator(JObject): """Curator diff --git a/jams/nsconvert.py b/jams/nsconvert.py index 71643b04..4d209894 100644 --- a/jams/nsconvert.py +++ b/jams/nsconvert.py @@ -140,31 +140,11 @@ def can_convert(annotation, target_namespace): return False -def pop_data(annotation): - '''Replace an annotation's observation data with a fresh AnnotationData - object. - - Parameters - ---------- - annotation : jams.Annotation - - Returns - ------- - annotation_data : jams.AnnotationData - The original annotation data object - ''' - - data = annotation.data - annotation.data = AnnotationData() - annotation.data.dense = data.dense - return data - - @_conversion('pitch_contour', 'pitch_hz') def pitch_hz_to_contour(annotation): '''Convert a pitch_hz annotation to a contour''' annotation.namespace = 'pitch_contour' - data = pop_data(annotation) + data = annotation.pop_data() for obs in data: annotation.append(time=obs.time, duration=obs.duration, @@ -187,7 +167,7 @@ def note_midi_to_hz(annotation): '''Convert a pitch_midi annotation to pitch_hz''' annotation.namespace = 'note_hz' - data = pop_data(annotation) + data = annotation.pop_data() for obs in data: annotation.append(time=obs.time, duration=obs.duration, @@ -203,7 +183,7 @@ def note_hz_to_midi(annotation): annotation.namespace = 'note_midi' - data = pop_data(annotation) + data = annotation.pop_data() for obs in data: annotation.append(time=obs.time, duration=obs.duration, @@ -219,7 +199,7 @@ def pitch_midi_to_hz(annotation): annotation.namespace = 'pitch_hz' - data = pop_data(annotation) + data = annotation.pop_data() for obs in data: annotation.append(time=obs.time, duration=obs.duration, @@ -234,7 +214,7 @@ def pitch_hz_to_midi(annotation): '''Convert a pitch_hz annotation to pitch_midi''' annotation.namespace = 'pitch_midi' - data = pop_data(annotation) + data = annotation.pop_data() for obs in data: annotation.append(time=obs.time, duration=obs.duration, @@ -264,7 +244,7 @@ def beat_position(annotation): '''Convert beat_position to beat''' annotation.namespace = 'beat' - data = pop_data(annotation) + data = annotation.pop_data() for obs in data: annotation.append(time=obs.time, duration=obs.duration, confidence=obs.confidence, From 05c99d26140d7d40753bb35e1afb8e102cd7771b Mon Sep 17 00:00:00 2001 From: Brian McFee Date: Wed, 10 May 2017 15:19:48 -0400 Subject: [PATCH 09/31] simplified add_observation --- jams/core.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/jams/core.py b/jams/core.py index e237017e..0920665a 100644 --- a/jams/core.py +++ b/jams/core.py @@ -533,11 +533,8 @@ def dense(self, value): def add_observation(self, time=None, duration=None, value=None, confidence=None): - idx = self.obs.bisect_key(time) - self.obs.insert(idx, Observation(time=time, - duration=duration, - value=value, - confidence=confidence)) + self.obs.add(Observation(time=time, duration=duration, + value=value, confidence=confidence)) def append_records(self, records): From 1d9be64a88e71d3b9b63a0978baae43e2eceaa85 Mon Sep 17 00:00:00 2001 From: Brian McFee Date: Wed, 10 May 2017 15:55:59 -0400 Subject: [PATCH 10/31] added dataframe export --- jams/core.py | 6 ++++++ tests/jams_test.py | 19 +++++++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/jams/core.py b/jams/core.py index 0920665a..ecfc6cce 100644 --- a/jams/core.py +++ b/jams/core.py @@ -39,6 +39,7 @@ from sortedcontainers import SortedListWithKey import numpy as np +import pandas as pd import os import re import six @@ -573,6 +574,11 @@ def to_interval_values(self): return np.array(ints), vals + def to_dataframe(self): + return pd.DataFrame.from_records(list(self.obs), + columns=['time', 'duration', + 'value', 'confidence']) + @property def __json__(self): '''JSON encoding attribute''' diff --git a/tests/jams_test.py b/tests/jams_test.py index 5b821f3a..86c234a3 100644 --- a/tests/jams_test.py +++ b/tests/jams_test.py @@ -1065,3 +1065,22 @@ def __test_error(jam, start_time, end_time, strict=False): del slice_metadata['duration'] assert slice_metadata == orig_metadata assert jam_slice.file_metadata.duration == 2 + + +def test_annotation_data_frame(): + namespace = 'tag_open' + data = dict(time=[5.0, 5.0, 10.0], + duration=[2.0, 4.0, 4.0], + value=['one', 'two', 'three'], + confidence=[0.9, 0.9, 0.9]) + ann = jams.Annotation(namespace, data=data, time=5.0, duration=10.0) + + df = ann.data.to_dataframe() + + eq_(list(df.columns), ['time', 'duration', 'value', 'confidence']) + + for i, row in df.iterrows(): + eq_(row.time, data['time'][i]) + eq_(row.duration, data['duration'][i]) + eq_(row.value, data['value'][i]) + eq_(row.confidence, data['confidence'][i]) From a5efa996ce9346ddfcefbb59a6396d00a8e8b11f Mon Sep 17 00:00:00 2001 From: Brian McFee Date: Wed, 10 May 2017 16:16:23 -0400 Subject: [PATCH 11/31] docstrings, repr_html --- jams/core.py | 78 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) diff --git a/jams/core.py b/jams/core.py index ecfc6cce..5921b71e 100644 --- a/jams/core.py +++ b/jams/core.py @@ -534,11 +534,35 @@ def dense(self, value): def add_observation(self, time=None, duration=None, value=None, confidence=None): + '''Add a single observation. + + Parameters + ---------- + time : number >= 0, required + The start time of this observation + + duration : number >= 0, required + The duration of this observation + + value + The value for the observation + + confidence + The confidence for the observation + ''' self.obs.add(Observation(time=time, duration=duration, value=value, confidence=confidence)) def append_records(self, records): + '''Add observations from row-major storage. + + This is primarily useful for deserializing sparsely packed data. + Parameters + ---------- + records : iterable of dicts or Observations + Each element of `records` corresponds to one observation. + ''' for obs in records: if isinstance(obs, Observation): self.add_observation(**obs._asdict()) @@ -546,7 +570,17 @@ def append_records(self, records): self.add_observation(**obs) def append_columns(self, columns): + '''Add observations from column-major storage. + This is primarily used for deserializing densely packed data. + + Parameters + ---------- + columns : dict of lists + Keys must be `time, duration, value, confidence`, + and each much be a list of equal length. + + ''' self.append_records([dict(time=t, duration=d, value=v, confidence=c) for (t, d, v, c) in six.moves.zip(columns['time'], @@ -575,6 +609,14 @@ def to_interval_values(self): return np.array(ints), vals def to_dataframe(self): + '''Convert this annotation to a pandas dataframe. + + Returns + ------- + df : pd.DataFrame + Columns are `time, duration, value, confidence`. + Each row is an observation. + ''' return pd.DataFrame.from_records(list(self.obs), columns=['time', 'duration', 'value', 'confidence']) @@ -615,6 +657,42 @@ def __repr__(self): def __iter__(self): return iter(self.obs) + def to_html(self): + '''Render this annotation list in HTML + + Returns + ------- + rendered : str + An HTML table containing this annotation's data. + ''' + out = r''' + + + + + + + + + ''' + out += r'''''' + for i, o in enumerate(self.obs): + out += r''' + + + + + + '''.format(i, + o.time, + o.duration, + o.value, + o.confidence) + out += r'''
timedurationvalueconfidence
{:d}{:0.6f}{:0.6f}{:}{:}
''' + return out + + def _repr_html_(self): + return self.to_html() class Annotation(JObject): From 6df7535e019389cc5163ae2b5fe658c525dae41e Mon Sep 17 00:00:00 2001 From: Brian McFee Date: Sat, 13 May 2017 09:29:24 -0400 Subject: [PATCH 12/31] tests pass again --- jams/core.py | 403 +++++++++++++++++---------------------- jams/display.py | 10 +- jams/eval.py | 34 ++-- jams/nsconvert.py | 2 - jams/sonify.py | 12 +- jams/util.py | 8 +- tests/jams_test.py | 8 +- tests/namespace_tests.py | 8 +- tests/test_convert.py | 26 +-- tests/util_test.py | 9 +- 10 files changed, 230 insertions(+), 290 deletions(-) diff --git a/jams/core.py b/jams/core.py index 5921b71e..1fdd4be0 100644 --- a/jams/core.py +++ b/jams/core.py @@ -26,7 +26,6 @@ AnnotationMetadata Curator Annotation - AnnotationData Observation Sandbox JObject @@ -56,7 +55,7 @@ 'JObject', 'Sandbox', 'Annotation', 'Curator', 'AnnotationMetadata', 'FileMetadata', 'AnnotationArray', 'JAMS', - 'AnnotationData', 'Observation'] + 'Observation'] @contextlib.contextmanager @@ -504,197 +503,6 @@ class Sandbox(JObject): '''Core observation type: (time, duration, value, confidence).''' -class AnnotationData(object): - - __dense = False - - def __init__(self): - self.obs = SortedListWithKey(key=self._key) - - @classmethod - def _key(cls, obs): - return obs.time - - @property - def dense(self): - '''Boolean to determine whether the encoding is dense or sparse. - - Returns - ------- - dense : bool - `True` if the data should be encoded densely - `False` otherwise - ''' - return self.__dense - - @dense.setter - def dense(self, value): - '''Setter for dense''' - self.__dense = value - - def add_observation(self, time=None, duration=None, value=None, - confidence=None): - '''Add a single observation. - - Parameters - ---------- - time : number >= 0, required - The start time of this observation - - duration : number >= 0, required - The duration of this observation - - value - The value for the observation - - confidence - The confidence for the observation - ''' - self.obs.add(Observation(time=time, duration=duration, - value=value, confidence=confidence)) - - def append_records(self, records): - '''Add observations from row-major storage. - - This is primarily useful for deserializing sparsely packed data. - - Parameters - ---------- - records : iterable of dicts or Observations - Each element of `records` corresponds to one observation. - ''' - for obs in records: - if isinstance(obs, Observation): - self.add_observation(**obs._asdict()) - else: - self.add_observation(**obs) - - def append_columns(self, columns): - '''Add observations from column-major storage. - - This is primarily used for deserializing densely packed data. - - Parameters - ---------- - columns : dict of lists - Keys must be `time, duration, value, confidence`, - and each much be a list of equal length. - - ''' - self.append_records([dict(time=t, duration=d, value=v, confidence=c) - for (t, d, v, c) - in six.moves.zip(columns['time'], - columns['duration'], - columns['value'], - columns['confidence'])]) - - def to_interval_values(self): - '''Extract observation data in a `mir_eval`-friendly format. - - Returns - ------- - intervals : np.ndarray [shape=(n, 2), dtype=float] - Start- and end-times of all valued intervals - - `intervals[i, :] = [time[i], time[i] + duration[i]]` - - labels : list - List view of value field. - ''' - ints, vals = [], [] - for obs in self.obs: - ints.append([obs.time, obs.time + obs.duration]) - vals.append(obs.value) - - return np.array(ints), vals - - def to_dataframe(self): - '''Convert this annotation to a pandas dataframe. - - Returns - ------- - df : pd.DataFrame - Columns are `time, duration, value, confidence`. - Each row is an observation. - ''' - return pd.DataFrame.from_records(list(self.obs), - columns=['time', 'duration', - 'value', 'confidence']) - - @property - def __json__(self): - '''JSON encoding attribute''' - - if self.dense: - times, durations, values, confidences = [], [], [], [] - for (t, d, v, c) in self.obs: - times.append(t) - durations.append(d) - values.append(serialize_obj(v)) - confidences.append(c) - - return dict(time=times, - duration=durations, - value=values, - confidence=confidences) - else: - return [dict(time=o.time, - duration=o.duration, - value=serialize_obj(o.value), - confidence=o.confidence) for o in self.obs] - - def __len__(self): - return len(self.obs) - - def __eq__(self, other): - return (isinstance(other, self.__class__) and - self.obs == other.obs) - - def __repr__(self): - return '<{}: {:d} observations>'.format(self.__class__.__name__, - len(self)) - - def __iter__(self): - return iter(self.obs) - - def to_html(self): - '''Render this annotation list in HTML - - Returns - ------- - rendered : str - An HTML table containing this annotation's data. - ''' - out = r''' - - - - - - - - - ''' - out += r'''''' - for i, o in enumerate(self.obs): - out += r''' - - - - - - '''.format(i, - o.time, - o.duration, - o.value, - o.confidence) - out += r'''
timedurationvalueconfidence
{:d}{:0.6f}{:0.6f}{:}{:}
''' - return out - - def _repr_html_(self): - return self.to_html() - - class Annotation(JObject): """Annotation base class.""" @@ -736,16 +544,13 @@ def __init__(self, namespace, data=None, annotation_metadata=None, self.namespace = namespace - self.data = AnnotationData() - - # Set the data export coding to match the namespace - self.data.dense = schema.is_dense(self.namespace) + self.data = SortedListWithKey(key=self._key) if data is not None: if isinstance(data, dict): - self.data.append_columns(data) + self.append_columns(data) else: - self.data.append_records(data) + self.append_records(data) if sandbox is None: sandbox = Sandbox() @@ -755,7 +560,7 @@ def __init__(self, namespace, data=None, annotation_metadata=None, self.time = time self.duration = duration - def append(self, **kwargs): + def append(self, time=None, duration=None, value=None, confidence=None): '''Append an observation to the data field Parameters @@ -770,24 +575,52 @@ def append(self, **kwargs): Types and values should conform to the namespace of the Annotation object. - See Also - -------- - AnnotationData.add_observation - Examples -------- >>> ann = jams.Annotation(namespace='chord') - >>> ann.append(time=0, duration=3, value='C#') >>> ann.append(time=3, duration=2, value='E#') - >>> ann - - >>> ann.data - time duration value confidence - 0 00:00:00 00:00:03 C# None - 1 00:00:03 00:00:02 E# None ''' - self.data.add_observation(**kwargs) + # TODO: validate time and duration here + self.data.add(Observation(time=time, + duration=duration, + value=value, + confidence=confidence)) + + def append_records(self, records): + '''Add observations from row-major storage. + + This is primarily useful for deserializing sparsely packed data. + + Parameters + ---------- + records : iterable of dicts or Observations + Each element of `records` corresponds to one observation. + ''' + for obs in records: + if isinstance(obs, Observation): + self.append(**obs._asdict()) + else: + self.append(**obs) + + def append_columns(self, columns): + '''Add observations from column-major storage. + + This is primarily used for deserializing densely packed data. + + Parameters + ---------- + columns : dict of lists + Keys must be `time, duration, value, confidence`, + and each much be a list of equal length. + + ''' + self.append_records([dict(time=t, duration=d, value=v, confidence=c) + for (t, d, v, c) + in six.moves.zip(columns['time'], + columns['duration'], + columns['value'], + columns['confidence'])]) def validate(self, strict=True): '''Validate this annotation object against the JAMS schema, @@ -821,18 +654,9 @@ def validate(self, strict=True): ann_schema = schema.namespace(self.namespace) try: - records = self.data.__json__ - - # If the data has a dense packing, reshape it for record-wise - # validation - if self.data.dense: - records = [dict(_) - for _ in zip(*[[(k, v) for v in value] - for (k, value) in six.iteritems(records)])] - # validate each record in the frame - for rec in records: - jsonschema.validate(rec, ann_schema) + for rec in self.data: + jsonschema.validate(serialize_obj(rec), ann_schema) except jsonschema.ValidationError as invalid: if strict: @@ -974,7 +798,7 @@ def trim(self, start_time, end_time, strict=False): # We do this rather than copying and directly manipulating the # annotation' data frame (which might be faster) since this way trim is # independent of the internal data representation. - for obs in self.data.obs: + for obs in self.data: obs_start = obs.time obs_end = obs_start + obs.duration @@ -1112,20 +936,136 @@ def slice(self, start_time, end_time, strict=False): return sliced_ann def pop_data(self): - '''Replace this observation's data with a fresh AnnotationData - object. + '''Replace this observation's data with a fresh container. Returns ------- - annotation_data : jams.AnnotationData - The original annotation data object + annotation_data : SortedListWithKey + The original annotation data container ''' data = self.data - self.data = AnnotationData() - self.data.dense = data.dense + self.data = SortedListWithKey(key=self._key) return data + def to_interval_values(self): + '''Extract observation data in a `mir_eval`-friendly format. + + Returns + ------- + intervals : np.ndarray [shape=(n, 2), dtype=float] + Start- and end-times of all valued intervals + + `intervals[i, :] = [time[i], time[i] + duration[i]]` + + labels : list + List view of value field. + ''' + ints, vals = [], [] + for obs in self.data: + ints.append([obs.time, obs.time + obs.duration]) + vals.append(obs.value) + + return np.array(ints), vals + + def to_dataframe(self): + '''Convert this annotation to a pandas dataframe. + + Returns + ------- + df : pd.DataFrame + Columns are `time, duration, value, confidence`. + Each row is an observation. + ''' + return pd.DataFrame.from_records(list(self.data), + columns=['time', 'duration', + 'value', 'confidence']) + + def __len__(self): + return len(self.data) + + def __iter__(self): + return iter(self.data) + + def to_html(self): + '''Render this annotation list in HTML + + Returns + ------- + rendered : str + An HTML table containing this annotation's data. + ''' + out = r''' + + + + + + + + + ''' + out += r'''''' + for i, o in enumerate(self.data): + out += r''' + + + + + + '''.format(i, + o.time, + o.duration, + o.value, + o.confidence) + out += r'''
timedurationvalueconfidence
{:d}{:0.6f}{:0.6f}{:}{:}
''' + return out + + def _repr_html_(self): + return self.to_html() + + @property + def __json__(self): + r"""Return the JObject as a set of native data types for serialization. + + Note: attributes beginning with underscores are suppressed. + """ + filtered_dict = dict() + + for k, item in six.iteritems(self.__dict__): + if k.startswith('_'): + continue + elif k == 'data': + filtered_dict[k] = self.__json_data__ + + elif hasattr(item, '__json__'): + filtered_dict[k] = item.__json__ + else: + filtered_dict[k] = item + + return filtered_dict + + @property + def __json_data__(self): + r"""JSON-serialize the observation sequence.""" + if schema.is_dense(self.namespace): + dense_records = dict() + for field in Observation._fields: + dense_records[field] = [] + + for obs in self.data: + for key, val in six.iteritems(obs._asdict()): + dense_records[key].append(serialize_obj(val)) + + return dense_records + + else: + return [serialize_obj(_) for _ in self.data] + + @classmethod + def _key(cls, obs): + return obs.time + class Curator(JObject): """Curator @@ -1834,4 +1774,7 @@ def serialize_obj(obj): elif isinstance(obj, list): return [serialize_obj(x) for x in obj] + elif isinstance(obj, Observation): + return {k: serialize_obj(v) for k, v in six.iteritems(obj._asdict())} + return obj diff --git a/jams/display.py b/jams/display.py index 4ec12d42..ca46aa53 100644 --- a/jams/display.py +++ b/jams/display.py @@ -63,7 +63,7 @@ def pprint_jobject(obj, **kwargs): def intervals(annotation, **kwargs): '''Plotting wrapper for labeled intervals''' - times, labels = annotation.data.to_interval_values() + times, labels = annotation.to_interval_values() return mir_eval.display.labeled_intervals(times, labels, **kwargs) @@ -83,7 +83,7 @@ def pitch_contour(annotation, **kwargs): # If the annotation is empty, we need to construct a new axes ax = mir_eval.display.__get_axes(ax=ax)[0] - times, values = annotation.data.to_interval_values() + times, values = annotation.to_interval_values() indices = np.unique([v['index'] for v in values]) @@ -102,7 +102,7 @@ def pitch_contour(annotation, **kwargs): def event(annotation, **kwargs): '''Plotting wrapper for events''' - times, values = annotation.data.to_interval_values() + times, values = annotation.to_interval_values() if any(values): labels = values @@ -115,7 +115,7 @@ def event(annotation, **kwargs): def beat_position(annotation, **kwargs): '''Plotting wrapper for beat-position data''' - times, values = annotation.data.to_interval_values() + times, values = annotation.to_interval_values() labels = [_['position'] for _ in values] @@ -125,7 +125,7 @@ def beat_position(annotation, **kwargs): def piano_roll(annotation, **kwargs): '''Plotting wrapper for piano rolls''' - times, midi = annotation.data.to_interval_values() + times, midi = annotation.to_interval_values() return mir_eval.display.piano_roll(times, midi=midi, **kwargs) diff --git a/jams/eval.py b/jams/eval.py index dfb487e0..a03a17bc 100644 --- a/jams/eval.py +++ b/jams/eval.py @@ -106,8 +106,8 @@ def beat(ref, est, **kwargs): namespace = 'beat' ref = coerce_annotation(ref, namespace) est = coerce_annotation(est, namespace) - ref_interval, _ = ref.data.to_interval_values() - est_interval, _ = est.data.to_interval_values() + ref_interval, _ = ref.to_interval_values() + est_interval, _ = est.to_interval_values() return mir_eval.beat.evaluate(ref_interval[:, 0], est_interval[:, 0], **kwargs) @@ -147,8 +147,8 @@ def onset(ref, est, **kwargs): namespace = 'onset' ref = coerce_annotation(ref, namespace) est = coerce_annotation(est, namespace) - ref_interval, _ = ref.data.to_interval_values() - est_interval, _ = est.data.to_interval_values() + ref_interval, _ = ref.to_interval_values() + est_interval, _ = est.to_interval_values() return mir_eval.onset.evaluate(ref_interval[:, 0], est_interval[:, 0], **kwargs) @@ -189,8 +189,8 @@ def chord(ref, est, **kwargs): namespace = 'chord' ref = coerce_annotation(ref, namespace) est = coerce_annotation(est, namespace) - ref_interval, ref_value = ref.data.to_interval_values() - est_interval, est_value = est.data.to_interval_values() + ref_interval, ref_value = ref.to_interval_values() + est_interval, est_value = est.to_interval_values() return mir_eval.chord.evaluate(ref_interval, ref_value, est_interval, est_value, **kwargs) @@ -231,8 +231,8 @@ def segment(ref, est, **kwargs): namespace = 'segment_open' ref = coerce_annotation(ref, namespace) est = coerce_annotation(est, namespace) - ref_interval, ref_value = ref.data.to_interval_values() - est_interval, est_value = est.data.to_interval_values() + ref_interval, ref_value = ref.to_interval_values() + est_interval, est_value = est.to_interval_values() return mir_eval.segment.evaluate(ref_interval, ref_value, est_interval, est_value, **kwargs) @@ -255,7 +255,7 @@ def hierarchy_flatten(annotation): A list of lists of labels, ordered by increasing specificity. ''' - intervals, values = annotation.data.to_interval_values() + intervals, values = annotation.to_interval_values() ordering = dict() @@ -352,9 +352,9 @@ def tempo(ref, est, **kwargs): ref = coerce_annotation(ref, 'tempo') est = coerce_annotation(est, 'tempo') - ref_tempi = np.asarray([o.value for o in ref.data]) - ref_weight = ref.data.obs[0].confidence - est_tempi = np.asarray([o.value for o in est.data]) + ref_tempi = np.asarray([o.value for o in ref]) + ref_weight = ref.data[0].confidence + est_tempi = np.asarray([o.value for o in est]) return mir_eval.tempo.evaluate(ref_tempi, ref_weight, est_tempi, **kwargs) @@ -396,8 +396,8 @@ def melody(ref, est, **kwargs): namespace = 'pitch_contour' ref = coerce_annotation(ref, namespace) est = coerce_annotation(est, namespace) - ref_interval, ref_p = ref.data.to_interval_values() - est_interval, est_p = est.data.to_interval_values() + ref_interval, ref_p = ref.to_interval_values() + est_interval, est_p = est.to_interval_values() ref_freq = np.asarray([p['frequency'] * (-1)**(~p['voiced']) for p in ref_p]) est_freq = np.asarray([p['frequency'] * (-1)**(~p['voiced']) for p in est_p]) @@ -434,7 +434,7 @@ def pattern_to_mireval(ann): patterns = defaultdict(lambda: defaultdict(list)) # Iterate over the data in interval-value format - for interval, observation in zip(*ann.data.to_interval_values()): + for interval, observation in zip(*ann.to_interval_values()): pattern_id = observation['pattern_id'] occurrence_id = observation['occurrence_id'] @@ -527,8 +527,8 @@ def transcription(ref, est, **kwargs): namespace = 'pitch_contour' ref = coerce_annotation(ref, namespace) est = coerce_annotation(est, namespace) - ref_intervals, ref_p = ref.data.to_interval_values() - est_intervals, est_p = est.data.to_interval_values() + ref_intervals, ref_p = ref.to_interval_values() + est_intervals, est_p = est.to_interval_values() ref_pitches = np.asarray([p['frequency'] * (-1)**(~p['voiced']) for p in ref_p]) est_pitches = np.asarray([p['frequency'] * (-1)**(~p['voiced']) for p in est_p]) diff --git a/jams/nsconvert.py b/jams/nsconvert.py index 4d209894..f7e3f606 100644 --- a/jams/nsconvert.py +++ b/jams/nsconvert.py @@ -16,8 +16,6 @@ from copy import deepcopy from collections import defaultdict -from .core import AnnotationData - from .exceptions import NamespaceError diff --git a/jams/sonify.py b/jams/sonify.py index 079702bb..bfe1a44f 100644 --- a/jams/sonify.py +++ b/jams/sonify.py @@ -43,7 +43,7 @@ def clicks(annotation, sr=22050, length=None, **kwargs): events such as beats or segment boundaries. ''' - interval, _ = annotation.data.to_interval_values() + interval, _ = annotation.to_interval_values() return filter_kwargs(mir_eval.sonify.clicks, interval[:, 0], fs=sr, length=length, **kwargs) @@ -56,7 +56,7 @@ def downbeat(annotation, sr=22050, length=None, **kwargs): beat_click = mkclick(440 * 2, sr=sr) downbeat_click = mkclick(440 * 3, sr=sr) - intervals, values = annotation.data.to_interval_values() + intervals, values = annotation.to_interval_values() beats, downbeats = [], [] @@ -109,7 +109,7 @@ def chord(annotation, sr=22050, length=None, **kwargs): This uses mir_eval.sonify.chords. ''' - intervals, chords = annotation.data.to_interval_values() + intervals, chords = annotation.to_interval_values() return filter_kwargs(mir_eval.sonify.chords, chords, intervals, @@ -127,7 +127,7 @@ def pitch_contour(annotation, sr=22050, length=None, **kwargs): are summed together. ''' - times, values = annotation.data.to_interval_values() + times, values = annotation.to_interval_values() indices = np.unique([v['index'] for v in values]) @@ -153,13 +153,13 @@ def pitch_contour(annotation, sr=22050, length=None, **kwargs): def piano_roll(annotation, sr=22050, length=None, **kwargs): '''Sonify a piano-roll - + This uses mir_eval.sonify.time_frequency, and is appropriate for sparse transcription data, e.g., annotations in the `note_midi` namespace. ''' - intervals, pitches = annotation.data.to_interval_values() + intervals, pitches = annotation.to_interval_values() # Construct the pitchogram pitch_map = {f: idx for idx, f in enumerate(np.unique(pitches))} diff --git a/jams/util.py b/jams/util.py index eb3cc0bc..6d754b4d 100644 --- a/jams/util.py +++ b/jams/util.py @@ -115,10 +115,10 @@ def import_lab(namespace, filename, jam=None, infer_duration=True, **parse_optio value = [x for x in row[3:] if x is not None][-1] - annotation.data.add_observation(time=time, - duration=duration, - confidence=1.0, - value=value) + annotation.append(time=time, + duration=duration, + confidence=1.0, + value=value) jam.annotations.append(annotation) diff --git a/tests/jams_test.py b/tests/jams_test.py index 86c234a3..b7e3abfe 100644 --- a/tests/jams_test.py +++ b/tests/jams_test.py @@ -189,8 +189,8 @@ def __test(namespace, data, amd, sandbox): eq_(dict(sandbox), dict(ann.sandbox)) if data is not None: - eq_(len(ann.data.obs), len(data)) - for obs1, obs2 in zip(ann.data.obs, data): + eq_(len(ann.data), len(data)) + for obs1, obs2 in zip(ann.data, data): eq_(obs1._asdict(), obs2) real_sandbox = jams.Sandbox(description='none') @@ -220,7 +220,7 @@ def test_annotation_append(): ann.append(**update) - eq_(ann.data.obs[-1]._asdict(), update) + eq_(ann.data[-1]._asdict(), update) def test_annotation_eq(): @@ -1075,7 +1075,7 @@ def test_annotation_data_frame(): confidence=[0.9, 0.9, 0.9]) ann = jams.Annotation(namespace, data=data, time=5.0, duration=10.0) - df = ann.data.to_dataframe() + df = ann.to_dataframe() eq_(list(df.columns), ['time', 'duration', 'value', 'confidence']) diff --git a/tests/namespace_tests.py b/tests/namespace_tests.py index fd039d83..b00ec6ba 100644 --- a/tests/namespace_tests.py +++ b/tests/namespace_tests.py @@ -29,10 +29,10 @@ def test_ns_time_invalid(): def __test(data): ann = Annotation(namespace='onset') - # Bypass the safety checks in add_observation - ann.data.obs.insert(0, Observation(time=data['time'], - duration=data['duration'], - value=None, confidence=None)) + # Bypass the safety checks in append + ann.data.add(Observation(time=data['time'], + duration=data['duration'], + value=None, confidence=None)) ann.validate() diff --git a/tests/test_convert.py b/tests/test_convert.py index aaafe12f..bc7834a5 100644 --- a/tests/test_convert.py +++ b/tests/test_convert.py @@ -58,16 +58,16 @@ def test_pitch_hz_to_contour(): eq_(ann2.namespace, 'pitch_contour') # Check index values - eq_(ann2.data.obs[0].value['index'], 0) - eq_(ann2.data.obs[-1].value['index'], 0) + eq_(ann2.data[0].value['index'], 0) + eq_(ann2.data[-1].value['index'], 0) # Check frequency - eq_(np.abs(ann2.data.obs[0].value['frequency']), np.abs(values[0])) - eq_(np.abs(ann2.data.obs[-1].value['frequency']), np.abs(values[-1])) + eq_(np.abs(ann2.data[0].value['frequency']), np.abs(values[0])) + eq_(np.abs(ann2.data[-1].value['frequency']), np.abs(values[-1])) # Check voicings - assert not ann2.data.obs[0].value['voiced'] - assert ann2.data.obs[-1].value['voiced'] + assert not ann2.data[0].value['voiced'] + assert ann2.data[-1].value['voiced'] def test_pitch_midi_to_contour(): @@ -86,11 +86,11 @@ def test_pitch_midi_to_contour(): eq_(ann2.namespace, 'pitch_contour') # Check index values - eq_(ann2.data.obs[0].value['index'], 0) - eq_(ann2.data.obs[-1].value['index'], 0) + eq_(ann2.data[0].value['index'], 0) + eq_(ann2.data[-1].value['index'], 0) # Check voicings - assert ann2.data.obs[-1].value['voiced'] + assert ann2.data[-1].value['voiced'] def test_pitch_midi_to_hz(): @@ -104,7 +104,7 @@ def test_pitch_midi_to_hz(): # Check the namespace eq_(ann2.namespace, 'pitch_hz') # midi 69 = 440.0 Hz - eq_(ann2.data.obs[0].value, 440.0) + eq_(ann2.data[0].value, 440.0) # Check all else is equal eq_(len(ann.data), len(ann2.data)) @@ -126,7 +126,7 @@ def test_pitch_hz_to_midi(): # Check the namespace eq_(ann2.namespace, 'pitch_midi') # midi 69 = 440.0 Hz - eq_(ann2.data.obs[0].value, 69) + eq_(ann2.data[0].value, 69) # Check all else is equal eq_(len(ann.data), len(ann2.data)) @@ -148,7 +148,7 @@ def test_note_midi_to_hz(): # Check the namespace eq_(ann2.namespace, 'note_hz') # midi 69 = 440.0 Hz - eq_(ann2.data.obs[0].value, 440.0) + eq_(ann2.data[0].value, 440.0) # Check all else is equal eq_(len(ann.data), len(ann2.data)) @@ -170,7 +170,7 @@ def test_note_hz_to_midi(): # Check the namespace eq_(ann2.namespace, 'note_midi') # midi 69 = 440.0 Hz - eq_(ann2.data.obs[0].value, 69) + eq_(ann2.data[0].value, 69) # Check all else is equal eq_(len(ann.data), len(ann2.data)) diff --git a/tests/util_test.py b/tests/util_test.py index 5fd4bc84..a83479b2 100644 --- a/tests/util_test.py +++ b/tests/util_test.py @@ -4,10 +4,9 @@ import tempfile import os -from nose.tools import eq_, raises +from nose.tools import eq_ import numpy as np -import jams from jams import core, util @@ -45,10 +44,10 @@ def __test(ns, lab, ints, y, infer_duration): _, ann = util.import_lab(ns, six.StringIO(lab), infer_duration=infer_duration) - eq_(len(ints), len(ann.data)) - eq_(len(y), len(ann.data)) + eq_(len(ints), len(ann)) + eq_(len(y), len(ann)) - for yi, ival, obs in zip(y, ints, ann.data): + for yi, ival, obs in zip(y, ints, ann): eq_(obs.time, ival[0]) eq_(obs.duration, ival[1] - ival[0]) eq_(obs.value, yi) From 99a4f422de9ef6182d01246ba803d44228312efc Mon Sep 17 00:00:00 2001 From: Brian McFee Date: Sat, 13 May 2017 09:35:34 -0400 Subject: [PATCH 13/31] linting --- tests/jams_test.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tests/jams_test.py b/tests/jams_test.py index b7e3abfe..72bc7c1d 100644 --- a/tests/jams_test.py +++ b/tests/jams_test.py @@ -246,8 +246,8 @@ def test_annotation_eq(): assert not (ann1 == ann2) -# FileMetadata +# FileMetadata def test_filemetadata(): meta = dict(title='Test track', @@ -260,8 +260,8 @@ def test_filemetadata(): for k in meta: eq_(meta[k], dict_fm[k]) -# AnnotationArray +# AnnotationArray def test_annotation_array(): arr = jams.AnnotationArray() @@ -320,6 +320,7 @@ def test_annotation_array_index_simple(): a1, a2 = anns[i], jam.annotations[i] eq_(a1, a2) + def test_annotation_array_slice_simple(): jam = jams.JAMS() @@ -333,6 +334,7 @@ def test_annotation_array_slice_simple(): eq_(len(res), 3) assert anns[0] in res + def test_annotation_array_index_fancy(): jam = jams.JAMS() @@ -359,6 +361,7 @@ def test_annotation_array_composite(): eq_(len(jam.annotations['beat', 2::2]), 4) + @raises(IndexError) def test_annotation_array_index_error(): From 18363eb3fc3279a9fc5e7aee36d166ccdef08876 Mon Sep 17 00:00:00 2001 From: Brian McFee Date: Sat, 13 May 2017 09:52:27 -0400 Subject: [PATCH 14/31] Removed validation-on-add from annotations --- jams/core.py | 1 - 1 file changed, 1 deletion(-) diff --git a/jams/core.py b/jams/core.py index 1fdd4be0..f1f5e182 100644 --- a/jams/core.py +++ b/jams/core.py @@ -581,7 +581,6 @@ def append(self, time=None, duration=None, value=None, confidence=None): >>> ann.append(time=3, duration=2, value='E#') ''' - # TODO: validate time and duration here self.data.add(Observation(time=time, duration=duration, value=value, From 1c403efe0a8ad14f45bae0cb4df3d8417d79f7c3 Mon Sep 17 00:00:00 2001 From: Brian McFee Date: Sat, 13 May 2017 10:10:24 -0400 Subject: [PATCH 15/31] added type check to observation container index --- jams/core.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/jams/core.py b/jams/core.py index f1f5e182..36363de8 100644 --- a/jams/core.py +++ b/jams/core.py @@ -980,12 +980,16 @@ def to_dataframe(self): columns=['time', 'duration', 'value', 'confidence']) + # Collection methods def __len__(self): return len(self.data) def __iter__(self): return iter(self.data) + def __contains__(self, item): + return item in self.data + def to_html(self): '''Render this annotation list in HTML @@ -1063,6 +1067,9 @@ def __json_data__(self): @classmethod def _key(cls, obs): + if not isinstance(obs, Observation): + raise JamsError('{} must be of type jams.Observation'.format(obs)) + return obs.time From b499c301a41508c291f418948eb8bceffc52aa5a Mon Sep 17 00:00:00 2001 From: Brian McFee Date: Sat, 13 May 2017 10:21:18 -0400 Subject: [PATCH 16/31] removed container check --- jams/core.py | 3 --- tests/jams_test.py | 1 + 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/jams/core.py b/jams/core.py index 36363de8..cc2e2b9c 100644 --- a/jams/core.py +++ b/jams/core.py @@ -987,9 +987,6 @@ def __len__(self): def __iter__(self): return iter(self.data) - def __contains__(self, item): - return item in self.data - def to_html(self): '''Render this annotation list in HTML diff --git a/tests/jams_test.py b/tests/jams_test.py index 72bc7c1d..4b4d1d96 100644 --- a/tests/jams_test.py +++ b/tests/jams_test.py @@ -189,6 +189,7 @@ def __test(namespace, data, amd, sandbox): eq_(dict(sandbox), dict(ann.sandbox)) if data is not None: + eq_(len(ann), len(data)) eq_(len(ann.data), len(data)) for obs1, obs2 in zip(ann.data, data): eq_(obs1._asdict(), obs2) From 7cc38dc35f554d082b164db4353d7b629b138069 Mon Sep 17 00:00:00 2001 From: Brian McFee Date: Sat, 13 May 2017 10:27:20 -0400 Subject: [PATCH 17/31] removed annotation.__len__ override --- jams/core.py | 4 ---- tests/jams_test.py | 9 ++++++++- tests/util_test.py | 4 ++-- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/jams/core.py b/jams/core.py index cc2e2b9c..6d906fff 100644 --- a/jams/core.py +++ b/jams/core.py @@ -980,10 +980,6 @@ def to_dataframe(self): columns=['time', 'duration', 'value', 'confidence']) - # Collection methods - def __len__(self): - return len(self.data) - def __iter__(self): return iter(self.data) diff --git a/tests/jams_test.py b/tests/jams_test.py index 4b4d1d96..ee4c774f 100644 --- a/tests/jams_test.py +++ b/tests/jams_test.py @@ -189,7 +189,6 @@ def __test(namespace, data, amd, sandbox): eq_(dict(sandbox), dict(ann.sandbox)) if data is not None: - eq_(len(ann), len(data)) eq_(len(ann.data), len(data)) for obs1, obs2 in zip(ann.data, data): eq_(obs1._asdict(), obs2) @@ -248,6 +247,14 @@ def test_annotation_eq(): assert not (ann1 == ann2) +@raises(jams.JamsError) +def test_annotation_badtype(): + + an = jams.Annotation(namespace='tag_open') + # This should throw a jams error because NoneType can't be indexed + an.data.add(None) + + # FileMetadata def test_filemetadata(): diff --git a/tests/util_test.py b/tests/util_test.py index a83479b2..04ae1326 100644 --- a/tests/util_test.py +++ b/tests/util_test.py @@ -44,8 +44,8 @@ def __test(ns, lab, ints, y, infer_duration): _, ann = util.import_lab(ns, six.StringIO(lab), infer_duration=infer_duration) - eq_(len(ints), len(ann)) - eq_(len(y), len(ann)) + eq_(len(ints), len(ann.data)) + eq_(len(y), len(ann.data)) for yi, ival, obs in zip(y, ints, ann): eq_(obs.time, ival[0]) From aaeb321c004c87fa5d8b0ed1b1465dbb7f69f256 Mon Sep 17 00:00:00 2001 From: Brian McFee Date: Sat, 13 May 2017 10:36:06 -0400 Subject: [PATCH 18/31] removed nottest from beat tracking eval --- tests/eval_test.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/eval_test.py b/tests/eval_test.py index f036ba18..d654f3bf 100644 --- a/tests/eval_test.py +++ b/tests/eval_test.py @@ -3,7 +3,7 @@ '''mir_eval integration tests''' import numpy as np -from nose.tools import raises, nottest +from nose.tools import raises import jams from util_test import srand @@ -28,7 +28,6 @@ def create_annotation(values, namespace='beat', offset=0.0, duration=1, confiden return ann -@nottest # Temporarily disabled due to mir_eval bug with numpy 1.12 def test_beat_valid(): ref_ann = create_annotation(values=np.arange(10) % 4 + 0.5, From fa81d8e75e86bc68dd21e1d787b07beb0a8eb451 Mon Sep 17 00:00:00 2001 From: Brian McFee Date: Sat, 13 May 2017 10:39:30 -0400 Subject: [PATCH 19/31] added Annotation.to_event_values --- jams/core.py | 18 ++++++++++++++++++ jams/eval.py | 20 ++++++++++---------- 2 files changed, 28 insertions(+), 10 deletions(-) diff --git a/jams/core.py b/jams/core.py index 6d906fff..a2d7d98d 100644 --- a/jams/core.py +++ b/jams/core.py @@ -967,6 +967,24 @@ def to_interval_values(self): return np.array(ints), vals + def to_event_values(self): + '''Extract observation data in a `mir_eval`-friendly format. + + Returns + ------- + times : np.ndarray [shape=(n,), dtype=float] + Start-time of all observations + + labels : list + List view of value field. + ''' + ints, vals = [], [] + for obs in self.data: + ints.append(obs.time) + vals.append(obs.value) + + return np.array(ints), vals + def to_dataframe(self): '''Convert this annotation to a pandas dataframe. diff --git a/jams/eval.py b/jams/eval.py index a03a17bc..6caa90e6 100644 --- a/jams/eval.py +++ b/jams/eval.py @@ -106,10 +106,10 @@ def beat(ref, est, **kwargs): namespace = 'beat' ref = coerce_annotation(ref, namespace) est = coerce_annotation(est, namespace) - ref_interval, _ = ref.to_interval_values() - est_interval, _ = est.to_interval_values() + ref_times, _ = ref.to_event_values() + est_times, _ = est.to_event_values() - return mir_eval.beat.evaluate(ref_interval[:, 0], est_interval[:, 0], **kwargs) + return mir_eval.beat.evaluate(ref_times, est_times, **kwargs) def onset(ref, est, **kwargs): @@ -147,10 +147,10 @@ def onset(ref, est, **kwargs): namespace = 'onset' ref = coerce_annotation(ref, namespace) est = coerce_annotation(est, namespace) - ref_interval, _ = ref.to_interval_values() - est_interval, _ = est.to_interval_values() + ref_times, _ = ref.to_event_values() + est_times, _ = est.to_event_values() - return mir_eval.onset.evaluate(ref_interval[:, 0], est_interval[:, 0], **kwargs) + return mir_eval.onset.evaluate(ref_times, est_times, **kwargs) def chord(ref, est, **kwargs): @@ -396,14 +396,14 @@ def melody(ref, est, **kwargs): namespace = 'pitch_contour' ref = coerce_annotation(ref, namespace) est = coerce_annotation(est, namespace) - ref_interval, ref_p = ref.to_interval_values() - est_interval, est_p = est.to_interval_values() + ref_times, ref_p = ref.to_event_values() + est_times, est_p = est.to_event_values() ref_freq = np.asarray([p['frequency'] * (-1)**(~p['voiced']) for p in ref_p]) est_freq = np.asarray([p['frequency'] * (-1)**(~p['voiced']) for p in est_p]) - return mir_eval.melody.evaluate(ref_interval[:, 0], ref_freq, - est_interval[:, 0], est_freq, + return mir_eval.melody.evaluate(ref_times, ref_freq, + est_times, est_freq, **kwargs) From a67f0b3724670ed02d393a46dd5f24a7eb0ce6d3 Mon Sep 17 00:00:00 2001 From: Brian McFee Date: Sat, 13 May 2017 10:51:13 -0400 Subject: [PATCH 20/31] switched pattern eval to event_values --- jams/eval.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/jams/eval.py b/jams/eval.py index 6caa90e6..012dda34 100644 --- a/jams/eval.py +++ b/jams/eval.py @@ -434,11 +434,11 @@ def pattern_to_mireval(ann): patterns = defaultdict(lambda: defaultdict(list)) # Iterate over the data in interval-value format - for interval, observation in zip(*ann.to_interval_values()): + for time, observation in zip(*ann.to_event_values()): pattern_id = observation['pattern_id'] occurrence_id = observation['occurrence_id'] - obs = (interval[0], observation['midi_pitch']) + obs = (time, observation['midi_pitch']) # Push this note observation into the correct pattern/occurrence patterns[pattern_id][occurrence_id].append(obs) From 8e49244ab47deb2fef0e73ee61afe9776fea3d1b Mon Sep 17 00:00:00 2001 From: Brian McFee Date: Sat, 13 May 2017 19:23:01 -0400 Subject: [PATCH 21/31] recovering from merge weirdness --- jams/core.py | 1 - tests/eval_test.py | 2 -- tests/jams_test.py | 1 + 3 files changed, 1 insertion(+), 3 deletions(-) diff --git a/jams/core.py b/jams/core.py index 72f69158..bd3a5843 100644 --- a/jams/core.py +++ b/jams/core.py @@ -843,7 +843,6 @@ def trim(self, start_time, end_time, strict=False): value=obs.value, confidence=obs.confidence) - ann_trimmed.data.reset_index(drop=True, inplace=True) if 'trim' not in ann_trimmed.sandbox.keys(): ann_trimmed.sandbox.update( trim=[{'start_time': start_time, 'end_time': end_time, diff --git a/tests/eval_test.py b/tests/eval_test.py index c1e53d94..df30eaaf 100644 --- a/tests/eval_test.py +++ b/tests/eval_test.py @@ -188,7 +188,6 @@ def test_melody_valid(): # Temporarily disabling because pandas 0.20 won't allow us to # construct ill-typed observations -@nottest def test_melody_invalid(): srand() @@ -307,7 +306,6 @@ def test_transcription_valid(): # Temporarily disabled because pandas 0.20 will not allow unsafe type mixtures, # so there's no easy way to make a bad corner case -@nottest def test_transcription_invalid(): ref_jam = jams.load('fixtures/transcription_ref.jams') diff --git a/tests/jams_test.py b/tests/jams_test.py index b926eea5..4caa4267 100644 --- a/tests/jams_test.py +++ b/tests/jams_test.py @@ -10,6 +10,7 @@ import sys import warnings +import numpy as np from nose.tools import raises, eq_ try: import pandas.testing as pdt From 039fce0b8aff2000f65f1703cde6197de9deaa48 Mon Sep 17 00:00:00 2001 From: Brian McFee Date: Sat, 13 May 2017 19:51:41 -0400 Subject: [PATCH 22/31] getting test coverage up --- jams/core.py | 2 +- tests/jams_test.py | 25 +++++++++++++++++++++++++ 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/jams/core.py b/jams/core.py index bd3a5843..fb9bde6c 100644 --- a/jams/core.py +++ b/jams/core.py @@ -149,7 +149,7 @@ def _open(name_or_fdesc, mode='r', fmt='auto'): else: # Don't know how to handle this. Raise a parameter error - raise ParameterError('Invalid filename or descriptor: {:r}'.format(name_or_fdesc)) + raise ParameterError('Invalid filename or descriptor: {}'.format(name_or_fdesc)) def load(path_or_file, validate=True, strict=True, fmt='auto'): diff --git a/tests/jams_test.py b/tests/jams_test.py index 4caa4267..bbeab2c5 100644 --- a/tests/jams_test.py +++ b/tests/jams_test.py @@ -621,6 +621,10 @@ def __test(filename, fmt): yield raises(jams.ParameterError)(__test), '{:s}.{:s}'.format(badfile, ext), 'auto' yield raises(jams.ParameterError)(__test), '{:s}.{:s}'.format(badfile, ext), ext yield raises(jams.ParameterError)(__test), '{:s}.jams'.format(badfile), ext + + # one last test, trying to load form a non-file-like object + yield raises(jams.ParameterError)(__test), None, 'auto' + os.rmdir(tdir) @@ -1145,3 +1149,24 @@ def test_annotation_data_frame(): eq_(row.duration, data['duration'][i]) eq_(row.value, data['value'][i]) eq_(row.confidence, data['confidence'][i]) + + +def test_deprecated(): + + @jams.core.deprecated('old version', 'new version') + def _foo(): + pass + + warnings.resetwarnings() + warnings.simplefilter('always') + with warnings.catch_warnings(record=True) as out: + _foo() + + # And that the warning triggered + assert len(out) > 0 + + # And that the category is correct + assert out[0].category is DeprecationWarning + + # And that it says the right thing (roughly) + assert 'deprecated' in str(out[0].message).lower() From 05f6460c6597eebe7012119f9d7c94bfb9585a03 Mon Sep 17 00:00:00 2001 From: Brian McFee Date: Sat, 13 May 2017 19:53:39 -0400 Subject: [PATCH 23/31] getting test coverage up --- tests/jams_test.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/jams_test.py b/tests/jams_test.py index bbeab2c5..7070dfc2 100644 --- a/tests/jams_test.py +++ b/tests/jams_test.py @@ -54,6 +54,9 @@ def test_jobject_serialize(): J = jams.JObject(**data) + # Stick a dummy _value in for testing + J._dummy = True + json_jobject = J.dumps(indent=2) # De-serialize into dicts From f852a4a96ee08dc8569986f7e2f93b65e645ca3c Mon Sep 17 00:00:00 2001 From: Brian McFee Date: Mon, 15 May 2017 11:21:55 -0400 Subject: [PATCH 24/31] fixed a type output error in to_interval_values --- jams/core.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/jams/core.py b/jams/core.py index fb9bde6c..be71418b 100644 --- a/jams/core.py +++ b/jams/core.py @@ -995,6 +995,9 @@ def to_interval_values(self): ints.append([obs.time, obs.time + obs.duration]) vals.append(obs.value) + if not ints: + return np.empty(shape=(0, 2), dtype=float), [] + return np.array(ints), vals def to_event_values(self): From 61147acb41ee1efb2245a7e06b80cd494778333f Mon Sep 17 00:00:00 2001 From: Brian McFee Date: Mon, 15 May 2017 14:05:12 -0400 Subject: [PATCH 25/31] linting --- jams/core.py | 61 ++++++++++++++++++++++++---------------------------- 1 file changed, 28 insertions(+), 33 deletions(-) diff --git a/jams/core.py b/jams/core.py index be71418b..4e3af392 100644 --- a/jams/core.py +++ b/jams/core.py @@ -33,22 +33,19 @@ """ import json -import jsonschema from collections import namedtuple -from sortedcontainers import SortedListWithKey -import numpy as np -import pandas as pd import os import re -import six import warnings import contextlib import gzip +import six -import copy -import sys - +import numpy as np +import pandas as pd +import jsonschema +from sortedcontainers import SortedListWithKey from decorator import decorator from .version import version as __VERSION__ @@ -62,6 +59,7 @@ 'FileMetadata', 'AnnotationArray', 'JAMS', 'Observation'] + def deprecated(version, version_removed): '''This is a decorator which can be used to mark functions as deprecated. @@ -145,11 +143,13 @@ def _open(name_or_fdesc, mode='r', fmt='auto'): yield fdesc except KeyError: - raise ParameterError('Unknown JAMS extension format: "{:s}"'.format(ext)) + raise ParameterError('Unknown JAMS extension ' + 'format: "{:s}"'.format(ext)) else: # Don't know how to handle this. Raise a parameter error - raise ParameterError('Invalid filename or descriptor: {}'.format(name_or_fdesc)) + raise ParameterError('Invalid filename or ' + 'descriptor: {}'.format(name_or_fdesc)) def load(path_or_file, validate=True, strict=True, fmt='auto'): @@ -432,7 +432,8 @@ def search(self, **kwargs): ------- match : bool `True` if any of the search keys match the specified value, - `False` otherwise, or if the search keys do not exist within the object. + `False` otherwise, or if the search keys do not exist + within the object. Examples -------- @@ -579,7 +580,6 @@ def __init__(self, namespace, data=None, annotation_metadata=None, else: self.append_records(data) - if sandbox is None: sandbox = Sandbox() @@ -663,7 +663,8 @@ def validate(self, strict=True): ------- valid : bool `True` if the object conforms to schema. - `False` if the object fails to conform to schema, but `strict == False`. + `False` if the object fails to conform to schema, + but `strict == False`. Raises ------ @@ -790,9 +791,9 @@ def trim(self, start_time, end_time, strict=False): orig_time = start_time orig_duration = end_time - start_time warnings.warn( - "Annotation.duration is not defined, cannot check for temporal " - "intersection, assuming the annotation is valid between " - "start_time and end_time.") + "Annotation.duration is not defined, cannot check " + "for temporal intersection, assuming the annotation " + "is valid between start_time and end_time.") else: orig_time = self.time orig_duration = self.duration @@ -962,7 +963,6 @@ def slice(self, start_time, end_time, strict=False): return sliced_ann - def pop_data(self): '''Replace this observation's data with a fresh container. @@ -1053,7 +1053,7 @@ def to_html(self): ''' out += r'''''' - for i, o in enumerate(self.data): + for i, obs in enumerate(self.data): out += r''' {:d} {:0.6f} @@ -1061,14 +1061,15 @@ def to_html(self): {:} {:} '''.format(i, - o.time, - o.duration, - o.value, - o.confidence) + obs.time, + obs.duration, + obs.value, + obs.confidence) out += r'''''' return out def _repr_html_(self): + '''Render annotation as HTML. See also: `to_html()`''' return self.to_html() @property @@ -1111,6 +1112,7 @@ def __json_data__(self): @classmethod def _key(cls, obs): + '''Provides sorting index for Observation objects''' if not isinstance(obs, Observation): raise JamsError('{} must be of type jams.Observation'.format(obs)) @@ -1248,7 +1250,8 @@ class AnnotationArray(list): are supported: - integer or slice : acts just as in `list`, e.g., `arr[0]` or `arr[1:3]` - - string : acts like a search, e.g., `arr['beat'] == arr.search(namespace='beat')` + - string : acts like a search, e.g., + `arr['beat'] == arr.search(namespace='beat')` - (string, integer or slice) acts like a search followed by index/slice Examples @@ -1733,13 +1736,6 @@ def slice(self, start_time, end_time, strict=False): # -- Helper functions -- # - -def timedelta_to_float(t): - '''Convert a timedelta64[ns] to floating point (seconds)''' - - return t.astype(np.float) * 1e-9 - - def query_pop(query, prefix, sep='.'): '''Pop a prefix from a query string. @@ -1804,7 +1800,8 @@ def match_query(string, query): if six.callable(query): return query(string) - elif isinstance(query, six.string_types) and isinstance(string, six.string_types): + elif (isinstance(query, six.string_types) and + isinstance(string, six.string_types)): return re.match(query, string) is not None else: @@ -1829,5 +1826,3 @@ def serialize_obj(obj): return {k: serialize_obj(v) for k, v in six.iteritems(obj._asdict())} return obj - - From 6aa44608b25a66f463c6122798353e30893aa1c9 Mon Sep 17 00:00:00 2001 From: Brian McFee Date: Mon, 15 May 2017 14:08:31 -0400 Subject: [PATCH 26/31] linting and style --- jams/__init__.py | 7 ++++--- jams/exceptions.py | 3 +++ jams/nsconvert.py | 3 +-- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/jams/__init__.py b/jams/__init__.py index 44fc72c4..c85b0b87 100644 --- a/jams/__init__.py +++ b/jams/__init__.py @@ -1,6 +1,9 @@ #!/usr/bin/env python """Top-level module for JAMS""" +import os +from pkg_resources import resource_filename + # Import the necessary modules from .exceptions import * from . import util @@ -12,15 +15,13 @@ from .core import * from .nsconvert import convert -# Populate the namespace mapping -from pkg_resources import resource_filename +# Populate the namespace mapping for _ in util.find_with_extension(resource_filename(__name__, schema.NS_SCHEMA_DIR), 'json'): schema.add_namespace(_) # Populate local namespaces -import os try: for _ in util.find_with_extension(os.environ['JAMS_SCHEMA_DIR'], 'json'): diff --git a/jams/exceptions.py b/jams/exceptions.py index 5b1ec336..8d9aeb70 100644 --- a/jams/exceptions.py +++ b/jams/exceptions.py @@ -2,6 +2,7 @@ # -*- encoding: utf-8 -*- '''Exception classes for JAMS''' + class JamsError(Exception): '''The root JAMS exception class''' pass @@ -11,10 +12,12 @@ class SchemaError(JamsError): '''Exceptions relating to schema validation''' pass + class NamespaceError(JamsError): '''Exceptions relating to task namespaces''' pass + class ParameterError(JamsError): '''Exceptions relating to function and method parameters''' pass diff --git a/jams/nsconvert.py b/jams/nsconvert.py index 43a62554..b5d1c8e0 100644 --- a/jams/nsconvert.py +++ b/jams/nsconvert.py @@ -68,7 +68,7 @@ def convert(annotation, target_namespace): ------ SchemaError if the input annotation fails to validate - + NamespaceError if no conversion is possible @@ -181,7 +181,6 @@ def note_hz_to_midi(annotation): annotation.namespace = 'note_midi' - data = annotation.pop_data() for obs in data: From b965a316d41298d5e717aaaf3a7667d429b304bf Mon Sep 17 00:00:00 2001 From: Brian McFee Date: Mon, 15 May 2017 14:16:00 -0400 Subject: [PATCH 27/31] removed timedelta_to_float --- tests/util_test.py | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/tests/util_test.py b/tests/util_test.py index 04ae1326..9b7b6aaf 100644 --- a/tests/util_test.py +++ b/tests/util_test.py @@ -56,17 +56,6 @@ def __test(ns, lab, ints, y, infer_duration): yield __test, ns, lab, ints, y, inf -def test_timedelta_to_float(): - - # 2.5 seconds - t = 2.5 - x = np.timedelta64(int(t * 1e9)) - tn = core.timedelta_to_float(x) - - # convert back - assert np.allclose(t, tn) - - def test_query_pop(): def __test(query, prefix, sep, target): From cf8144415b07d7b0c137bb840c63787e2cea3d2e Mon Sep 17 00:00:00 2001 From: Brian McFee Date: Mon, 15 May 2017 14:26:00 -0400 Subject: [PATCH 28/31] removed dangling entry from util docstring index --- jams/util.py | 1 - 1 file changed, 1 deletion(-) diff --git a/jams/util.py b/jams/util.py index 24774279..6d754b4d 100644 --- a/jams/util.py +++ b/jams/util.py @@ -12,7 +12,6 @@ smkdirs filebase find_with_extension - _deprecated """ import os From c483fcc93604a4876f4caa88535c6d394af1b8ea Mon Sep 17 00:00:00 2001 From: Brian McFee Date: Mon, 15 May 2017 14:41:31 -0400 Subject: [PATCH 29/31] upgrading docs --- docs/examples/example_beat_output.jams | 1188 ++++++++++++++++-------- docs/examples/example_chord.jams | 584 ++++++------ docs/examples/example_chord_import.py | 10 +- jams/schema.py | 5 +- 4 files changed, 1118 insertions(+), 669 deletions(-) diff --git a/docs/examples/example_beat_output.jams b/docs/examples/example_beat_output.jams index 3f0f7fd3..f633c4bd 100644 --- a/docs/examples/example_beat_output.jams +++ b/docs/examples/example_beat_output.jams @@ -1,385 +1,835 @@ { - "sandbox": {}, + "sandbox": {}, + "file_metadata": { + "duration": 61.45886621315193, + "title": "", + "release": "", + "identifiers": {}, + "artist": "", + "jams_version": "0.2.3" + }, "annotations": [ { - "namespace": "beat", - "sandbox": {}, - "time": 0, - "duration": null, + "sandbox": {}, + "duration": null, + "data": [ + { + "value": null, + "confidence": null, + "time": 0.11609977324263039, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 0.5572789115646258, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 0.9984580498866213, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 1.4628571428571429, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 1.9272562358276644, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 2.391655328798186, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 2.8328344671201813, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 3.297233560090703, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 3.7616326530612243, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 4.2260317460317465, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 4.690430839002268, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 5.154829931972789, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 5.61922902494331, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 6.0836281179138325, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 6.524807256235827, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 6.989206349206349, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 7.453605442176871, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 7.918004535147392, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 8.382403628117913, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 8.870022675736962, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 9.311201814058958, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 9.775600907029478, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 10.24, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 10.704399092970522, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 11.145578231292516, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 11.609977324263038, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 12.07437641723356, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 12.538775510204081, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 13.003174603174603, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 13.467573696145125, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 13.931972789115646, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 14.396371882086168, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 14.837551020408164, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 15.27873015873016, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 15.74312925170068, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 16.207528344671204, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 16.671927437641724, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 17.11310657596372, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 17.600725623582765, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 18.04190476190476, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 18.52952380952381, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 18.970702947845805, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 19.435102040816325, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 19.89950113378685, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 20.36390022675737, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 20.805079365079365, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 21.292698412698414, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 21.73387755102041, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 22.221496598639455, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 22.66267573696145, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 23.127074829931974, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 23.591473922902495, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 24.055873015873015, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 24.49705215419501, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 24.961451247165535, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 25.425850340136055, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 25.913469387755104, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 26.354648526077096, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 26.81904761904762, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 27.28344671201814, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 27.74784580498866, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 28.189024943310656, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 28.65342403628118, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 29.1178231292517, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 29.60544217687075, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 30.06984126984127, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 30.53424036281179, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 30.975419501133786, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 31.43981859410431, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 31.880997732426305, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 32.36861678004535, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 32.833015873015874, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 33.29741496598639, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 33.73859410430839, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 34.202993197278914, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 34.66739229024943, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 35.131791383219955, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 35.57297052154195, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 36.060589569160996, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 36.52498866213152, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 36.989387755102044, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 37.430566893424036, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 37.89496598639456, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 38.35936507936508, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 38.8237641723356, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 39.2649433106576, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 39.75256235827664, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 40.216961451247165, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 40.68136054421769, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 41.12253968253968, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 41.586938775510205, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 42.05133786848072, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 42.515736961451246, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 42.956916099773245, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 43.44453514739229, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 43.885714285714286, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 44.373333333333335, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 44.83773242630385, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 45.302131519274376, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 45.7665306122449, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 46.20770975056689, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 46.672108843537416, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 47.13650793650794, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 47.600907029478456, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 48.06530612244898, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 48.529705215419504, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 48.99410430839002, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 49.458503401360545, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 49.92290249433107, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 50.387301587301586, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 50.85170068027211, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 51.2928798185941, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 51.757278911564626, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 52.22167800453515, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 52.68607709750567, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 53.15047619047619, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 53.614875283446715, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 54.05605442176871, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 54.52045351473923, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 54.98485260770975, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 55.44925170068027, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 55.913650793650795, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 56.37804988662131, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 56.842448979591836, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 57.30684807256236, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 57.77124716553288, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 58.2356462585034, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 58.6768253968254, + "duration": 0.0 + }, + { + "value": null, + "confidence": null, + "time": 59.14122448979592, + "duration": 0.0 + } + ], + "namespace": "beat", + "time": 0, "annotation_metadata": { - "annotation_tools": "", + "corpus": "", + "validation": "", + "annotation_tools": "", + "version": "", "curator": { - "name": "", + "name": "", "email": "" - }, - "annotator": {}, - "version": "", - "corpus": "", - "annotation_rules": "", - "validation": "", + }, + "annotation_rules": "", + "annotator": {}, "data_source": "librosa beat tracker" - }, + } + }, + { + "sandbox": {}, + "duration": 61.45886621315193, "data": [ { - "duration": 0.0, - "confidence": NaN, - "value": NaN, - "time": 7.430385 - }, - { - "duration": 0.0, - "confidence": NaN, - "value": NaN, - "time": 8.289524 - }, - { - "duration": 0.0, - "confidence": NaN, - "value": NaN, - "time": 9.218322 - }, - { - "duration": 0.0, - "confidence": NaN, - "value": NaN, - "time": 10.1239 - }, - { - "duration": 0.0, - "confidence": NaN, - "value": NaN, - "time": 11.145578 - }, - { - "duration": 0.0, - "confidence": NaN, - "value": NaN, - "time": 12.190476 - }, - { - "duration": 0.0, - "confidence": NaN, - "value": NaN, - "time": 13.212154 - }, - { - "duration": 0.0, - "confidence": NaN, - "value": NaN, - "time": 14.140952 - }, - { - "duration": 0.0, - "confidence": NaN, - "value": NaN, - "time": 15.27873 - }, - { - "duration": 0.0, - "confidence": NaN, - "value": NaN, - "time": 16.207528 - }, - { - "duration": 0.0, - "confidence": NaN, - "value": NaN, - "time": 17.113107 - }, - { - "duration": 0.0, - "confidence": NaN, - "value": NaN, - "time": 18.041905 - }, - { - "duration": 0.0, - "confidence": NaN, - "value": NaN, - "time": 18.970703 - }, - { - "duration": 0.0, - "confidence": NaN, - "value": NaN, - "time": 19.899501 - }, - { - "duration": 0.0, - "confidence": NaN, - "value": NaN, - "time": 20.805079 - }, - { - "duration": 0.0, - "confidence": NaN, - "value": NaN, - "time": 21.733878 - }, - { - "duration": 0.0, - "confidence": NaN, - "value": NaN, - "time": 22.662676 - }, - { - "duration": 0.0, - "confidence": NaN, - "value": NaN, - "time": 23.591474 - }, - { - "duration": 0.0, - "confidence": NaN, - "value": NaN, - "time": 24.497052 - }, - { - "duration": 0.0, - "confidence": NaN, - "value": NaN, - "time": 25.42585 - }, - { - "duration": 0.0, - "confidence": NaN, - "value": NaN, - "time": 26.354649 - }, - { - "duration": 0.0, - "confidence": NaN, - "value": NaN, - "time": 27.283447 - }, - { - "duration": 0.0, - "confidence": NaN, - "value": NaN, - "time": 28.189025 - }, - { - "duration": 0.0, - "confidence": NaN, - "value": NaN, - "time": 29.117823 - }, - { - "duration": 0.0, - "confidence": NaN, - "value": NaN, - "time": 30.069841 - }, - { - "duration": 0.0, - "confidence": NaN, - "value": NaN, - "time": 30.97542 - }, - { - "duration": 0.0, - "confidence": NaN, - "value": NaN, - "time": 31.880998 - }, - { - "duration": 0.0, - "confidence": NaN, - "value": NaN, - "time": 32.833016 - }, - { - "duration": 0.0, - "confidence": NaN, - "value": NaN, - "time": 33.738594 - }, - { - "duration": 0.0, - "confidence": NaN, - "value": NaN, - "time": 34.667392 - }, - { - "duration": 0.0, - "confidence": NaN, - "value": NaN, - "time": 35.572971 - }, - { - "duration": 0.0, - "confidence": NaN, - "value": NaN, - "time": 36.524989 - }, - { - "duration": 0.0, - "confidence": NaN, - "value": NaN, - "time": 37.453787 - }, - { - "duration": 0.0, - "confidence": NaN, - "value": NaN, - "time": 38.359365 - }, - { - "duration": 0.0, - "confidence": NaN, - "value": NaN, - "time": 39.264942 - }, - { - "duration": 0.0, - "confidence": NaN, - "value": NaN, - "time": 40.216961 - }, - { - "duration": 0.0, - "confidence": NaN, - "value": NaN, - "time": 41.14576 - }, - { - "duration": 0.0, - "confidence": NaN, - "value": NaN, - "time": 42.051338 - }, - { - "duration": 0.0, - "confidence": NaN, - "value": NaN, - "time": 42.956916 - }, - { - "duration": 0.0, - "confidence": NaN, - "value": NaN, - "time": 43.885714 - }, - { - "duration": 0.0, - "confidence": NaN, - "value": NaN, - "time": 44.837732 - }, - { - "duration": 0.0, - "confidence": NaN, - "value": NaN, - "time": 45.97551 - }, - { - "duration": 0.0, - "confidence": NaN, - "value": NaN, - "time": 46.904308 - }, - { - "duration": 0.0, - "confidence": NaN, - "value": NaN, - "time": 47.833107 - }, - { - "duration": 0.0, - "confidence": NaN, - "value": NaN, - "time": 48.761905 - }, - { - "duration": 0.0, - "confidence": NaN, - "value": NaN, - "time": 49.667483 - }, - { - "duration": 0.0, - "confidence": NaN, - "value": NaN, - "time": 50.596281 - }, - { - "duration": 0.0, - "confidence": NaN, - "value": NaN, - "time": 51.525078 - }, - { - "duration": 0.0, - "confidence": NaN, - "value": NaN, - "time": 52.453878 - }, - { - "duration": 0.0, - "confidence": NaN, - "value": NaN, - "time": 53.359456 - }, - { - "duration": 0.0, - "confidence": NaN, - "value": NaN, - "time": 54.288254 - }, - { - "duration": 0.0, - "confidence": NaN, - "value": NaN, - "time": 55.217052 - }, - { - "duration": 0.0, - "confidence": NaN, - "value": NaN, - "time": 56.12263 - }, - { - "duration": 0.0, - "confidence": NaN, - "value": NaN, - "time": 57.051429 + "value": 129.19921875, + "confidence": 1.0, + "time": 0.0, + "duration": 61.45886621315193 } - ] - }, - { - "namespace": "tempo", - "sandbox": {}, - "time": 0, - "duration": 61.45886621315193, + ], + "namespace": "tempo", + "time": 0, "annotation_metadata": { - "annotation_tools": "", + "corpus": "", + "validation": "", + "annotation_tools": "", + "version": "", "curator": { - "name": "", + "name": "", "email": "" - }, - "annotator": {}, - "version": "", - "corpus": "", - "annotation_rules": "", - "validation": "", + }, + "annotation_rules": "", + "annotator": {}, "data_source": "librosa tempo estimator" - }, - "data": [ - { - "duration": 61.458866, - "confidence": 1.0, - "value": 64.599609375, - "time": 0.0 - } - ] + } } - ], - "file_metadata": { - "jams_version": "0.2.0", - "title": "", - "identifiers": {}, - "release": "", - "duration": 61.45886621315193, - "artist": "" - } + ] } \ No newline at end of file diff --git a/docs/examples/example_chord.jams b/docs/examples/example_chord.jams index 7f5959d6..5b106559 100644 --- a/docs/examples/example_chord.jams +++ b/docs/examples/example_chord.jams @@ -1,406 +1,406 @@ { - "sandbox": {}, "annotations": [ { - "namespace": "chord", - "sandbox": {}, - "time": 0, - "duration": 175.804082, - "annotation_metadata": { - "annotation_tools": "", - "curator": { - "name": "", - "email": "" - }, - "annotator": {}, - "version": "", - "corpus": "", - "annotation_rules": "", - "validation": "", - "data_source": "" - }, + "duration": 175.804082, "data": [ { - "duration": 2.612267, - "confidence": 1.0, - "value": "N", + "duration": 2.6122669999999997, + "value": "N", + "confidence": 1.0, "time": 0.0 - }, + }, { - "duration": 8.846803, - "confidence": 1.0, - "value": "E", - "time": 2.612267 - }, + "duration": 8.846803000000001, + "value": "E", + "confidence": 1.0, + "time": 2.6122669999999997 + }, { - "duration": 1.462857, - "confidence": 1.0, - "value": "A", + "duration": 1.4628569999999996, + "value": "A", + "confidence": 1.0, "time": 11.45907 - }, + }, { - "duration": 4.521547, - "confidence": 1.0, - "value": "E", + "duration": 4.521546999999998, + "value": "E", + "confidence": 1.0, "time": 12.921927 - }, + }, { - "duration": 2.966888, - "confidence": 1.0, - "value": "B", + "duration": 2.966888000000001, + "value": "B", + "confidence": 1.0, "time": 17.443474 - }, + }, { - "duration": 1.497687, - "confidence": 1.0, - "value": "E", + "duration": 1.497686999999999, + "value": "E", + "confidence": 1.0, "time": 20.410362 - }, + }, { - "duration": 1.462858, - "confidence": 1.0, - "value": "E:7/3", + "duration": 1.4628580000000007, + "value": "E:7/3", + "confidence": 1.0, "time": 21.908049 - }, + }, { - "duration": 1.486077, - "confidence": 1.0, - "value": "A", + "duration": 1.4860770000000016, + "value": "A", + "confidence": 1.0, "time": 23.370907 - }, + }, { - "duration": 1.486077, - "confidence": 1.0, - "value": "A:min/b3", + "duration": 1.486076999999998, + "value": "A:min/b3", + "confidence": 1.0, "time": 24.856984 - }, + }, { - "duration": 1.497687, - "confidence": 1.0, - "value": "E", + "duration": 1.497686999999999, + "value": "E", + "confidence": 1.0, "time": 26.343061 - }, + }, { - "duration": 1.509297, - "confidence": 1.0, - "value": "B", - "time": 27.840748 - }, + "duration": 1.5092970000000037, + "value": "B", + "confidence": 1.0, + "time": 27.840747999999998 + }, { - "duration": 5.955918, - "confidence": 1.0, - "value": "E", + "duration": 5.955917999999997, + "value": "E", + "confidence": 1.0, "time": 29.350045 - }, + }, { - "duration": 1.497687, - "confidence": 1.0, - "value": "A", + "duration": 1.497686999999999, + "value": "A", + "confidence": 1.0, "time": 35.305963 - }, + }, { - "duration": 4.459452, - "confidence": 1.0, - "value": "E", + "duration": 4.459452000000006, + "value": "E", + "confidence": 1.0, "time": 36.80365 - }, + }, { - "duration": 2.982544, - "confidence": 1.0, - "value": "B", + "duration": 2.982543999999997, + "value": "B", + "confidence": 1.0, "time": 41.263102 - }, + }, { - "duration": 1.474467, - "confidence": 1.0, - "value": "E", + "duration": 1.474466999999997, + "value": "E", + "confidence": 1.0, "time": 44.245646 - }, + }, { - "duration": 1.486077, - "confidence": 1.0, - "value": "E:7/3", + "duration": 1.4860770000000016, + "value": "E:7/3", + "confidence": 1.0, "time": 45.720113 - }, + }, { - "duration": 1.486077, - "confidence": 1.0, - "value": "A", + "duration": 1.4860770000000016, + "value": "A", + "confidence": 1.0, "time": 47.20619 - }, + }, { - "duration": 1.462857, - "confidence": 1.0, - "value": "A:min/b3", + "duration": 1.4628569999999996, + "value": "A:min/b3", + "confidence": 1.0, "time": 48.692267 - }, + }, { - "duration": 1.497687, - "confidence": 1.0, - "value": "E", + "duration": 1.497686999999999, + "value": "E", + "confidence": 1.0, "time": 50.155124 - }, + }, { - "duration": 1.486077, - "confidence": 1.0, - "value": "B", + "duration": 1.4860770000000016, + "value": "B", + "confidence": 1.0, "time": 51.652811 - }, + }, { - "duration": 2.972155, - "confidence": 1.0, - "value": "E", + "duration": 2.9721550000000008, + "value": "E", + "confidence": 1.0, "time": 53.138888 - }, + }, { - "duration": 9.020952, - "confidence": 1.0, - "value": "A", + "duration": 9.020951999999987, + "value": "A", + "confidence": 1.0, "time": 56.111043 - }, + }, { - "duration": 3.018594, - "confidence": 1.0, - "value": "B", - "time": 65.131995 - }, + "duration": 3.0185940000000073, + "value": "B", + "confidence": 1.0, + "time": 65.13199499999999 + }, { - "duration": 3.041814, - "confidence": 1.0, - "value": "A", + "duration": 3.0418140000000022, + "value": "A", + "confidence": 1.0, "time": 68.150589 - }, + }, { - "duration": 3.006984, - "confidence": 1.0, - "value": "E", + "duration": 3.0069840000000028, + "value": "E", + "confidence": 1.0, "time": 71.192403 - }, + }, { - "duration": 1.497687, - "confidence": 1.0, - "value": "A", + "duration": 1.497686999999999, + "value": "A", + "confidence": 1.0, "time": 74.199387 - }, + }, { - "duration": 4.539501, - "confidence": 1.0, - "value": "E", + "duration": 4.539501000000001, + "value": "E", + "confidence": 1.0, "time": 75.697074 - }, + }, { - "duration": 2.972155, - "confidence": 1.0, - "value": "B", + "duration": 2.9721550000000008, + "value": "B", + "confidence": 1.0, "time": 80.236575 - }, + }, { - "duration": 3.012963, - "confidence": 1.0, - "value": "E", + "duration": 3.012962999999999, + "value": "E", + "confidence": 1.0, "time": 83.20873 - }, + }, { - "duration": 1.514928, - "confidence": 1.0, - "value": "A", + "duration": 1.5149279999999976, + "value": "A", + "confidence": 1.0, "time": 86.221693 - }, + }, { - "duration": 1.520907, - "confidence": 1.0, - "value": "A:min/b3", + "duration": 1.5209070000000082, + "value": "A:min/b3", + "confidence": 1.0, "time": 87.736621 - }, + }, { - "duration": 1.462857, - "confidence": 1.0, - "value": "E", - "time": 89.257527 - }, + "duration": 1.4628569999999854, + "value": "E", + "confidence": 1.0, + "time": 89.25752800000001 + }, { - "duration": 1.437068, - "confidence": 1.0, - "value": "B", + "duration": 1.4370680000000107, + "value": "B", + "confidence": 1.0, "time": 90.720385 - }, + }, { - "duration": 11.949236, - "confidence": 1.0, - "value": "E", + "duration": 11.949235999999999, + "value": "E", + "confidence": 1.0, "time": 92.157453 - }, + }, { - "duration": 3.018594, - "confidence": 1.0, - "value": "B", + "duration": 3.0185940000000073, + "value": "B", + "confidence": 1.0, "time": 104.106689 - }, + }, { - "duration": 3.053424, - "confidence": 1.0, - "value": "E", - "time": 107.125283 - }, + "duration": 3.0534239999999926, + "value": "E", + "confidence": 1.0, + "time": 107.12528300000001 + }, { - "duration": 2.94538, - "confidence": 1.0, - "value": "A", + "duration": 2.9453800000000143, + "value": "A", + "confidence": 1.0, "time": 110.178707 - }, - { - "duration": 1.489631, - "confidence": 1.0, - "value": "E", - "time": 113.124087 - }, - { - "duration": 1.486077, - "confidence": 1.0, - "value": "B", - "time": 114.613718 - }, - { - "duration": 2.845166, - "confidence": 1.0, - "value": "E", + }, + { + "duration": 1.4896309999999744, + "value": "E", + "confidence": 1.0, + "time": 113.12408700000002 + }, + { + "duration": 1.4860770000000088, + "value": "B", + "confidence": 1.0, + "time": 114.61371799999999 + }, + { + "duration": 2.845166000000006, + "value": "E", + "confidence": 1.0, "time": 116.099795 - }, + }, { - "duration": 9.101501, - "confidence": 1.0, - "value": "A", + "duration": 9.101501000000013, + "value": "A", + "confidence": 1.0, "time": 118.944961 - }, + }, { - "duration": 3.006984, - "confidence": 1.0, - "value": "B", - "time": 128.046462 - }, + "duration": 3.0069839999999886, + "value": "B", + "confidence": 1.0, + "time": 128.04646200000002 + }, { - "duration": 2.983764, - "confidence": 1.0, - "value": "A", + "duration": 2.983764000000008, + "value": "A", + "confidence": 1.0, "time": 131.053446 - }, + }, { - "duration": 3.006985, - "confidence": 1.0, - "value": "E", - "time": 134.03721 - }, + "duration": 3.006984999999986, + "value": "E", + "confidence": 1.0, + "time": 134.03721000000002 + }, { - "duration": 1.431329, - "confidence": 1.0, - "value": "A", + "duration": 1.4313290000000052, + "value": "A", + "confidence": 1.0, "time": 137.044195 - }, + }, { - "duration": 4.582639, - "confidence": 1.0, - "value": "E", + "duration": 4.582638999999972, + "value": "E", + "confidence": 1.0, "time": 138.475524 - }, - { - "duration": 2.983764, - "confidence": 1.0, - "value": "B", - "time": 143.058163 - }, - { - "duration": 1.509297, - "confidence": 1.0, - "value": "E", - "time": 146.041927 - }, - { - "duration": 1.509297, - "confidence": 1.0, - "value": "E:7/3", + }, + { + "duration": 2.9837640000000363, + "value": "B", + "confidence": 1.0, + "time": 143.05816299999998 + }, + { + "duration": 1.5092969999999752, + "value": "E", + "confidence": 1.0, + "time": 146.04192700000002 + }, + { + "duration": 1.5092970000000037, + "value": "E:7/3", + "confidence": 1.0, "time": 147.551224 - }, + }, { - "duration": 1.451247, - "confidence": 1.0, - "value": "A", + "duration": 1.451246999999995, + "value": "A", + "confidence": 1.0, "time": 149.060521 - }, + }, { - "duration": 1.509297, - "confidence": 1.0, - "value": "A:min/b3", + "duration": 1.5092970000000037, + "value": "A:min/b3", + "confidence": 1.0, "time": 150.511768 - }, + }, { - "duration": 1.509297, - "confidence": 1.0, - "value": "E", + "duration": 1.509297000000032, + "value": "E", + "confidence": 1.0, "time": 152.021065 - }, + }, { - "duration": 1.532517, - "confidence": 1.0, - "value": "B", - "time": 153.530362 - }, + "duration": 1.5325169999999844, + "value": "B", + "confidence": 1.0, + "time": 153.53036200000003 + }, { - "duration": 4.469842, - "confidence": 1.0, - "value": "E", + "duration": 4.469842, + "value": "E", + "confidence": 1.0, "time": 155.062879 - }, + }, { - "duration": 1.532517, - "confidence": 1.0, - "value": "B", + "duration": 1.5325169999999844, + "value": "B", + "confidence": 1.0, "time": 159.532721 - }, + }, { - "duration": 4.516281, - "confidence": 1.0, - "value": "E", + "duration": 4.516280999999992, + "value": "E", + "confidence": 1.0, "time": 161.065238 - }, + }, { - "duration": 1.532517, - "confidence": 1.0, - "value": "B", + "duration": 1.5325170000000128, + "value": "B", + "confidence": 1.0, "time": 165.581519 - }, + }, { - "duration": 1.532517, - "confidence": 1.0, - "value": "A", + "duration": 1.5325170000000128, + "value": "A", + "confidence": 1.0, "time": 167.114036 - }, + }, { - "duration": 1.090856, - "confidence": 1.0, - "value": "E", + "duration": 1.0908560000000023, + "value": "E", + "confidence": 1.0, "time": 168.646553 - }, + }, { - "duration": 1.949764, - "confidence": 1.0, - "value": "E:9", + "duration": 1.9497639999999876, + "value": "E:9", + "confidence": 1.0, "time": 169.737409 - }, + }, { - "duration": 4.116909, - "confidence": 1.0, - "value": "N", + "duration": 4.116908999999993, + "value": "N", + "confidence": 1.0, "time": 171.687173 } - ] + ], + "namespace": "chord", + "time": 0, + "annotation_metadata": { + "version": "", + "annotation_tools": "", + "annotator": {}, + "curator": { + "email": "", + "name": "" + }, + "data_source": "", + "corpus": "", + "annotation_rules": "", + "validation": "" + }, + "sandbox": {} } - ], + ], "file_metadata": { - "jams_version": "0.2.0", - "title": "", - "identifiers": {}, - "release": "", - "duration": 175.804082, - "artist": "" - } + "duration": 175.804082, + "jams_version": "0.2.3", + "artist": "", + "identifiers": {}, + "release": "", + "title": "" + }, + "sandbox": {} } \ No newline at end of file diff --git a/docs/examples/example_chord_import.py b/docs/examples/example_chord_import.py index 1687c301..7f290460 100644 --- a/docs/examples/example_chord_import.py +++ b/docs/examples/example_chord_import.py @@ -3,6 +3,7 @@ import jams import sys + def import_chord_jams(infile, outfile): # import_lab returns a new jams object, @@ -10,14 +11,12 @@ def import_chord_jams(infile, outfile): jam, chords = jams.util.import_lab('chord', infile) # Infer the track duration from the end of the last annotation - duration = (chords.data['time'] + chords.data['duration']).max() + duration = max([obs.time + obs.duration for obs in chords]) - # this timing will be in pandas timedelta. - # calling duration.total_seconds() converts to float - jam.file_metadata.duration = duration.total_seconds() + jam.file_metadata.duration = duration chords.time = 0 - chords.duration = duration.total_seconds() + chords.duration = duration # save to disk jam.save(outfile) @@ -27,4 +26,3 @@ def import_chord_jams(infile, outfile): infile, outfile = sys.argv[1:] import_chord_jams(infile, outfile) - diff --git a/jams/schema.py b/jams/schema.py index 4203a438..203be0db 100644 --- a/jams/schema.py +++ b/jams/schema.py @@ -126,7 +126,8 @@ def values(ns_key): def get_dtypes(ns_key): - '''Get the dtypes associated with the value and confidence fields for a given schema. + '''Get the dtypes associated with the value and confidence fields + for a given namespace. Parameters ---------- @@ -136,7 +137,7 @@ def get_dtypes(ns_key): Returns ------- value_dtype, confidence_dtype : numpy.dtype - Type identifiers for dataframe/jamsframe columns. + Type identifiers for value and confidence fields. ''' # First, get the schema From 55c2cd5d0a3ee421b1e21db064735f36151806ce Mon Sep 17 00:00:00 2001 From: Brian McFee Date: Mon, 15 May 2017 14:47:47 -0400 Subject: [PATCH 30/31] cleaning up docstring examples --- jams/core.py | 30 +++++++++++++----------------- 1 file changed, 13 insertions(+), 17 deletions(-) diff --git a/jams/core.py b/jams/core.py index 4e3af392..6fa1a851 100644 --- a/jams/core.py +++ b/jams/core.py @@ -766,19 +766,17 @@ def trim(self, start_time, end_time, strict=False): >>> ann_trim = ann.trim(5, 8, strict=False) >>> print(ann_trim.time, ann_trim.duration) (5, 3) - >>> ann_trim.data - time duration value confidence - 0 00:00:05 00:00:01 two None - 1 00:00:06 00:00:02 three None - 2 00:00:07 00:00:01 four None - >>> + >>> ann_trim.to_dataframe() + time duration value confidence + 0 5 1 two None + 1 6 2 three None + 2 7 1 four None >>> ann_trim_strict = ann.trim(5, 8, strict=True) >>> print(ann_trim_strict.time, ann_trim_strict.duration) (5, 3) >>> ann_trim_strict.data - time duration value confidence - 0 00:00:06 00:00:02 three None - + time duration value confidence + 0 6 2 three None ''' # Check for basic start_time and end_time validity if end_time <= start_time: @@ -917,18 +915,16 @@ def slice(self, start_time, end_time, strict=False): >>> print(ann_slice.time, ann_slice.duration) (0, 3) >>> ann_slice.data - time duration value confidence - 0 00:00:00 00:00:01 two None - 1 00:00:01 00:00:02 three None - 2 00:00:02 00:00:01 four None - >>> + time duration value confidence + 0 0 1 two None + 1 1 2 three None + 2 2 1 four None >>> ann_slice_strict = ann.slice(5, 8, strict=True) >>> print(ann_slice_strict.time, ann_slice_strict.duration) (0, 3) >>> ann_slice_strict.data - time duration value confidence - 0 00:00:01 00:00:02 three None - + time duration value confidence + 0 1 2 three None ''' # start by trimming the annotation sliced_ann = self.trim(start_time, end_time, strict=strict) From f41f717e7e6d2306eec7acf6ed7b32970b4b9d04 Mon Sep 17 00:00:00 2001 From: Brian McFee Date: Mon, 15 May 2017 14:49:12 -0400 Subject: [PATCH 31/31] more cleaning of docs --- jams/core.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/jams/core.py b/jams/core.py index 6fa1a851..92ca269d 100644 --- a/jams/core.py +++ b/jams/core.py @@ -1021,7 +1021,8 @@ def to_dataframe(self): ------- df : pd.DataFrame Columns are `time, duration, value, confidence`. - Each row is an observation. + Each row is an observation, and rows are sorted by + ascending `time`. ''' return pd.DataFrame.from_records(list(self.data), columns=['time', 'duration',