"""mirdata annotation data types
"""
import logging
import re
from typing import List, Optional, Tuple
from deprecated.sphinx import deprecated
from jams.schema import namespace
import librosa
import numpy as np
import scipy
# Unit tables: each maps a unit identifier (the value passed as a ``*_unit``
# argument to the annotation classes) to a human-readable description.

#: Beat position units
BEAT_POSITION_UNITS = {
    "bar_index": "beat index within a bar, 1-indexed",
    "global_index": "beat index within full track, 1-indexed",
    "bar_fraction": "beat position as fractions of bars, e.g. 0.25",
    "global_fraction": "bar_frac, but where the integer part indicates the bar. e.g. 4.25",
}

#: Chord units
CHORD_UNITS = {
    "harte": "chords in harte format, e.g. Ab:maj7",
    "jams": "chords in jams 'chord' format",
    "open": "no strict schema or units",
}

#: Amplitude/voicing units
AMPLITUDE_UNITS = {
    "likelihood": "score between 0 and 1",
    "velocity": "MIDI velocity between 0 and 127",
    "binary": "0 or 1",
    "energy": "energy value, measured as the sum of a squared signal",
}

#: Event units
EVENT_UNITS = {"open": "no strict schema or units"}

#: Key units
KEY_UNITS = {"key_mode": "key labels in key-mode format, e.g. G#:minor"}

#: Lyric units
LYRIC_UNITS = {
    "words": "lyrics as words or phrases",
    "syllable_open": "lyrics segmented by syllable, no strict schema",
    "pronunciations_open": "lyric pronunciations, no strict schema",
}

#: Pitch units
PITCH_UNITS = {
    "hz": "hertz",
    "midi": "MIDI note number",
    "pc": "pitch class, e.g. G#",
    "note_name": "pc with octave, e.g. Ab4",
}

#: Section units
SECTION_UNITS = {"open": "no strict schema or units"}

#: Tempo units
TEMPO_UNITS = {"bpm": "beats per minute"}

#: Time units
TIME_UNITS = {"s": "seconds", "ms": "milliseconds", "ticks": "MIDI ticks"}

#: Voicing units (the "binary" and "likelihood" entries of AMPLITUDE_UNITS)
VOICING_UNITS = {k: AMPLITUDE_UNITS[k] for k in ["binary", "likelihood"]}
[docs]
class Annotation(object):
    """Common base class for all annotation data types."""

    def __repr__(self):
        """Return ``ClassName(name1, name2, ...)`` listing every public name.

        The names are whatever ``dir(self)`` reports that does not start with
        an underscore, so both data attributes and methods are included.
        """
        public_names = ", ".join(
            name for name in dir(self) if not name.startswith("_")
        )
        return f"{self.__class__.__name__}({public_names})"
[docs]
class BeatData(Annotation):
    """BeatData class

    Attributes:
        times (np.ndarray): array of time stamps with positive,
            strictly increasing values
        time_unit (str): time unit, one of TIME_UNITS
        positions (np.ndarray): array of beat positions in the format
            of position_unit. For all units, values of 0 indicate beats which
            fall outside of a measure.
        position_unit (str): beat position unit, one of BEAT_POSITION_UNITS
        confidence (np.ndarray or None): array of confidence values
        confidence_unit (str or None): confidence unit, one of AMPLITUDE_UNITS

    """

    def __init__(
        self,
        times,
        time_unit,
        positions,
        position_unit,
        confidence=None,
        confidence_unit=None,
    ):
        """Validate the inputs with the module's validate_* helpers and store them."""
        validate_array_like(times, np.ndarray, float)
        # Confidence gets the same type and length checks as in the other
        # annotation classes, so a mismatched confidence array is rejected
        # up front instead of being stored silently.
        validate_array_like(confidence, np.ndarray, float, none_allowed=True)
        validate_lengths_equal([times, positions, confidence])
        validate_times(times, time_unit)
        validate_beat_positions(positions, position_unit)
        validate_confidence(confidence, confidence_unit)

        self.times = times
        self.time_unit = time_unit
        self.positions = positions
        self.position_unit = position_unit
        self.confidence = confidence
        self.confidence_unit = confidence_unit
[docs]
class SectionData(Annotation):
    """Structural section annotation: time intervals with optional labels.

    Attributes:
        intervals (np.ndarray): (n x 2) array of intervals
            in the form [start_time, end_time]. Times should be positive
            and intervals should have non-negative duration
        interval_unit (str): unit of the time values in intervals. One
            of TIME_UNITS.
        labels (list or None): list of section labels
        label_unit (str or None): label unit, one of SECTION_UNITS

    """

    def __init__(self, intervals, interval_unit, labels=None, label_unit=None):
        """Run the validate_* checks on the inputs, then store them unchanged."""
        # --- type / shape checks ---
        validate_array_like(intervals, np.ndarray, float)
        validate_array_like(labels, list, str, none_allowed=True)
        validate_lengths_equal([intervals, labels])

        # --- value / unit checks ---
        validate_intervals(intervals, interval_unit)
        validate_unit(label_unit, SECTION_UNITS, allow_none=True)

        self.intervals, self.interval_unit = intervals, interval_unit
        self.labels, self.label_unit = labels, label_unit
[docs]
class ChordData(Annotation):
    """ChordData class

    Attributes:
        intervals (np.ndarray): (n x 2) array of intervals
            in the form [start_time, end_time]. Times should be positive
            and intervals should have non-negative duration
        interval_unit (str): unit of the time values in intervals. One
            of TIME_UNITS.
        labels (list): list chord labels (as strings)
        label_unit (str): chord label schema, one of CHORD_UNITS
        confidence (np.ndarray or None): array of confidence values
        confidence_unit (str or None): confidence unit, one of AMPLITUDE_UNITS

    """

    def __init__(
        self,
        intervals,
        interval_unit,
        labels,
        label_unit,
        confidence=None,
        confidence_unit=None,
    ):
        """Validate the inputs with the module's validate_* helpers and store them."""
        validate_array_like(intervals, np.ndarray, float)
        validate_array_like(labels, list, str)
        validate_array_like(confidence, np.ndarray, float, none_allowed=True)
        validate_lengths_equal([intervals, labels, confidence])
        validate_intervals(intervals, interval_unit)
        validate_unit(label_unit, CHORD_UNITS)
        validate_chord_labels(labels, label_unit)
        validate_confidence(confidence, confidence_unit)

        self.intervals = intervals
        # The unit attributes are documented above and stored by every other
        # annotation class; without them the values cannot be interpreted.
        self.interval_unit = interval_unit
        self.labels = labels
        self.label_unit = label_unit
        self.confidence = confidence
        self.confidence_unit = confidence_unit
[docs]
class F0Data(Annotation):
    """F0Data class

    Attributes:
        times (np.ndarray): array of time stamps (as floats)
            with positive, strictly increasing values
        time_unit (str): time unit, one of TIME_UNITS
        frequencies (np.ndarray): array of frequency values (as floats)
        frequency_unit (str): frequency unit, one of PITCH_UNITS
        voicing (np.ndarray): array of voicing values, indicating whether or
            not a time frame has an active pitch
        voicing_unit (str): voicing unit, one of VOICING_UNITS
        confidence (np.ndarray or None): array of confidence values
        confidence_unit (str or None): confidence unit, one of AMPLITUDE_UNITS

    """

    def __init__(
        self,
        times,
        time_unit,
        frequencies,
        frequency_unit,
        voicing,
        voicing_unit,
        confidence=None,
        confidence_unit=None,
    ):
        """Validate the inputs with the module's validate_* helpers and store them.

        Raises:
            ValueError: If a frame has frequency 0 but a nonzero voicing.
        """
        validate_array_like(times, np.ndarray, float)
        # No dtype is enforced for the symbolic pitch units (e.g. "G#", "Ab4").
        if frequency_unit in ["note_name", "pc"]:
            validate_array_like(frequencies, np.ndarray, None)
        else:
            validate_array_like(frequencies, np.ndarray, float)
        validate_array_like(voicing, np.ndarray, float)
        validate_array_like(confidence, np.ndarray, float, none_allowed=True)
        validate_lengths_equal([times, frequencies, voicing, confidence])
        validate_times(times, time_unit)
        validate_uniform_times(times)
        validate_pitches(frequencies, frequency_unit)
        validate_voicing(voicing, voicing_unit)
        validate_confidence(confidence, confidence_unit)

        # A frame without a frequency cannot be voiced.
        if np.any(voicing[frequencies == 0] != 0):
            raise ValueError("Found frequencies with value 0, but a nonzero voicing.")

        self.times = times
        self.time_unit = time_unit
        self.frequencies = frequencies
        self.frequency_unit = frequency_unit
        self.voicing = voicing
        self.voicing_unit = voicing_unit
        # Stored privately; the public accessor logs an API-change warning.
        self._confidence = confidence
        self.confidence_unit = confidence_unit

    @property
    def confidence(self):
        """Confidence values (np.ndarray or None); logs a warning on every access."""
        logging.warning(
            "Warning: the API for annotations.F0Data.confidence has changed. "
            + "For most datasets, confidence will now be None, and "
            + "F0Data.voicing should be used instead."
        )
        return self._confidence

    def resample(self, times_new, times_new_unit):
        """Resample the annotation to a new time scale. This function is adapted from:
        https://github.com/craffel/mir_eval/blob/master/mir_eval/melody.py#L212

        Args:
            times_new (np.ndarray): new time base, in units of times_new_unit
            times_new_unit (str): time unit, one of TIME_UNITS

        Returns:
            F0Data: F0 data sampled at new time scale

        Raises:
            NotImplementedError: If frequency_unit is not "hz" or "midi".

        """
        times = convert_time_units(self.times, self.time_unit, times_new_unit)

        if self.frequency_unit not in ["hz", "midi"]:
            raise NotImplementedError(
                "resampling is not supported for {}".format(self.frequency_unit)
            )

        def _interpolate(values, kind):
            # Target times outside the original range are filled with 0.
            return scipy.interpolate.interp1d(
                times, values, kind, bounds_error=False, fill_value=0
            )(times_new)

        frequencies = self.frequencies
        voicing = self.voicing
        confidence = self._confidence

        # We need to fix zero transitions:
        # fill in zero values with the last reported frequency
        # to avoid erroneous values when resampling
        frequencies_held = np.array(frequencies)
        for n, frequency in enumerate(frequencies[1:]):
            if frequency == 0:
                frequencies_held[n + 1] = frequencies_held[n]

        # Linearly interpolate frequencies
        frequencies_resampled = _interpolate(frequencies_held, "linear")

        # Retain zeros: zero-order hold of the original series tells us which
        # target frames fall in a zero-frequency region.
        frequency_mask = _interpolate(frequencies, "zero")
        frequencies_resampled *= frequency_mask != 0

        # Binary voicing is resampled with nearest-neighbor; any other voicing
        # unit is linearly interpolated.
        if self.voicing_unit != "binary":
            voicing_resampled = _interpolate(voicing, "linear")
        else:
            voicing_resampled = _interpolate(voicing, "nearest")

        # Keep the "frequency 0 implies voicing 0" invariant.
        voicing_resampled[frequencies_resampled == 0] = 0

        if confidence is None:
            confidence_resampled = None
        elif self.confidence_unit == "binary":
            confidence_resampled = _interpolate(confidence, "nearest")
        else:
            confidence_resampled = _interpolate(confidence, "linear")

        return F0Data(
            times_new,
            times_new_unit,
            frequencies_resampled,
            self.frequency_unit,
            voicing_resampled,
            self.voicing_unit,
            confidence_resampled,
            self.confidence_unit,
        )

    def to_sparse_index(
        self,
        time_scale,
        time_scale_unit,
        frequency_scale,
        frequency_scale_unit,
        amplitude_unit="binary",
    ):
        """
        Convert F0 annotation to sparse matrix indices for a time-frequency matrix.

        Args:
            time_scale (np.array): times in units time_scale_unit
            time_scale_unit (str): time scale units, one of TIME_UNITS
            frequency_scale (np.array): frequencies in frequency_scale_unit
            frequency_scale_unit (str): frequency scale units, one of PITCH_UNITS
            amplitude_unit (str): amplitude units, one of AMPLITUDE_UNITS
                Defaults to "binary".

        Returns:
            * sparse_index (np.ndarray): Array of sparse indices [(time_index, frequency_index)]
            * amplitude (np.ndarray): Array of amplitude values for each index

        """
        f0dat = self.resample(time_scale, time_scale_unit)
        frequencies = convert_pitch_units(
            f0dat.frequencies, self.frequency_unit, frequency_scale_unit
        )

        # get indexes in matrix
        nonzero_freqs = frequencies > 0  # find indexes for frequencies not equal to 0
        frequencies[frequencies == 0] = 1  # change zero frequency value to avoid NaN
        time_indexes = np.arange(len(time_scale))
        # Match in the log domain.
        freq_indexes = closest_index(
            np.log(frequencies)[:, np.newaxis], np.log(frequency_scale)[:, np.newaxis]
        )

        # create sparse index; entries whose time or frequency index is -1 are
        # dropped (presumably closest_index's "no match" marker - confirm)
        index = [
            (t, f)
            for t, f in zip(time_indexes[nonzero_freqs], freq_indexes[nonzero_freqs])
            if t != -1 and f != -1
        ]
        voicing = np.array(
            [
                v
                for (v, t, f) in zip(
                    f0dat.voicing[nonzero_freqs],
                    time_indexes[nonzero_freqs],
                    freq_indexes[nonzero_freqs],
                )
                if t != -1 and f != -1
            ]
        )

        return (
            np.array(index),
            convert_amplitude_units(voicing, self.voicing_unit, amplitude_unit),
        )

    def to_matrix(
        self,
        time_scale,
        time_scale_unit,
        frequency_scale,
        frequency_scale_unit,
        amplitude_unit="binary",
    ):
        """Convert f0 data to a matrix (piano roll) defined by a time and frequency scale

        Args:
            time_scale (np.array): times in units time_scale_unit
            time_scale_unit (str): time scale units, one of TIME_UNITS
            frequency_scale (np.array): frequencies in frequency_scale_unit
            frequency_scale_unit (str): frequency scale units, one of PITCH_UNITS
            amplitude_unit (str): amplitude units, one of AMPLITUDE_UNITS
                Defaults to "binary".

        Returns:
            np.ndarray: 2D matrix of shape len(time_scale) x len(frequency_scale)

        """
        index, voicing = self.to_sparse_index(
            time_scale,
            time_scale_unit,
            frequency_scale,
            frequency_scale_unit,
            amplitude_unit,
        )
        matrix = np.zeros((len(time_scale), len(frequency_scale)))
        # With no active entries the index array is 1-D and empty, so 2-D
        # indexing would raise; the all-zero matrix is already the answer.
        if len(index) > 0:
            matrix[index[:, 0], index[:, 1]] = voicing
        return matrix

    def to_multif0(self):
        """Convert annotation to multif0 format

        Frames whose frequency is not positive become empty lists.

        Returns:
            MultiF0Data: data in multif0 format

        """
        frequency_list = [[f] if f > 0 else [] for f in self.frequencies]
        confidence_list = (
            None
            if self._confidence is None
            else [
                [c] if f > 0 else [] for c, f in zip(self._confidence, self.frequencies)
            ]
        )
        return MultiF0Data(
            self.times,
            self.time_unit,
            frequency_list,
            self.frequency_unit,
            confidence_list,
            self.confidence_unit,
        )

    def to_mir_eval(self):
        """Convert units and format to what is expected by mir_eval.melody.evaluate

        Returns:
            * times (np.ndarray) - uniformly spaced times in seconds
            * frequencies (np.ndarray) - frequency values in hz
            * voicing (np.ndarray) - voicings, as likelihood values

        """
        times = convert_time_units(self.times, self.time_unit, "s")
        frequencies = convert_pitch_units(self.frequencies, self.frequency_unit, "hz")
        voicing = convert_amplitude_units(self.voicing, self.voicing_unit, "likelihood")
        return times, frequencies, voicing
[docs]
class MultiF0Data(Annotation):
    """MultiF0Data class

    Attributes:
        times (np.ndarray): array of time stamps (as floats)
            with positive, strictly increasing values
        time_unit (str): time unit, one of TIME_UNITS
        frequency_list (list): list of lists of frequency values (as floats)
        frequency_unit (str): frequency unit, one of PITCH_UNITS
        confidence_list (list or None): list of lists of confidence values
        confidence_unit (str or None): confidence unit, one of AMPLITUDE_UNITS

    """

    def __init__(
        self,
        times,
        time_unit,
        frequency_list,
        frequency_unit,
        confidence_list=None,
        confidence_unit=None,
    ):
        """Validate the inputs, store them, and drop per-frame duplicate frequencies."""
        validate_array_like(times, np.ndarray, float)
        validate_array_like(frequency_list, list, list)
        validate_array_like(confidence_list, list, list, none_allowed=True)
        validate_lengths_equal([times, frequency_list, confidence_list])
        validate_times(times, time_unit)
        validate_uniform_times(times)
        validate_pitches(frequency_list, frequency_unit)
        validate_confidence(confidence_list, confidence_unit)

        self.times = times
        self.time_unit = time_unit
        self.frequency_list = frequency_list
        self.frequency_unit = frequency_unit
        self.confidence_list = confidence_list
        self.confidence_unit = confidence_unit
        self._remove_duplicates()

    def _remove_duplicates(self):
        """Keep only the first occurrence of each frequency within every frame.

        The confidence paired with a dropped frequency is dropped as well.
        """
        new_frequency_list = []
        new_confidence_list = []
        # Use placeholder confidences when there are none so a single loop
        # handles both cases.
        confidence_list = (
            [[0 for _ in flist] for flist in self.frequency_list]
            if self.confidence_list is None
            else self.confidence_list
        )
        for flist, clist in zip(self.frequency_list, confidence_list):
            tmp_flist = []
            tmp_clist = []
            for f, c in zip(flist, clist):
                if f in tmp_flist:
                    continue
                tmp_flist.append(f)
                tmp_clist.append(c)
            new_frequency_list.append(tmp_flist)
            new_confidence_list.append(tmp_clist)

        self.frequency_list = new_frequency_list
        self.confidence_list = (
            None if self.confidence_list is None else new_confidence_list
        )

    def __add__(self, other):
        """Merge another (Multi)F0 annotation into this one, frame by frame.

        The annotation with the smaller maximum time is resampled onto the
        other's time base; results use this object's time, frequency and
        confidence units.

        Args:
            other (MultiF0Data, F0Data or None): annotation to add. None
                returns self unchanged.

        Returns:
            MultiF0Data: the combined annotation

        Raises:
            TypeError: If other is not None, F0Data or MultiF0Data.
        """
        if other is None:
            return self

        if isinstance(other, F0Data):
            other = other.to_multif0()

        if not isinstance(other, MultiF0Data):
            raise TypeError("Unable to add type {} to MultiF0 data".format(type(other)))

        other_times = convert_time_units(other.times, other.time_unit, self.time_unit)
        if np.max(other_times) > np.max(self.times):
            data_resamp = self.resample(other_times, self.time_unit)
            times = other_times
            this_data = data_resamp
            other_data = other
        else:
            other_resamp = other.resample(self.times, self.time_unit)
            times = self.times
            this_data = self
            other_data = other_resamp

        # Copy so the operands' lists are not mutated by extend().
        this_frequency_list = [list(flist) for flist in this_data.frequency_list]
        other_frequency_list = convert_pitch_units(
            other_data.frequency_list, other.frequency_unit, self.frequency_unit
        )
        for i, flist in enumerate(other_frequency_list):
            this_frequency_list[i].extend(flist)

        # Decide on the actual data, not the unit: an operand may carry a
        # confidence unit while its confidence_list is None.
        this_has_confidence = this_data.confidence_list is not None
        other_has_confidence = other_data.confidence_list is not None
        this_confidence_unit = this_data.confidence_unit

        if this_has_confidence and other_has_confidence:
            this_confidence_list = [list(clist) for clist in this_data.confidence_list]
            other_confidence_list = convert_amplitude_units(
                other_data.confidence_list, other.confidence_unit, self.confidence_unit
            )
            for i, clist in enumerate(other_confidence_list):
                this_confidence_list[i].extend(clist)
        elif not this_has_confidence and not other_has_confidence:
            this_confidence_list = None
        else:
            logging.warning(
                "Adding two MultiF0Data where one has confidence=None "
                + "and the other does not. The sum will have confidence=None."
            )
            this_confidence_list = None
            this_confidence_unit = None

        return MultiF0Data(
            times,
            self.time_unit,
            this_frequency_list,
            self.frequency_unit,
            this_confidence_list,
            this_confidence_unit,
        )

    def resample(self, times_new, times_new_unit):
        """Resample annotation to a new time scale. This function is adapted from:
        https://github.com/craffel/mir_eval/blob/master/mir_eval/multipitch.py#L104

        Args:
            times_new (np.array): array of new time scale values
            times_new_unit (str): units for new time scale, one of TIME_UNITS

        Returns:
            MultiF0Data: the resampled annotation

        """
        times = convert_time_units(self.times, self.time_unit, times_new_unit)
        n_times = len(self.times)

        # scipy's interpolate doesn't handle ragged arrays. Instead, we interpolate
        # the frequency index and then map back to the frequency values.
        # This only works because we're using a nearest neighbor interpolator!
        frequency_index = np.arange(0, n_times)

        # times are already ordered so assume_sorted=True for efficiency
        # since we're interpolating the index, fill_value is set to the first index
        # that is out of range. We handle this in the next line.
        new_frequency_index = scipy.interpolate.interp1d(
            times,
            frequency_index,
            kind="nearest",
            bounds_error=False,
            assume_sorted=True,
            fill_value=n_times,
        )(times_new).astype(int)

        # create array of frequencies plus additional empty element at the end for
        # target time stamps that are out of the interpolation range
        freq_vals = self.frequency_list + [[]]

        # map interpolated indices back to frequency values
        frequencies_resampled = [freq_vals[i] for i in new_frequency_index]

        if self.confidence_list is not None:
            confidence_vals = self.confidence_list + [[]]
            confidence_resampled = [confidence_vals[i] for i in new_frequency_index]
        else:
            confidence_resampled = None

        return MultiF0Data(
            times_new,
            times_new_unit,
            frequencies_resampled,
            self.frequency_unit,
            confidence_resampled,
            self.confidence_unit,
        )

    def to_sparse_index(
        self,
        time_scale,
        time_scale_unit,
        frequency_scale,
        frequency_scale_unit,
        amplitude_unit="binary",
    ):
        """
        Convert MultiF0 annotation to sparse matrix indices for a time-frequency matrix.

        Args:
            time_scale (np.array): times in units time_scale_unit
            time_scale_unit (str): time scale units, one of TIME_UNITS
            frequency_scale (np.array): frequencies in frequency_scale_unit
            frequency_scale_unit (str): frequency scale units, one of PITCH_UNITS
            amplitude_unit (str): amplitude units, one of AMPLITUDE_UNITS
                Defaults to "binary".

        Returns:
            * sparse_index (np.ndarray): Array of sparse indices [(time_index, frequency_index)]
            * amplitude (np.ndarray): Array of amplitude values for each index

        """
        multif0dat = self.resample(time_scale, time_scale_unit)

        time_indexes = np.arange(len(time_scale))

        # Flatten the ragged per-frame lists into parallel 1-D arrays.
        frequencies_flattened = convert_pitch_units(
            np.array([f for f_list in multif0dat.frequency_list for f in f_list]),
            self.frequency_unit,
            frequency_scale_unit,
        )
        time_indexes_flattened = np.array(
            [
                t
                for (t, f_list) in zip(time_indexes, multif0dat.frequency_list)
                for f in f_list
            ]
        )
        if multif0dat.confidence_list is None:
            # Without confidences every active entry counts as binary 1.
            confidence_flattened = np.ones((len(time_indexes_flattened),))
            conf_unit = "binary"
        else:
            confidence_flattened = np.array(
                [c for c_list in multif0dat.confidence_list for c in c_list]
            )
            conf_unit = self.confidence_unit

        # get frequency indexes in matrix
        # find indexes for frequencies not equal to 0
        nonzero_freqs = frequencies_flattened > 0
        # change zero frequency value to avoid NaN
        frequencies_flattened[frequencies_flattened == 0] = 1
        freq_indexes = closest_index(
            np.log(frequencies_flattened)[:, np.newaxis],
            np.log(frequency_scale)[:, np.newaxis],
        )

        # create sparse index; entries whose time or frequency index is -1 are
        # dropped (presumably closest_index's "no match" marker - confirm)
        index = [
            (t, f)
            for t, f in zip(
                time_indexes_flattened[nonzero_freqs], freq_indexes[nonzero_freqs]
            )
            if t != -1 and f != -1
        ]
        confidence_out = np.array(
            [
                c
                for c, t, f in zip(
                    confidence_flattened[nonzero_freqs],
                    time_indexes_flattened[nonzero_freqs],
                    freq_indexes[nonzero_freqs],
                )
                if t != -1 and f != -1
            ]
        )

        return (
            np.array(index),
            convert_amplitude_units(confidence_out, conf_unit, amplitude_unit),
        )

    def to_matrix(
        self,
        time_scale,
        time_scale_unit,
        frequency_scale,
        frequency_scale_unit,
        amplitude_unit="binary",
    ):
        """Convert f0 data to a matrix (piano roll) defined by a time and frequency scale

        Args:
            time_scale (np.array): times in units time_scale_unit
            time_scale_unit (str): time scale units, one of TIME_UNITS
            frequency_scale (np.array): frequencies in frequency_scale_unit
            frequency_scale_unit (str): frequency scale units, one of PITCH_UNITS
            amplitude_unit (str): amplitude units, one of AMPLITUDE_UNITS
                Defaults to "binary".

        Returns:
            np.ndarray: 2D matrix of shape len(time_scale) x len(frequency_scale)

        """
        index, voicing = self.to_sparse_index(
            time_scale,
            time_scale_unit,
            frequency_scale,
            frequency_scale_unit,
            amplitude_unit,
        )
        matrix = np.zeros((len(time_scale), len(frequency_scale)))
        # With no active entries the index array is 1-D and empty, so 2-D
        # indexing would raise; the all-zero matrix is already the answer.
        if len(index) > 0:
            matrix[index[:, 0], index[:, 1]] = voicing
        return matrix

    def to_mir_eval(self):
        """Convert annotation into the format expected by mir_eval.multipitch.evaluate

        Returns:
            * times (np.ndarray): array of uniformly spaced time stamps in seconds
            * frequency_list (list): list of np.array of frequency values in Hz

        """
        times = convert_time_units(self.times, self.time_unit, "s")
        frequency_list = [
            convert_pitch_units(np.array(flist), self.frequency_unit, "hz")
            for flist in self.frequency_list
        ]
        return times, frequency_list
[docs]
class NoteData(Annotation):
    """NoteData class

    Attributes:
        intervals (np.ndarray): (n x 2) array of intervals
            in the form [start_time, end_time]. Times should be positive
            and intervals should have non-negative duration
        interval_unit (str): unit of the time values in intervals. One
            of TIME_UNITS.
        pitches (np.ndarray): array of pitches
        pitch_unit (str): note unit, one of PITCH_UNITS
        confidence (np.ndarray or None): array of confidence values
        confidence_unit (str or None): confidence unit, one of AMPLITUDE_UNITS

    """

    def __init__(
        self,
        intervals: np.ndarray,
        interval_unit: str,
        pitches: np.ndarray,
        pitch_unit: str,
        confidence: Optional[np.ndarray] = None,
        confidence_unit: Optional[str] = None,
    ):
        """Validate the inputs, store them, and drop duplicate notes."""
        validate_array_like(intervals, np.ndarray, float)
        validate_array_like(pitches, np.ndarray, float)
        validate_array_like(confidence, np.ndarray, float, none_allowed=True)
        validate_lengths_equal([intervals, pitches, confidence])
        validate_intervals(intervals, interval_unit)
        validate_pitches(pitches, pitch_unit)
        validate_confidence(confidence, confidence_unit)

        self.intervals = intervals
        self.interval_unit = interval_unit
        self.pitches = pitches
        self.pitch_unit = pitch_unit
        self.confidence = confidence
        self.confidence_unit = confidence_unit
        self._remove_duplicates()

    @property
    def notes(self) -> np.ndarray:
        """Deprecated alias of ``pitches``; logs a warning on every access."""
        logging.warning(
            "NoteData.notes is deprecated as of 0.3.4 and will be removed in a future version. Use"
            " NoteData.pitches."
        )
        return self.pitches

    def _remove_duplicates(self):
        """Drop notes whose (start, end, pitch) triple repeats an earlier one.

        np.unique returns the unique rows in sorted order, so the stored
        notes end up sorted as well; confidence is re-indexed to match.
        """
        # deduplicate if matching interval and pitch
        unq, unq_idx = np.unique(
            np.hstack([self.intervals, self.pitches[:, np.newaxis]]),
            axis=0,
            return_index=True,
        )
        self.intervals = unq[:, :2]
        self.pitches = unq[:, 2]
        if self.confidence is not None:
            self.confidence = self.confidence[unq_idx]

    def __add__(self, other):
        """Concatenate another NoteData onto this one, in this object's units.

        Args:
            other (NoteData or None): notes to add. None returns self unchanged.

        Returns:
            NoteData: the combined notes. Confidence is kept only when both
            operands have it.

        Raises:
            TypeError: If other is neither None nor a NoteData.
        """
        if other is None:
            return self

        if not isinstance(other, NoteData):
            raise TypeError("Unable to add type {} to NoteData".format(type(other)))

        # convert to the current units
        intervals = convert_time_units(
            other.intervals, other.interval_unit, self.interval_unit
        )
        pitches = convert_pitch_units(other.pitches, other.pitch_unit, self.pitch_unit)

        if other.confidence is None and self.confidence is None:
            new_confidence = None
            new_confidence_unit = None
        elif other.confidence is not None and self.confidence is not None:
            new_confidence = np.concatenate(
                [
                    self.confidence,
                    convert_amplitude_units(
                        other.confidence, other.confidence_unit, self.confidence_unit
                    ),
                ]
            )
            new_confidence_unit = self.confidence_unit
        else:
            logging.warning(
                "Adding two NoteData objects but one has confidence=None and "
                + "the other does not. The resulting confidence will be None"
            )
            new_confidence = None
            new_confidence_unit = None

        return NoteData(
            np.vstack([self.intervals, intervals]),
            self.interval_unit,
            np.concatenate([self.pitches, pitches]),
            self.pitch_unit,
            new_confidence,
            new_confidence_unit,
        )

    def to_sparse_index(
        self,
        time_scale: np.ndarray,
        time_scale_unit: str,
        frequency_scale: np.ndarray,
        frequency_scale_unit: str,
        amplitude_unit: str = "binary",
        onsets_only: bool = False,
    ) -> Tuple[np.ndarray, np.ndarray]:
        """Convert note annotations to indexes of a sparse matrix (piano roll)

        Args:
            time_scale (np.array): array of matrix time stamps in time_scale_unit
            time_scale_unit (str): units for time scale values, one of TIME_UNITS
            frequency_scale (np.array): array of matrix frequency values in
                frequency_scale_unit
            frequency_scale_unit (str): units for frequency scale values, one of PITCH_UNITS
            amplitude_unit (str): units for amplitude values, one of AMPLITUDE_UNITS.
                Defaults to "binary".
            onsets_only (bool, optional): If True, returns an onset piano roll.
                Defaults to False.

        Returns:
            * sparse_index (np.ndarray): Array of sparse indices [(time_index, frequency_index)]
            * amplitude (np.ndarray): Array of amplitude values for each index

        """
        intervals = convert_time_units(
            self.intervals, self.interval_unit, time_scale_unit
        )
        # Pitches expressed in the units of frequency_scale.
        freqs_hz = convert_pitch_units(
            self.pitches, self.pitch_unit, frequency_scale_unit
        )

        if self.confidence is not None:
            confidence = convert_amplitude_units(
                self.confidence, self.confidence_unit, amplitude_unit
            )
        else:
            # Without confidences every note counts as binary 1.
            confidence = convert_amplitude_units(
                np.ones((freqs_hz.shape)), "binary", amplitude_unit
            )

        time_index_0 = closest_index(
            intervals[:, 0, np.newaxis], time_scale[:, np.newaxis]
        )
        freq_indexes = closest_index(
            np.log(freqs_hz)[:, np.newaxis], np.log(frequency_scale)[:, np.newaxis]
        )

        # An index of -1 is treated as "not on the grid" below (presumably
        # closest_index's "no match" marker - confirm).
        if onsets_only:
            onset_index = []
            confidences = []
            for t0, f, c in zip(time_index_0, freq_indexes, confidence):
                if t0 == -1 or f == -1:
                    continue
                onset_index.append([t0, f])
                confidences.append(c)
            return np.array(onset_index), np.array(confidences)

        time_index_1 = closest_index(
            intervals[:, 1, np.newaxis], time_scale[:, np.newaxis]
        )
        max_idx = len(time_scale) - 1

        sparse_index = []
        confidences = []
        for t0, t1, f, c in zip(time_index_0, time_index_1, freq_indexes, confidence):
            if f == -1 or (t0 == -1 and t1 == -1):
                continue

            # A note with one endpoint off the grid is clipped to the grid edge.
            t_start = max([t0, 0])
            t_end = (t1 if t1 != -1 else max_idx) + 1
            sparse_index.extend([[t, f] for t in range(t_start, t_end)])
            confidences.extend([c for _ in range(t_start, t_end)])

        return np.array(sparse_index), np.array(confidences)

    def to_matrix(
        self,
        time_scale: np.ndarray,
        time_scale_unit: str,
        frequency_scale: np.ndarray,
        frequency_scale_unit: str,
        amplitude_unit: str = "binary",
        onsets_only: bool = False,
    ) -> np.ndarray:
        """Convert note data to a matrix (piano roll) defined by a time and frequency scale

        Args:
            time_scale (np.ndarray): array of matrix time stamps in time_scale_unit
            time_scale_unit (str): units for time scale values, one of TIME_UNITS
            frequency_scale (np.ndarray): array of matrix frequency values in
                frequency_scale_unit
            frequency_scale_unit (str): units for frequency scale values, one of PITCH_UNITS
            amplitude_unit (str): units for amplitude values, one of AMPLITUDE_UNITS.
                Defaults to "binary".
            onsets_only (bool, optional): If True, returns an onset piano roll.
                Defaults to False.

        Returns:
            np.ndarray: 2D matrix of shape len(time_scale) x len(frequency_scale)

        """
        index, voicing = self.to_sparse_index(
            time_scale,
            time_scale_unit,
            frequency_scale,
            frequency_scale_unit,
            amplitude_unit,
            onsets_only,
        )
        matrix = np.zeros((len(time_scale), len(frequency_scale)))
        # With no active entries the index array is 1-D and empty, so 2-D
        # indexing would raise; the all-zero matrix is already the answer.
        if len(index) > 0:
            matrix[index[:, 0], index[:, 1]] = voicing
        return matrix

    def to_multif0(
        self, time_hop: float, time_hop_unit: str, max_time: Optional[float] = None
    ) -> MultiF0Data:
        """Convert note annotation to multiple f0 format.

        Args:
            time_hop (float): time between time stamps in multif0 annotation
            time_hop_unit (str): unit for time_hop, and resulting multif0 data.
                One of TIME_UNITS
            max_time (float, optional): Maximum time stamp in time_hop units.
                Defaults to None, in which case the maximum note interval
                time is used.

        Returns:
            MultiF0Data: multif0 annotation

        Raises:
            ValueError: If max_time is smaller than the last note end time.

        """
        intervals = convert_time_units(
            self.intervals, self.interval_unit, time_hop_unit
        )
        note_time_max = np.max(intervals[:, 1])
        max_time = note_time_max if not max_time else max_time

        if max_time < note_time_max:
            raise ValueError(
                "max_time = {} cannot be smaller than the last note interval = {}".format(
                    max_time, note_time_max
                )
            )

        times = np.arange(0, max_time + time_hop, time_hop)
        frequency_list: List[List[float]] = [[] for _ in times]
        confidence_list: List[List[float]] = [[] for _ in times]

        has_confidence = self.confidence is not None
        # Placeholders keep a single loop for both cases; they are never stored.
        note_confidences = (
            self.confidence if has_confidence else [None] * len(self.pitches)
        )
        for t0, t1, pch, conf in zip(
            intervals[:, 0], intervals[:, 1], self.pitches, note_confidences
        ):
            # Frames from the rounded onset to the rounded offset, inclusive.
            first_frame = int(np.round(t0 / time_hop))
            last_frame = int(np.round(t1 / time_hop))
            for i in range(first_frame, last_frame + 1):
                frequency_list[i].append(pch)
                if has_confidence:
                    confidence_list[i].append(conf)

        return MultiF0Data(
            times,
            time_hop_unit,
            frequency_list,
            self.pitch_unit,
            confidence_list if has_confidence else None,
            self.confidence_unit,
        )

    def to_mir_eval(self):
        """Convert data to the format expected by mir_eval.transcription.evaluate and
        mir_eval.transcription_velocity.evaluate

        Returns:
            * intervals (np.ndarray) - (n x 2) array of intervals of start time, end time in seconds
            * pitches (np.ndarray) - array of pitch values in hz
            * velocity (optional, np.ndarray) - array of velocity values between 0 and 127

        """
        intervals = convert_time_units(self.intervals, self.interval_unit, "s")
        pitches = convert_pitch_units(self.pitches, self.pitch_unit, "hz")
        velocity = (
            None
            if self.confidence is None
            else convert_amplitude_units(
                self.confidence, self.confidence_unit, "velocity"
            )
        )
        return intervals, pitches, velocity
[docs]
class KeyData(Annotation):
    """Musical key annotation: time intervals paired with key labels.

    Attributes:
        intervals (np.ndarray): (n x 2) array of intervals
            in the form [start_time, end_time]. Times should be positive
            and intervals should have non-negative duration
        interval_unit (str): unit of the time values in intervals. One
            of TIME_UNITS.
        keys (list): list key labels (as strings)
        key_unit (str): key unit, one of KEY_UNITS

    """

    def __init__(self, intervals, interval_unit, keys, key_unit):
        """Run the validate_* checks on the inputs, then store them unchanged."""
        # --- type / shape checks ---
        validate_array_like(intervals, np.ndarray, float)
        validate_array_like(keys, list, str)
        validate_lengths_equal([intervals, keys])

        # --- value / unit checks ---
        validate_intervals(intervals, interval_unit)
        validate_key_labels(keys, key_unit)

        self.intervals, self.interval_unit = intervals, interval_unit
        self.keys, self.key_unit = keys, key_unit
[docs]
class LyricData(Annotation):
    """Lyric annotation: time intervals paired with lyric strings.

    Attributes:
        intervals (np.ndarray): (n x 2) array of intervals
            in the form [start_time, end_time]. Times should be positive
            and intervals should have non-negative duration
        interval_unit (str): unit of the time values in intervals. One
            of TIME_UNITS.
        lyrics (list): list of lyrics (as strings)
        lyric_unit (str): lyric unit, one of LYRIC_UNITS

    """

    def __init__(self, intervals, interval_unit, lyrics, lyric_unit):
        """Run the validate_* checks on the inputs, then store them unchanged."""
        # --- type / shape checks ---
        validate_array_like(intervals, np.ndarray, float)
        validate_array_like(lyrics, list, str)
        validate_lengths_equal([intervals, lyrics])

        # --- value / unit checks ---
        validate_intervals(intervals, interval_unit)
        validate_unit(lyric_unit, LYRIC_UNITS)

        self.intervals, self.interval_unit = intervals, interval_unit
        self.lyrics, self.lyric_unit = lyrics, lyric_unit

    @property
    def pronunciations(self):
        """Deprecated alias of ``lyrics``; logs a warning on every access."""
        message = (
            "LyricData.pronunciations is deprecated as of 0.3.4 and will be removed in a future"
            " version. Use LyricData.lyrics."
        )
        logging.warning(message)
        return self.lyrics
[docs]
class TempoData(Annotation):
    """Tempo annotation: time intervals paired with tempo values.

    Attributes:
        intervals (np.ndarray): (n x 2) array of intervals
            in the form [start_time, end_time]. Times should be positive
            and intervals should have non-negative duration
        interval_unit (str): unit of the time values in intervals. One
            of TIME_UNITS.
        tempos (np.ndarray): array of tempo values (as floats)
        tempo_unit (str): tempo unit, one of TEMPO_UNITS
        confidence (np.ndarray or None): array of confidence values
        confidence_unit (str or None): confidence unit, one of AMPLITUDE_UNITS

    """

    def __init__(
        self,
        intervals,
        interval_unit,
        tempos,
        tempo_unit,
        confidence=None,
        confidence_unit=None,
    ):
        """Run the validate_* checks on the inputs, then store them unchanged."""
        # --- type / shape checks ---
        validate_array_like(intervals, np.ndarray, float)
        validate_array_like(tempos, np.ndarray, float)
        validate_array_like(confidence, np.ndarray, float, none_allowed=True)
        validate_lengths_equal([intervals, tempos, confidence])

        # --- value / unit checks ---
        validate_intervals(intervals, interval_unit)
        validate_tempos(tempos, tempo_unit)
        validate_confidence(confidence, confidence_unit)

        self.intervals, self.interval_unit = intervals, interval_unit
        self.tempos, self.tempo_unit = tempos, tempo_unit
        self.confidence, self.confidence_unit = confidence, confidence_unit

    @property
    def value(self):
        """Deprecated alias of ``tempos``; logs a warning on every access."""
        message = (
            "TempoData.value is deprecated as of 0.3.4 and will be removed in a future version. Use"
            " TempoData.tempos."
        )
        logging.warning(message)
        return self.tempos
[docs]
class EventData(Annotation):
    """Event annotations aligned to time intervals.

    Attributes:
        intervals (np.ndarray): (n x 2) array of intervals
            in the form [start_time, end_time]. Times should be positive
            and intervals should have non-negative duration
        interval_unit (str): unit of the time values in intervals. One
            of TIME_UNITS.
        events (list): list of event labels (as strings), one per interval
        event_unit (str): event units, one of EVENT_UNITS

    """

    def __init__(self, intervals, interval_unit, events, event_unit):
        # Container types are checked first, then lengths, then values/units.
        validate_array_like(intervals, np.ndarray, float)
        validate_array_like(events, list, str)
        validate_lengths_equal([intervals, events])
        validate_intervals(intervals, interval_unit)
        validate_unit(event_unit, EVENT_UNITS)

        self.intervals, self.interval_unit = intervals, interval_unit
        self.events, self.event_unit = events, event_unit
[docs]
def convert_time_units(times, time_unit, target_time_unit):
    """Convert a time array from time_unit to target_time_unit

    Conversion goes through seconds as an intermediate unit, so only "s" and
    "ms" can be converted into one another. "ticks" is only accepted when
    both units are "ticks", in which case the input is returned unchanged.

    Args:
        times (np.ndarray): array of time values in units time_unit
        time_unit (str): time unit, one of TIME_UNITS
        target_time_unit (str): new time unit, one of TIME_UNITS

    Raises:
        NotImplementedError: If conversion between the given units is not
            supported

    Returns:
        np.ndarray: times in units target_time_unit

    """
    if time_unit == "ticks" and target_time_unit == "ticks":
        return times

    def _to_seconds(times, time_unit):
        """Convert times in time_unit to seconds"""
        if time_unit == "s":
            return times
        if time_unit == "ms":
            return times / 1000.0
        raise NotImplementedError

    def _from_seconds(times_sec, target_time_unit):
        """Convert times in seconds to target_time_unit"""
        if target_time_unit == "s":
            return times_sec
        if target_time_unit == "ms":
            return times_sec * 1000.0
        raise NotImplementedError

    try:
        return _from_seconds(_to_seconds(times, time_unit), target_time_unit)
    except NotImplementedError:
        # The inner exception carries no message; suppress it so the
        # traceback only shows the informative error below.
        raise NotImplementedError(
            "Conversion of time in units {} to {} is not supported".format(
                time_unit, target_time_unit
            )
        ) from None
[docs]
def convert_pitch_units(pitches, pitch_unit, target_pitch_unit):
    """Convert pitch values from pitch_unit to target_pitch_unit

    Conversion goes through Hz as an intermediate unit. A value of 0 in
    "hz" or "midi" stays 0 after conversion between those two units.
    "pc" is only accepted when both units are "pc", in which case the
    input is returned unchanged.

    Args:
        pitches (np.array or list): array of pitch values, or a (possibly
            nested) list of pitch values. A list of lists is converted
            sublist by sublist and returned as a list of lists.
        pitch_unit (str): unit of pitch, one of PITCH_UNITS
        target_pitch_unit (str): target unit of pitch, one of PITCH_UNITS

    Raises:
        NotImplementedError: If conversion between given units is not supported

    Returns:
        np.array: array of pitch values in target_pitch_unit

    """
    # if input is a nested list, call this function recursively.
    # The length check keeps an empty list from raising IndexError on [0].
    if (
        isinstance(pitches, list)
        and len(pitches) > 0
        and isinstance(pitches[0], list)
    ):
        return [
            (
                []
                if len(plist) == 0
                else list(convert_pitch_units(plist, pitch_unit, target_pitch_unit))
            )
            for plist in pitches
        ]

    if pitch_unit == "pc" and target_pitch_unit == "pc":
        return pitches

    def _to_hz(pitches, pitch_unit):
        """Convert pitches in pitch_unit to Hz"""
        if pitch_unit == "hz":
            return pitches
        if pitch_unit == "midi":
            # Coerce to an array so the zero mask is elementwise for list
            # input too (a list compared to 0 is just the scalar False).
            pitches = np.asarray(pitches)
            zero_idx = pitches == 0
            pitches_hz = librosa.midi_to_hz(pitches)
            pitches_hz[zero_idx] = 0
            return pitches_hz
        if pitch_unit == "note_name":
            return librosa.note_to_hz(pitches)
        raise NotImplementedError

    def _from_hz(pitches_hz, target_pitch_unit):
        """Convert pitches in Hz to target_pitch_unit"""
        if target_pitch_unit == "hz":
            return pitches_hz
        if target_pitch_unit == "midi":
            # Same coercion as in _to_hz so zeros are masked for list input.
            pitches_hz = np.asarray(pitches_hz)
            zero_idx = pitches_hz == 0
            pitches_midi = librosa.hz_to_midi(pitches_hz)
            pitches_midi[zero_idx] = 0
            return pitches_midi
        if target_pitch_unit == "note_name":
            # cast to np.array for compatibility with legacy python3.6 and
            # librosa 0.9.2. It is redundant for librosa 0.10
            return np.array(librosa.hz_to_note(pitches_hz))
        raise NotImplementedError

    try:
        return _from_hz(_to_hz(pitches, pitch_unit), target_pitch_unit)
    except NotImplementedError:
        # The inner exception carries no message; suppress it so the
        # traceback only shows the informative error below.
        raise NotImplementedError(
            "Conversion of pitch in units {} to {} is not supported".format(
                pitch_unit, target_pitch_unit
            )
        ) from None
[docs]
def convert_amplitude_units(amplitude, amplitude_unit, target_amplitude_unit):
    """Convert amplitude values from amplitude_unit to target_amplitude_unit

    Conversion goes through likelihood as an intermediate unit. "velocity"
    is scaled by 127, and conversion to "binary" rounds up, so any value
    above 0 becomes 1.

    Args:
        amplitude (np.array or list): array of amplitude values, or a
            (possibly nested) list of amplitude values. A list of lists is
            converted sublist by sublist and returned as a list of lists.
        amplitude_unit (str): unit of amplitude, one of AMPLITUDE_UNITS
        target_amplitude_unit (str): target unit of amplitude, one of AMPLITUDE_UNITS

    Raises:
        NotImplementedError: If conversion is not supported

    Returns:
        np.array: array of amplitude values as in target amplitude unit

    """
    # if input is a nested list, call this function recursively.
    # The length check keeps an empty list from raising IndexError on [0].
    if (
        isinstance(amplitude, list)
        and len(amplitude) > 0
        and isinstance(amplitude[0], list)
    ):
        return [
            (
                []
                if len(alist) == 0
                else list(
                    convert_amplitude_units(
                        np.array(alist), amplitude_unit, target_amplitude_unit
                    )
                )
            )
            for alist in amplitude
        ]

    def _to_likelihood(amplitude, amplitude_unit):
        """Convert amplitude in amplitude_unit to likelihood"""
        if amplitude_unit in ["likelihood", "binary"]:
            return amplitude
        if amplitude_unit == "velocity":
            # asarray lets flat lists be scaled like arrays
            return np.asarray(amplitude) / 127.0
        raise NotImplementedError

    def _from_likelihood(amplitude, target_amplitude_unit):
        """Convert amplitude in likelihood to target_amplitude_unit"""
        if target_amplitude_unit == "likelihood":
            return amplitude
        if target_amplitude_unit == "binary":
            return np.ceil(amplitude)
        if target_amplitude_unit == "velocity":
            # asarray lets flat lists be scaled like arrays
            return np.asarray(amplitude) * 127.0
        raise NotImplementedError

    try:
        return _from_likelihood(
            _to_likelihood(amplitude, amplitude_unit), target_amplitude_unit
        )
    except NotImplementedError:
        # The inner exception carries no message; suppress it so the
        # traceback only shows the informative error below.
        raise NotImplementedError(
            "Conversion of amplitude in units {} to {} is not supported".format(
                amplitude_unit, target_amplitude_unit
            )
        ) from None
[docs]
def closest_index(input_array, target_array):
    """Get array of indices of target_array that are closest to the input_array

    For every row of input_array, the row of target_array at the smallest
    distance (as computed by ``cdist``) is selected. Rows of input_array
    whose first column lies outside the range spanned by the first column
    of target_array get index -1 instead.

    Args:
        input_array (np.ndarray): (n x 2) array of input values
        target_array (np.ndarray): (m x 2) array of target values

    Returns:
        np.ndarray: array of length n of indexes into target_array,
        with -1 marking out-of-range rows

    """
    pairwise_distances = scipy.spatial.distance.cdist(input_array, target_array)
    nearest = np.argmin(pairwise_distances, axis=1)

    input_starts = input_array[:, 0]
    target_starts = target_array[:, 0]
    above_range = input_starts > np.max(target_starts)
    below_range = input_starts < np.min(target_starts)

    nearest[above_range | below_range] = -1
    return nearest
[docs]
def validate_array_like(array_like, expected_type, expected_dtype, none_allowed=False):
    """Validate that array-like object is well formed

    If array_like is None, validation passes only when none_allowed is True.

    Args:
        array_like (array-like): object to validate
        expected_type (type): expected type, either list or np.ndarray
        expected_dtype (type): expected dtype. For lists, the type every
            element must be an instance of. For arrays, the dtype of the
            array; None skips the dtype check.
        none_allowed (bool): if True, allows array to be None

    Raises:
        TypeError: if type/dtype does not match expected_type/expected_dtype
        ValueError: if array_like is None while none_allowed is False,
            or if array_like is empty

    """
    if array_like is None:
        if not none_allowed:
            raise ValueError("array_like cannot be None")
        return

    supported_types = [list, np.ndarray]
    assert expected_type in supported_types, "expected type must be a list or np.ndarray"

    if not isinstance(array_like, expected_type):
        raise TypeError(
            f"Object should be a {expected_type}, but is a {type(array_like)}"
        )

    if expected_type == list:
        for element in array_like:
            if not isinstance(element, expected_dtype):
                raise TypeError(
                    f"List elements should all have type {expected_dtype}"
                )

    if expected_type == np.ndarray:
        dtype_differs = array_like.dtype != expected_dtype
        if dtype_differs and expected_dtype is not None:
            raise TypeError(
                f"Array should have dtype {expected_dtype} but has {array_like.dtype}"
            )

    # dtype=object keeps ragged lists from being rejected by the conversion
    element_count = np.asarray(array_like, dtype=object).size
    if element_count == 0:
        raise ValueError("Object should not be empty, use None instead")
[docs]
def validate_lengths_equal(array_list):
    """Validate that arrays in list are equal in length

    Some arrays may be None, and the validation for these are skipped.
    All remaining arrays are compared with each other, so a None entry
    between two arrays does not hide a length mismatch.

    Args:
        array_list (list): list of array-like objects

    Raises:
        ValueError: if arrays are not equal in length

    """
    present = [arr for arr in array_list if arr is not None]
    # With fewer than two arrays there is nothing to compare
    if len(present) < 2:
        return

    distinct_lengths = {len(arr) for arr in present}
    if len(distinct_lengths) > 1:
        raise ValueError("Arrays have unequal length")
[docs]
def validate_tempos(tempo, tempo_unit):
    """Validate if tempos are well-formed

    Args:
        tempo (np.ndarray): array of tempo values; must support
            elementwise comparison
        tempo_unit (str): tempo unit, one of TEMPO_UNITS

    Raises:
        ValueError: if tempo_unit is not one of TEMPO_UNITS, or if any
            tempo value is below 0

    """
    validate_unit(tempo_unit, TEMPO_UNITS)

    negative_mask = tempo < 0
    if negative_mask.any():
        raise ValueError("tempos must be positive")
[docs]
def validate_beat_positions(positions, position_unit):
    """Validate if positions is well-formed.

    If positions is None, validation passes automatically

    Args:
        positions (np.ndarray): an array of positions values
        position_unit (str): one of BEAT_POSITION_UNITS

    Raises:
        ValueError: if positions values are incompatible with the unit

    """
    if positions is None:
        return

    validate_unit(position_unit, BEAT_POSITION_UNITS)

    position_shape = np.shape(positions)
    if len(position_shape) != 1:
        raise ValueError(
            f"positions should be 1d, but array has shape {position_shape}"
        )

    if (positions < 0).any():
        raise ValueError("beat positions must be positive. Found values below 0.")

    if position_unit in ["bar_index", "global_index"] and not np.array_equal(
        np.floor(positions), positions
    ):
        raise ValueError(
            "measure index or global indexes should be integers. "
            + "Found fractional values."
        )

    # we expect no more than 32 beats per bar - this can be changed if a need arises!
    # The limit is named once so the check and its message cannot drift apart.
    max_beats_per_bar = 32
    if position_unit == "bar_index" and np.max(positions) > max_beats_per_bar:
        raise ValueError(
            "beats with bar_index units should have indexes "
            + "which start from 1 at the beginning of every measure. "
            + f"Found values > {max_beats_per_bar}."
        )

    if position_unit == "bar_fraction" and np.max(positions) > 1:
        raise ValueError(
            "beats with bar_fraction units should be between 0 and 1. "
            + "Found values above 1."
        )
[docs]
def validate_confidence(confidence, confidence_unit):
    """Validate if confidence is well-formed.

    If confidence is None, validation passes automatically

    Args:
        confidence (np.ndarray or list): confidence values. When the first
            element is a list, the input is treated as a list of lists and
            flattened by one level before checking.
        confidence_unit (str): one of AMPLITUDE_UNITS

    Raises:
        ValueError: if confidence values are incompatible with the unit

    """
    if confidence is None:
        return

    validate_unit(confidence_unit, AMPLITUDE_UNITS)

    is_nested = isinstance(confidence[0], list)
    values = [c for sublist in confidence for c in sublist] if is_nested else confidence

    # The units are mutually exclusive, so at most one branch applies
    if confidence_unit == "likelihood":
        if any([v < 0 for v in values]) or any([v > 1 for v in values]):
            raise ValueError(
                "confidence with unit 'likelihood' should be between 0 and 1. "
                "Found values outside [0, 1]."
            )
    elif confidence_unit == "energy":
        if any([v < 0 for v in values]):
            raise ValueError(
                "confidence with unit 'energy' should be nonnegative. "
                "Found negative values."
            )
    elif confidence_unit == "binary":
        if any([v not in [0, 1] for v in values]):
            raise ValueError(
                "confidence with unit 'binary' should only have values of 0 or 1. "
                "Found non-binary values."
            )
    elif confidence_unit == "velocity":
        if any([v < 0 for v in values]) or any([v > 127 for v in values]):
            raise ValueError(
                "confidence with unit 'velocity' should be between 0 and 127. "
                "Found values outside [0, 127]."
            )
[docs]
def validate_voicing(voicing, voicing_unit):
    """Validate if voicing is well-formed.

    Args:
        voicing (np.ndarray): a 1d array of voicing values
        voicing_unit (str): one of VOICING_UNITS

    Raises:
        ValueError: if voicing_unit is not one of VOICING_UNITS, if voicing
            is not 1d, or if voicing values are incompatible with the unit

    """
    validate_unit(voicing_unit, VOICING_UNITS)

    shape = np.shape(voicing)
    if len(shape) != 1:
        raise ValueError(f"voicings should be 1d, but array has shape {shape}")

    # The units are mutually exclusive, so at most one branch applies
    if voicing_unit == "likelihood":
        if any([v < 0 for v in voicing]) or any([v > 1 for v in voicing]):
            raise ValueError(
                "voicing with unit 'likelihood' should be between 0 and 1. "
                "Found values outside [0, 1]."
            )
    elif voicing_unit == "binary":
        if any([v not in [0, 1] for v in voicing]):
            raise ValueError(
                "voicing with unit 'binary' should only have values of 0 or 1. "
                "Found non-binary values."
            )
[docs]
def validate_pitches(pitches, pitch_unit):
    """Validate if pitches are well-formed.

    Args:
        pitches (np.ndarray or list): pitch values. Each element is checked
            on its own after conversion with ``np.array``, so elements may
            themselves be sequences.
        pitch_unit (str): pitch unit, one of PITCH_UNITS

    Raises:
        ValueError: if pitch_unit is not one of PITCH_UNITS, or if pitches
            do not correspond to the unit

    """
    validate_unit(pitch_unit, PITCH_UNITS)

    if pitch_unit in ["hz", "midi"] and np.any(
        [np.any(np.array(p) < 0) for p in pitches]
    ):
        raise ValueError(
            "pitches should be positive numbers. "
            + "Unvoiced frames should be indicated using the confidence field, "
            + "rather than negative pitch values."
        )

    if pitch_unit == "midi" and np.any([np.any(np.array(p) > 127) for p in pitches]):
        raise ValueError("pitches in midi format cannot be larger than 127. ")

    if pitch_unit in ["pc", "note_name"]:
        try:
            librosa.note_to_midi(pitches)
        except Exception as exc:
            # Catch Exception rather than everything so KeyboardInterrupt and
            # SystemExit still propagate; keep the parser error as the cause.
            raise ValueError("invalid format for unit pc or note_name") from exc
[docs]
def validate_chord_labels(chords, chord_unit):
    """Validate that chord labels conform to chord_unit namespace

    Labels are matched against the value pattern of the jams namespace
    for the unit. Units without an associated namespace are not pattern
    checked.

    Args:
        chords (list): list of chord labels as strings
        chord_unit (str): chord namespace, e.g. "harte"

    Raises:
        ValueError: If chord_unit is not one of CHORD_UNITS, or if chords
            don't conform to namespace

    """
    validate_unit(chord_unit, CHORD_UNITS)

    # jams namespace holding the label pattern for each checked unit
    jams_namespaces = {"harte": "chord_harte", "jams": "chord"}
    if chord_unit not in jams_namespaces:
        return

    schema = namespace(jams_namespaces[chord_unit])
    pattern = schema["properties"]["value"]["pattern"]

    non_matches = [label for label in chords if not re.match(pattern, label)]
    if non_matches:
        raise ValueError(
            "chords {} don't conform to chord_unit {}".format(non_matches, chord_unit)
        )
[docs]
def validate_key_labels(keys, key_unit):
    """Validate that key labels conform to key_unit namespace

    Labels are matched against the value pattern of the jams "key_mode"
    namespace.

    Args:
        keys (list): list of key labels as strings
        key_unit (str): key namespace, e.g. "key_mode"

    Raises:
        ValueError: If key_unit is not one of KEY_UNITS, or if keys don't
            conform to namespace

    """
    validate_unit(key_unit, KEY_UNITS)

    if key_unit != "key_mode":
        return

    schema = namespace("key_mode")
    pattern = schema["properties"]["value"]["pattern"]

    non_matches = [label for label in keys if not re.match(pattern, label)]
    if non_matches:
        raise ValueError(
            "keys {} don't conform to key_unit key-mode".format(non_matches)
        )
[docs]
def validate_times(times, time_unit):
    """Validate if times are well-formed.

    If times is None, validation passes automatically

    Args:
        times (np.ndarray): a 1d array of time stamps
        time_unit (str): one of TIME_UNITS

    Raises:
        ValueError: if time_unit is not one of TIME_UNITS, if times is not
            1d, has negative values, or is not strictly increasing

    """
    if times is None:
        return

    validate_unit(time_unit, TIME_UNITS)

    shape = np.shape(times)
    if len(shape) != 1:
        raise ValueError(f"Times should be 1d, but array has shape {shape}")

    negative_mask = times < 0
    if negative_mask.any():
        raise ValueError("times should be positive numbers")

    # Difference between each time stamp and the one before it
    steps = times[1:] - times[:-1]
    if (steps <= 0).any():
        raise ValueError("times should be strictly increasing")
[docs]
def validate_intervals(intervals, interval_unit):
    """Validate if intervals are well-formed.

    If intervals is None, validation passes automatically

    Args:
        intervals (np.ndarray): (n x 2) array
        interval_unit (str): interval unit, one of TIME_UNITS

    Raises:
        ValueError: if interval_unit is not one of TIME_UNITS, if intervals
            have an invalid shape, have negative values
            or if end times are smaller than start times.

    """
    if intervals is None:
        return

    validate_unit(interval_unit, TIME_UNITS)

    # intervals must be 2d with exactly two columns (start, end)
    shape = np.shape(intervals)
    has_two_columns = len(shape) == 2 and shape[1] == 2
    if not has_two_columns:
        raise ValueError(
            f"Intervals should be arrays with two columns, but array has {shape}"
        )

    # no time stamp may be negative
    if (intervals < 0).any():
        raise ValueError("Interval values should be nonnegative numbers")

    # end times may not precede start times (zero duration is accepted)
    durations = intervals[:, 1] - intervals[:, 0]
    if (durations < 0).any():
        raise ValueError("Interval start times must be smaller than end times")
[docs]
def validate_unit(unit, unit_values, allow_none=False):
    """Validate that the given unit is one of the allowed unit values.

    Args:
        unit (str): the unit name
        unit_values (dict): dictionary of possible unit values
        allow_none (bool): if true, a falsy unit (such as None) passes
            validation

    Raises:
        ValueError: If the given unit is not one of the allowed unit values

    """
    if allow_none and not unit:
        return

    if unit in unit_values:
        return

    raise ValueError(f"unit={unit} is not one of {unit_values}")
def validate_uniform_times(times, tolerance=0.01):
    """Validate that time stamps are uniformly spaced.

    Spacing counts as uniform when every difference between consecutive
    time stamps is within tolerance of the median difference. Inputs with
    fewer than two time stamps pass automatically.

    Args:
        times (np.ndarray): array of time stamps
        tolerance (float): largest allowed absolute deviation of a
            consecutive difference from the median difference, in the
            same units as times

    Raises:
        ValueError: if the time stamps are not uniformly spaced

    """
    time_diffs = np.diff(times)
    # Nothing to compare with fewer than two time stamps; returning here also
    # avoids taking the median of an empty array.
    if time_diffs.size == 0:
        return

    median_diff = np.median(time_diffs)
    if np.any(np.abs(time_diffs - median_diff) > tolerance):
        raise ValueError(
            "time stamps should be uniformly spaced, but found non-uniform spacing"
        )