# -*- coding: utf-8 -*-
"""track object utility functions
"""
import types
import numpy as np
MAX_STR_LEN = 100
class Track(object):
def __repr__(self):
properties = [v for v in dir(self.__class__) if not v.startswith("_")]
attributes = [
v for v in dir(self) if not v.startswith("_") and v not in properties
]
repr_str = "Track(\n"
for attr in attributes:
val = getattr(self, attr)
if isinstance(val, str):
if len(val) > MAX_STR_LEN:
val = "...{}".format(val[-MAX_STR_LEN:])
val = '"{}"'.format(val)
repr_str += " {}={},\n".format(attr, val)
for prop in properties:
val = getattr(self.__class__, prop)
if isinstance(val, types.FunctionType):
continue
if val.__doc__ is None:
raise ValueError("{} has no documentation".format(prop))
val_type_str = val.__doc__.split(":")[0]
repr_str += " {}: {},\n".format(prop, val_type_str)
repr_str += ")"
return repr_str
def to_jams(self):
raise NotImplementedError
[docs]class MultiTrack(Track):
"""MultiTrack class.
A multitrack class is a collection of track objects and their associated audio
that can be mixed together.
A multitrack is iteslf a Track, and can have its own associated audio (such as
a mastered mix), its own metadata and its own annotations.
"""
def _check_mixable(self):
if not hasattr(self, "tracks") or not hasattr(self, "track_audio_property"):
raise NotImplementedError(
"This MultiTrack has no tracks/track_audio_property. Cannot perform mixing"
)
[docs] def get_target(self, track_keys, weights=None, average=True, enforce_length=True):
"""Get target which is a linear mixture of tracks
Args:
track_keys (list): list of track keys to mix together
weights (list or None): list of positive scalars to be used in the average
average (bool): if True, computes a weighted average of the tracks
if False, computes a weighted sum of the tracks
enforce_length (bool): If True, raises ValueError if the tracks are
not the same length. If False, pads audio with zeros to match the length
of the longest track
Returns:
target (np.ndarray): target audio with shape (n_channels, n_samples)
Raises:
ValueError:
if sample rates of the tracks are not equal
if enforce_length=True and lengths are not equal
"""
self._check_mixable()
signals = []
lengths = []
sample_rates = []
for k in track_keys:
audio, sample_rate = getattr(self.tracks[k], self.track_audio_property)
# ensure all signals are shape (n_channels, n_samples)
if len(audio.shape) == 1:
audio = audio[np.newaxis, :]
signals.append(audio)
lengths.append(audio.shape[1])
sample_rates.append(sample_rate)
if len(set(sample_rates)) > 1:
raise ValueError(
"Sample rates for tracks {} are not equal: {}".format(
track_keys, sample_rates
)
)
max_length = np.max(lengths)
if any([l != max_length for l in lengths]):
if enforce_length:
raise ValueError(
"Track's {} audio are not the same length {}. Use enforce_length=False to pad with zeros.".format(
track_keys, lengths
)
)
else:
# pad signals to the max length
signals = [
np.pad(signal, ((0, 0), (0, max_length - signal.shape[1])))
for signal in signals
]
if weights is None:
weights = np.ones((len(track_keys),))
target = np.average(signals, axis=0, weights=weights)
if not average:
target *= np.sum(weights)
return target
[docs] def get_random_target(self, n_tracks=None, min_weight=0.3, max_weight=1.0):
"""Get a random target by combining a random selection of tracks with random weights
Args:
n_tracks (int or None): number of tracks to randomly mix. If None, uses all tracks
min_weight (float): minimum possible weight when mixing
max_weight (float): maximum possible weight when mixing
Returns:
target (np.ndarray): mixture audio with shape (n_samples, n_channels)
tracks (list): list of keys of included tracks
weights (list): list of weights used to mix tracks
"""
self._check_mixable()
tracks = list(self.tracks.keys())
if n_tracks is not None and n_tracks < len(tracks):
tracks = np.random.choice(tracks, n_tracks, replace=False)
weights = np.random.uniform(low=min_weight, high=max_weight, size=len(tracks))
target = self.get_target(tracks, weights=weights)
return target, tracks, weights
[docs] def get_mix(self):
"""Create a linear mixture given a subset of tracks.
Args:
track_keys (list): list of track keys to mix together
Returns:
target (np.ndarray): mixture audio with shape (n_samples, n_channels)
"""
self._check_mixable()
return self.get_target(list(self.tracks.keys()))