# Source code for mirdata.track

# -*- coding: utf-8 -*-
"""track object utility functions
"""
import types

import numpy as np

# Maximum number of characters of a string attribute value shown by
# Track.__repr__; longer strings are cut down to their last MAX_STR_LEN
# characters and prefixed with "...".
MAX_STR_LEN = 100


class Track(object):
    """Base class for track objects.

    Provides a human-readable ``repr`` that lists instance attributes with
    their values and class-level documented members with the leading part of
    their docstring, plus a ``to_jams`` hook for subclasses to implement.
    """

    def __repr__(self):
        """Build a multi-line summary of this track.

        Instance attributes are rendered as ``name=value`` (strings are quoted
        and, when longer than ``MAX_STR_LEN``, reduced to their tail).
        Public class-level members that are not plain functions are rendered as
        ``name: <text of their docstring before the first colon>``.

        Returns:
            str: the formatted description

        Raises:
            ValueError: if a public, non-function class-level member has no
                docstring

        """
        klass = self.__class__

        # Public names defined on the class (methods, properties, ...).
        class_level = [name for name in dir(klass) if not name.startswith("_")]
        # Public names that only exist on the instance.
        instance_level = [
            name
            for name in dir(self)
            if not (name.startswith("_") or name in class_level)
        ]

        pieces = ["Track(\n"]

        for name in instance_level:
            value = getattr(self, name)
            if isinstance(value, str):
                if len(value) > MAX_STR_LEN:
                    # keep the end of long strings
                    shown = "...{}".format(value[-MAX_STR_LEN:])
                else:
                    shown = value
                value = '"{}"'.format(shown)
            pieces.append("  {}={},\n".format(name, value))

        for name in class_level:
            # Look the member up on the class so properties are not evaluated.
            member = getattr(klass, name)
            if isinstance(member, types.FunctionType):
                continue

            doc = member.__doc__
            if doc is None:
                raise ValueError("{} has no documentation".format(name))

            # By convention the docstring starts with "<type>: <description>".
            pieces.append("  {}: {},\n".format(name, doc.split(":")[0]))

        pieces.append(")")
        return "".join(pieces)

    def to_jams(self):
        """Convert the track to JAMS; must be implemented by subclasses.

        Raises:
            NotImplementedError: always, in this base class

        """
        raise NotImplementedError


class MultiTrack(Track):
    """MultiTrack class.

    A multitrack class is a collection of track objects and their associated
    audio that can be mixed together.
    A multitrack is itself a Track, and can have its own associated audio
    (such as a mastered mix), its own metadata and its own annotations.

    Mixing requires the instance to provide two attributes:

    * ``tracks``: a mapping from track key to track object
    * ``track_audio_property``: name of the attribute on each track object
      that yields an ``(audio, sample_rate)`` pair

    """

    def _check_mixable(self):
        """Ensure the attributes needed for mixing are present.

        Raises:
            NotImplementedError: if ``tracks`` or ``track_audio_property`` is
                missing on this instance

        """
        if not hasattr(self, "tracks") or not hasattr(self, "track_audio_property"):
            raise NotImplementedError(
                "This MultiTrack has no tracks/track_audio_property. Cannot perform mixing"
            )

    def get_target(self, track_keys, weights=None, average=True, enforce_length=True):
        """Get target which is a linear mixture of tracks

        Args:
            track_keys (list): list of track keys to mix together
            weights (list or None): list of positive scalars to be used in the
                average, one per track key. If None, all tracks are weighted
                equally. The weights must not sum to zero.
            average (bool): if True, computes a weighted average of the tracks
                if False, computes a weighted sum of the tracks
            enforce_length (bool): If True, raises ValueError if the tracks are
                not the same length. If False, pads audio with zeros to match
                the length of the longest track

        Returns:
            target (np.ndarray): target audio with shape (n_channels, n_samples)

        Raises:
            NotImplementedError: if this multitrack cannot be mixed
            ValueError:
                if track_keys is empty
                if sample rates of the tracks are not equal
                if enforce_length=True and lengths are not equal

        """
        self._check_mixable()

        signals = []
        lengths = []
        sample_rates = []
        for k in track_keys:
            audio, sample_rate = getattr(self.tracks[k], self.track_audio_property)
            # ensure all signals are shape (n_channels, n_samples)
            if len(audio.shape) == 1:
                audio = audio[np.newaxis, :]
            signals.append(audio)
            lengths.append(audio.shape[1])
            sample_rates.append(sample_rate)

        # Fail early with a clear message instead of an obscure reduction
        # error on an empty list further down.
        if not signals:
            raise ValueError("track_keys must contain at least one track key")

        if len(set(sample_rates)) > 1:
            raise ValueError(
                "Sample rates for tracks {} are not equal: {}".format(
                    track_keys, sample_rates
                )
            )

        max_length = max(lengths)
        if any(length != max_length for length in lengths):
            if enforce_length:
                raise ValueError(
                    "Track's {} audio are not the same length {}. Use enforce_length=False to pad with zeros.".format(
                        track_keys, lengths
                    )
                )
            # pad signals (at the end, along the sample axis) to the max length
            signals = [
                np.pad(
                    signal,
                    ((0, 0), (0, max_length - signal.shape[1])),
                    mode="constant",
                )
                for signal in signals
            ]

        if weights is None:
            weights = np.ones((len(signals),))

        target = np.average(signals, axis=0, weights=weights)
        if not average:
            # undo the normalisation done by np.average to get a weighted sum
            target *= np.sum(weights)

        return target

    def get_random_target(self, n_tracks=None, min_weight=0.3, max_weight=1.0):
        """Get a random target by combining a random selection of tracks with random weights

        Args:
            n_tracks (int or None): number of tracks to randomly mix. If None,
                or not smaller than the number of available tracks, uses all
                tracks
            min_weight (float): minimum possible weight when mixing
            max_weight (float): maximum possible weight when mixing

        Returns:
            target (np.ndarray): mixture audio with shape (n_channels, n_samples)
            tracks (list): list of keys of included tracks
            weights (np.ndarray): weights used to mix tracks, one per key

        Raises:
            NotImplementedError: if this multitrack cannot be mixed
            ValueError: propagated from ``get_target`` (e.g. no tracks
                selected, mismatched sample rates or lengths)

        """
        self._check_mixable()

        tracks = list(self.tracks.keys())
        if n_tracks is not None and n_tracks < len(tracks):
            # Sample positions rather than the keys themselves so the result
            # stays a plain list and the keys keep their original type.
            chosen = np.random.choice(len(tracks), n_tracks, replace=False)
            tracks = [tracks[i] for i in chosen]

        weights = np.random.uniform(low=min_weight, high=max_weight, size=len(tracks))
        target = self.get_target(tracks, weights=weights)
        return target, tracks, weights

    def get_mix(self):
        """Create a linear mixture of all tracks, weighted equally.

        Returns:
            target (np.ndarray): mixture audio with shape (n_channels, n_samples)

        Raises:
            NotImplementedError: if this multitrack cannot be mixed
            ValueError: propagated from ``get_target`` (e.g. no tracks,
                mismatched sample rates or lengths)

        """
        # Checked here as well so a missing ``tracks`` attribute surfaces as
        # NotImplementedError rather than AttributeError on the next line.
        self._check_mixable()
        return self.get_target(list(self.tracks.keys()))