# Source code for mirdata.beatles

# -*- coding: utf-8 -*-
"""Beatles Dataset Loader

The Beatles Dataset includes beat and metric position, chord, key, and segmentation
annotations for 179 Beatles songs. Details can be found in http://matthiasmauch.net/_pdf/mauch_omp_2009.pdf and
http://isophonics.net/content/reference-annotations-beatles.

"""

import csv
import librosa
import numpy as np
import os

from mirdata import download_utils
from mirdata import jams_utils
from mirdata import track
from mirdata import utils

# Folder name of this dataset; handed to utils.get_default_dataset_path whenever
# a caller does not supply an explicit data_home.
DATASET_DIR = 'Beatles'
# Remote resources passed to download_utils.downloader by download().
# Only the annotations archive is listed here; no audio entry is defined.
REMOTES = {
    'annotations': download_utils.RemoteFileMetadata(
        filename='The Beatles Annotations.tar.gz',
        url='http://isophonics.net/files/annotations/The%20Beatles%20Annotations.tar.gz',
        # NOTE(review): 32 hex characters, presumably an MD5 digest -- confirm
        # against download_utils.
        checksum='62425c552d37c6bb655a78e4603828cc',
        destination_dir='annotations',
    )
}

# Dataset index built from 'beatles_index.json'. Its `.index` is looked up by
# track id; Track reads element [0] of the 'beat', 'chords', 'keys', 'sections'
# and 'audio' entries and joins it onto data_home.
DATA = utils.LargeData('beatles_index.json')


class Track(track.Track):
    """Beatles track class

    Args:
        track_id (str): track id of the track
        data_home (str): Local path where the dataset is stored.
            If `None`, looks for the data in the default directory, `~/mir_datasets`

    Attributes:
        audio_path (str): track audio path
        beats_path (str): beat annotation path
        chords_path (str): chord annotation path
        keys_path (str): key annotation path
        sections_path (str): sections annotation path
        title (str): title of the track
        track_id (str): track id

    Raises:
        ValueError: if `track_id` is not present in the dataset index.

    """

    def __init__(self, track_id, data_home=None):
        # Reject unknown ids before touching any path.
        if track_id not in DATA.index:
            raise ValueError('{} is not a valid track ID in Beatles'.format(track_id))

        self.track_id = track_id

        self._data_home = (
            utils.get_default_dataset_path(DATASET_DIR)
            if data_home is None
            else data_home
        )

        index_entry = DATA.index[track_id]
        self._track_paths = index_entry

        # Beat and key paths go through utils.none_path_join; chords, sections
        # and audio are joined directly with os.path.join.
        self.beats_path = utils.none_path_join(
            [self._data_home, index_entry['beat'][0]]
        )
        self.chords_path = os.path.join(self._data_home, index_entry['chords'][0])
        self.keys_path = utils.none_path_join(
            [self._data_home, index_entry['keys'][0]]
        )
        self.sections_path = os.path.join(self._data_home, index_entry['sections'][0])
        self.audio_path = os.path.join(self._data_home, index_entry['audio'][0])

        # The title is the sections file name up to its first '.'; anything
        # after that first dot (extension included) is dropped.
        sections_filename = os.path.basename(index_entry['sections'][0])
        self.title = sections_filename.split('.')[0]

    @utils.cached_property
    def beats(self):
        """BeatData: human-labeled beat annotation"""
        return load_beats(self.beats_path)

    @utils.cached_property
    def chords(self):
        """ChordData: chord annotation"""
        return load_chords(self.chords_path)

    @utils.cached_property
    def key(self):
        """KeyData: key annotation"""
        return load_key(self.keys_path)

    @utils.cached_property
    def sections(self):
        """SectionData: section annotation"""
        return load_sections(self.sections_path)

    @property
    def audio(self):
        """(np.ndarray, float): audio signal, sample rate"""
        return load_audio(self.audio_path)

    def to_jams(self):
        """Jams: the track's data in jams format"""
        jams_metadata = {'artist': 'The Beatles', 'title': self.title}
        return jams_utils.jams_converter(
            audio_path=self.audio_path,
            beat_data=[(self.beats, None)],
            section_data=[(self.sections, None)],
            chord_data=[(self.chords, None)],
            key_data=[(self.key, None)],
            metadata=jams_metadata,
        )
def load_audio(audio_path):
    """Load a Beatles audio file.

    Args:
        audio_path (str): path to audio file

    Returns:
        y (np.ndarray): the mono audio signal
        sr (float): The sample rate of the audio file

    Raises:
        IOError: if `audio_path` does not exist on disk.

    """
    if os.path.exists(audio_path):
        # sr=None is passed through to librosa together with mono=True.
        return librosa.load(audio_path, sr=None, mono=True)

    raise IOError("audio_path {} does not exist".format(audio_path))
def download(data_home=None, force_overwrite=False, cleanup=True):
    """Download the Beatles Dataset (annotations).
    The audio files are not provided due to copyright issues.

    Args:
        data_home (str): Local path where the dataset is stored.
            If `None`, looks for the data in the default directory, `~/mir_datasets`
        force_overwrite (bool): Whether to overwrite the existing downloaded data
        cleanup (bool): Whether to delete the zip/tar file after extracting.

    """
    # Fall back to the default location (~/mir_datasets/Beatles).
    if data_home is None:
        data_home = utils.get_default_dataset_path(DATASET_DIR)

    # NOTE(review): the message layout looks flattened (the folder tree was
    # probably one entry per line) -- confirm the intended line breaks.
    message_template = (
        ' Unfortunately the audio files of the Beatles dataset are not available '
        'for download. If you have the Beatles dataset, place the contents into '
        'a folder called Beatles with the following structure: '
        '> Beatles/ > annotations/ > audio/ '
        'and copy the Beatles folder to {} '
    )
    info_message = message_template.format(data_home)

    download_utils.downloader(
        data_home,
        remotes=REMOTES,
        info_message=info_message,
        force_overwrite=force_overwrite,
        cleanup=cleanup,
    )
def validate(data_home=None, silence=False):
    """Validate if a local version of this dataset is consistent

    Args:
        data_home (str): Local path where the dataset is stored.
            If `None`, looks for the data in the default directory, `~/mir_datasets`
        silence (bool): forwarded unchanged to `utils.validator`

    Returns:
        missing_files (list): List of file paths that are in the dataset index
            but missing locally
        invalid_checksums (list): List of file paths where the expected file exists locally
            but has a different checksum than the reference

    """
    dataset_root = (
        utils.get_default_dataset_path(DATASET_DIR) if data_home is None else data_home
    )
    missing, mismatched = utils.validator(DATA.index, dataset_root, silence=silence)
    return missing, mismatched
def track_ids():
    """Get the list of track IDs for this dataset

    Returns:
        (list): A list of track ids

    """
    index_keys = DATA.index.keys()
    return list(index_keys)
def load(data_home=None):
    """Load Beatles dataset

    Args:
        data_home (str): Local path where the dataset is stored.
            If `None`, looks for the data in the default directory, `~/mir_datasets`

    Returns:
        (dict): {`track_id`: track data}

    """
    # Resolve the dataset root once so every Track gets the same path.
    if data_home is None:
        data_home = utils.get_default_dataset_path(DATASET_DIR)

    return {
        track_id: Track(track_id, data_home=data_home) for track_id in track_ids()
    }
def load_beats(beats_path):
    """Load Beatles format beat data from a file

    Args:
        beats_path (str): path to beat annotation file

    Returns:
        (utils.BeatData): loaded beat data, or `None` when `beats_path` is `None`

    Raises:
        IOError: if `beats_path` is given but does not exist.

    """
    if beats_path is None:
        return None

    if not os.path.exists(beats_path):
        raise IOError("beats_path {} does not exist".format(beats_path))

    with open(beats_path, 'r') as beats_file:
        # Let the csv module guess the delimiter from the first 1024 characters,
        # then rewind so the reader starts at the top of the file.
        sniffed_dialect = csv.Sniffer().sniff(beats_file.read(1024))
        beats_file.seek(0)
        # First column is the beat time, last column is the position label.
        parsed_rows = [
            (float(row[0]), row[-1]) for row in csv.reader(beats_file, sniffed_dialect)
        ]

    times = [time for time, _ in parsed_rows]
    raw_labels = [label for _, label in parsed_rows]

    # Resolve 'New Point' labels first; only then can every label become an int.
    resolved_labels = _fix_newpoint(np.array(raw_labels))
    positions = list(map(int, resolved_labels))

    return utils.BeatData(np.array(times), np.array(positions))
def load_chords(chords_path):
    """Load Beatles format chord data from a file

    Args:
        chords_path (str): path to chord annotation file

    Returns:
        (utils.ChordData): loaded chord data, or `None` when `chords_path` is `None`

    Raises:
        IOError: if `chords_path` is given but does not exist.

    """
    if chords_path is None:
        return None

    if not os.path.exists(chords_path):
        raise IOError("chords_path {} does not exist".format(chords_path))

    with open(chords_path, 'r') as chords_file:
        # Delimiter is sniffed from the first 1024 characters; rewind afterwards.
        sniffed_dialect = csv.Sniffer().sniff(chords_file.read(1024))
        chords_file.seek(0)
        # Columns: start time, end time, chord label.
        parsed_rows = [
            (float(row[0]), float(row[1]), row[2])
            for row in csv.reader(chords_file, sniffed_dialect)
        ]

    onsets = [onset for onset, _, _ in parsed_rows]
    offsets = [offset for _, offset, _ in parsed_rows]
    labels = [label for _, _, label in parsed_rows]

    # Transpose so each row of the interval array is one (start, end) pair.
    intervals = np.array([onsets, offsets]).T
    return utils.ChordData(intervals, labels)
def load_key(keys_path):
    """Load Beatles format key data from a file

    Args:
        keys_path (str): path to key annotation file

    Returns:
        (utils.KeyData): loaded key data, or `None` when `keys_path` is `None`

    Raises:
        IOError: if `keys_path` is given but does not exist.

    """
    if keys_path is None:
        return None

    if not os.path.exists(keys_path):
        raise IOError("keys_path {} does not exist".format(keys_path))

    with open(keys_path, 'r') as keys_file:
        # Tab-separated rows; only rows whose third column is 'Key' are kept.
        # Kept rows contribute start (col 0), end (col 1) and key name (col 3).
        parsed_rows = [
            (float(row[0]), float(row[1]), row[3])
            for row in csv.reader(keys_file, delimiter='\t')
            if row[2] == 'Key'
        ]

    onsets = [onset for onset, _, _ in parsed_rows]
    offsets = [offset for _, offset, _ in parsed_rows]
    key_names = [name for _, _, name in parsed_rows]

    return utils.KeyData(np.array(onsets), np.array(offsets), np.array(key_names))
def load_sections(sections_path):
    """Load Beatles format section data from a file

    Args:
        sections_path (str): path to section annotation file

    Returns:
        (utils.SectionData): loaded section data, or `None` when `sections_path`
            is `None`

    Raises:
        IOError: if `sections_path` is given but does not exist.

    """
    if sections_path is None:
        return None

    if not os.path.exists(sections_path):
        raise IOError("sections_path {} does not exist".format(sections_path))

    with open(sections_path, 'r') as sections_file:
        # Tab-separated rows: start (col 0), end (col 1), label (col 3).
        # Column 2 is not read.
        parsed_rows = [
            (float(row[0]), float(row[1]), row[3])
            for row in csv.reader(sections_file, delimiter='\t')
        ]

    onsets = [onset for onset, _, _ in parsed_rows]
    offsets = [offset for _, offset, _ in parsed_rows]
    labels = [label for _, _, label in parsed_rows]

    # Transpose so each row of the interval array is one (start, end) pair.
    intervals = np.array([onsets, offsets]).T
    return utils.SectionData(intervals, labels)
def _fix_newpoint(beat_positions): """Fills in missing beat position labels by inferring the beat position from neighboring beats. """ while np.any(beat_positions == 'New Point'): idxs = np.where(beat_positions == 'New Point')[0] for i in idxs: if i < len(beat_positions) - 1: if not beat_positions[i + 1] == 'New Point': beat_positions[i] = str(np.mod(int(beat_positions[i + 1]) - 1, 4)) if i == len(beat_positions) - 1: if not beat_positions[i - 1] == 'New Point': beat_positions[i] = str(np.mod(int(beat_positions[i - 1]) + 1, 4)) beat_positions[beat_positions == '0'] = '4' return beat_positions
def cite():
    """Print the reference

    Writes the MLA and BibTeX citations for the dataset to standard output.
    Both entries name the 10th ISMIR conference (2009); the BibTeX
    `booktitle` previously said "12th", contradicting the MLA entry.

    """
    # NOTE(review): the citation layout looks flattened (one entry per line
    # was probably intended) -- confirm the intended line breaks.
    cite_data = (
        ' =========== MLA =========== '
        'Mauch, Matthias, et al. "OMRAS2 metadata project 2009." '
        '10th International Society for Music Information Retrieval Conference (2009) '
        '========== Bibtex ========== '
        '@inproceedings{mauch2009beatles, '
        'title={OMRAS2 metadata project 2009}, '
        'author={Mauch, Matthias and Cannam, Chris and Davies, Matthew and Dixon, Simon '
        'and Harte, Christopher and Kolozali, Sefki and Tidhar, Dan and Sandler, Mark}, '
        'booktitle={10th International Society for Music Information Retrieval Conference}, '
        'year={2009}, '
        'series = {ISMIR} '
        '} '
    )

    print(cite_data)