# Source code for mirdata.rwc_popular

# -*- coding: utf-8 -*-
"""RWC Popular Dataset Loader

The Popular Music Database consists of 100 songs — 20 songs with English lyrics
performed in the style of popular music typical of songs on the American hit
charts in the 1980s, and 80 songs with Japanese lyrics performed in the style of
modern Japanese popular music typical of songs on the Japanese hit charts in
the 1990s.

For more details, please visit: https://staff.aist.go.jp/m.goto/RWC-MDB/rwc-mdb-p.html
"""
import csv
import librosa
import logging
import numpy as np
import os

from mirdata import download_utils
from mirdata import jams_utils
from mirdata import track
from mirdata import utils

# these functions are identical for all rwc datasets
from mirdata.rwc_classical import (
    load_beats,
    load_sections,
    load_audio,
    _duration_to_sec,
)

# Remote resources handed to download_utils.downloader() by download():
# the metadata archive plus one zip per annotation type (beats, sections,
# chords, vocal/instrumental activity). The keys are the names accepted in
# download()'s `partial_download` list.
# NOTE(review): destination_dir is presumably relative to data_home, with
# None meaning data_home itself -- confirm against download_utils.
REMOTES = {
    'metadata': download_utils.RemoteFileMetadata(
        filename='rwc-p.csv',
        url='https://github.com/magdalenafuentes/metadata/archive/master.zip',
        checksum='7dbe87fedbaaa1f348625a2af1d78030',
        destination_dir=None,
    ),
    'annotations_beat': download_utils.RemoteFileMetadata(
        filename='AIST.RWC-MDB-P-2001.BEAT.zip',
        url='https://staff.aist.go.jp/m.goto/RWC-MDB/AIST-Annotation/AIST.RWC-MDB-P-2001.BEAT.zip',
        checksum='3858aa989535bd7196b3cd07b512b5b6',
        destination_dir='annotations',
    ),
    'annotations_sections': download_utils.RemoteFileMetadata(
        filename='AIST.RWC-MDB-P-2001.CHORUS.zip',
        url='https://staff.aist.go.jp/m.goto/RWC-MDB/AIST-Annotation/AIST.RWC-MDB-P-2001.CHORUS.zip',
        checksum='f76b3a32701fbd9bf78baa608f692a77',
        destination_dir='annotations',
    ),
    'annotations_chords': download_utils.RemoteFileMetadata(
        filename='AIST.RWC-MDB-P-2001.CHORD.zip',
        url='https://staff.aist.go.jp/m.goto/RWC-MDB/AIST-Annotation/AIST.RWC-MDB-P-2001.CHORD.zip',
        checksum='68379c88bc8ec3f1907b32a3579197c5',
        destination_dir='annotations',
    ),
    'annotations_vocal_act': download_utils.RemoteFileMetadata(
        filename='AIST.RWC-MDB-P-2001.VOCA_INST.zip',
        url='https://staff.aist.go.jp/m.goto/RWC-MDB/AIST-Annotation/AIST.RWC-MDB-P-2001.VOCA_INST.zip',
        checksum='47ded648a496407ef49dba9c8bf80e87',
        destination_dir='annotations',
    ),
}

# Folder name passed to utils.get_default_dataset_path() whenever the caller
# does not supply an explicit data_home.
DATASET_DIR = 'RWC-Popular'


def _load_metadata(data_home):

    metadata_path = os.path.join(data_home, 'metadata-master', 'rwc-p.csv')

    if not os.path.exists(metadata_path):
        logging.info(
            'Metadata file {} not found.'.format(metadata_path)
            + 'You can download the metadata file by running download()'
        )
        return None

    with open(metadata_path, 'r') as fhandle:
        dialect = csv.Sniffer().sniff(fhandle.read(1024))
        fhandle.seek(0)
        reader = csv.reader(fhandle, dialect)
        raw_data = []
        for line in reader:
            if line[0] != 'Piece No.':
                raw_data.append(line)

    metadata_index = {}
    for line in raw_data:
        if line[0] == 'Piece No.':
            continue
        p = '00' + line[0].split('.')[1][1:]
        track_id = 'RM-P{}'.format(p[len(p) - 3 :])

        metadata_index[track_id] = {
            'piece_number': line[0],
            'suffix': line[1],
            'track_number': line[2],
            'title': line[3],
            'artist': line[4],
            'singer_information': line[5],
            'duration': _duration_to_sec(line[6]),
            'tempo': line[7],
            'instruments': line[8],
            'drum_information': line[9],
        }

    metadata_index['data_home'] = data_home

    return metadata_index


# Dataset accessor used throughout this module: `DATA.index` maps track ids to
# their file entries, and `DATA.metadata(data_home)` is expected to return the
# result of _load_metadata (presumably cached by utils.LargeData -- confirm).
DATA = utils.LargeData('rwc_popular_index.json', _load_metadata)


class Track(track.Track):
    """rwc_popular Track class

    Args:
        track_id (str): track id of the track
        data_home (str): Local path where the dataset is stored. default=None
            If `None`, looks for the data in the default directory, `~/mir_datasets`

    Attributes:
        artist (str): artist
        audio_path (str): path of the audio file
        beats_path (str): path of the beat annotation file
        chords_path (str): path of the chord annotation file
        drum_information (str): If the drum is 'Drum sequences', 'Live drums',
            or 'Drum loops'
        duration (float): Duration of the track in seconds
        instruments (str): List of used instruments
        piece_number (str): Piece number as given in the metadata file
        sections_path (str): path of the section annotation file
        singer_information (str): Singer information as given in the metadata file
        suffix (str): Suffix column of the metadata file
        tempo (str): Tempo of the track in BPM
        title (str): title
        track_id (str): track id
        track_number (str): CD track number
        voca_inst_path (str): path of the vocal/instrumental annotation file

    All metadata-derived attributes are `None` when no metadata is available
    for the track.
    """

    def __init__(self, track_id, data_home=None):
        if track_id not in DATA.index:
            raise ValueError(
                '{} is not a valid track ID in RWC-Popular'.format(track_id)
            )

        self.track_id = track_id

        if data_home is None:
            data_home = utils.get_default_dataset_path(DATASET_DIR)

        self._data_home = data_home
        self._track_paths = DATA.index[track_id]

        def local_path(kind):
            # Each index entry is a sequence whose first item is the path
            # relative to the dataset root.
            return os.path.join(self._data_home, self._track_paths[kind][0])

        self.sections_path = local_path('sections')
        self.beats_path = local_path('beats')
        self.chords_path = local_path('chords')
        self.voca_inst_path = local_path('voca_inst')

        # Metadata fields, in the order they are stored and exposed.
        fields = (
            'piece_number',
            'suffix',
            'track_number',
            'title',
            'artist',
            'singer_information',
            'duration',
            'tempo',
            'instruments',
            'drum_information',
        )

        all_metadata = DATA.metadata(data_home)
        if all_metadata is None or track_id not in all_metadata:
            # annotations with missing metadata
            self._track_metadata = dict.fromkeys(fields)
        else:
            self._track_metadata = all_metadata[track_id]

        self.audio_path = local_path('audio')

        # Mirror every metadata field as an attribute of the same name.
        for field in fields:
            setattr(self, field, self._track_metadata[field])

    @utils.cached_property
    def sections(self):
        """SectionData: human-labeled section annotation"""
        return load_sections(self.sections_path)

    @utils.cached_property
    def beats(self):
        """BeatData: human-labeled beat annotation"""
        return load_beats(self.beats_path)

    @utils.cached_property
    def chords(self):
        """ChordData: human-labeled chord annotation"""
        return load_chords(self.chords_path)

    @utils.cached_property
    def vocal_instrument_activity(self):
        """EventData: human-labeled vocal/instrument activity"""
        return load_voca_inst(self.voca_inst_path)

    @property
    def audio(self):
        """(np.ndarray, float): audio signal, sample rate"""
        return load_audio(self.audio_path)

    def to_jams(self):
        """Jams: the track's data in jams format"""
        return jams_utils.jams_converter(
            audio_path=self.audio_path,
            beat_data=[(self.beats, None)],
            section_data=[(self.sections, None)],
            chord_data=[(self.chords, None)],
            metadata=self._track_metadata,
        )


def download(
    data_home=None, partial_download=None, force_overwrite=False, cleanup=True
):
    """Download the RWC Popular (annotations and metadata).
    The audio files are not provided due to copyright issues.

    Args:
        data_home (str):
            Local path where the dataset is stored.
            If `None`, looks for the data in the default directory, `~/mir_datasets`
        partial_download (list):
            List indicating what to partially download. The list can include any of:
                * `'annotations_beat'` the beat annotation files
                * `'annotations_sections'` the sections annotation files
                * `'annotations_chords'` the chords annotation files
                * `'annotations_vocal_act'` the vocal activity annotation files
                * `'metadata'` the metadata files
            If `None`, all data is downloaded.
        force_overwrite (bool):
            Whether to overwrite the existing downloaded data
        cleanup (bool):
            Whether to delete the zip/tar file after extracting.

    """
    target_dir = (
        utils.get_default_dataset_path(DATASET_DIR)
        if data_home is None
        else data_home
    )

    # Shown to the user because the audio has to be obtained separately.
    audio_instructions = """
        Unfortunately the audio files of the RWC-Popular dataset are not available
        for download. If you have the RWC-Popular dataset, place the contents into a
        folder called RWC-Popular with the following structure:
            > RWC-Popular/
                > annotations/
                > audio/rwc-p-m0i with i in [1 .. 7]
                > metadata-master/
        and copy the RWC-Popular folder to {}
    """.format(
        target_dir
    )

    download_utils.downloader(
        target_dir,
        remotes=REMOTES,
        partial_download=partial_download,
        info_message=audio_instructions,
        force_overwrite=force_overwrite,
        cleanup=cleanup,
    )


def validate(data_home=None, silence=False):
    """Validate if the stored dataset is a valid version

    Args:
        data_home (str): Local path where the dataset is stored.
            If `None`, looks for the data in the default directory, `~/mir_datasets`
        silence (bool): Forwarded unchanged to `utils.validator`
            (presumably suppresses its console output -- confirm).

    Returns:
        missing_files (list): List of file paths that are in the dataset index
            but missing locally
        invalid_checksums (list): List of file paths that file exists in the dataset
            index but has a different checksum compare to the reference checksum

    """
    dataset_path = (
        utils.get_default_dataset_path(DATASET_DIR)
        if data_home is None
        else data_home
    )

    missing, mismatched = utils.validator(DATA.index, dataset_path, silence=silence)
    return missing, mismatched


def track_ids():
    """Return track ids

    Returns:
        (list): A list of track ids, i.e. the keys of the dataset index

    """
    index = DATA.index
    return list(index.keys())


def load(data_home=None):
    """Load RWC-Popular dataset

    Args:
        data_home (str): Local path where the dataset is stored.
            If `None`, looks for the data in the default directory, `~/mir_datasets`

    Returns:
        (dict): {`track_id`: track data}, with one `Track` per id returned by
            `track_ids()`

    """
    if data_home is None:
        data_home = utils.get_default_dataset_path(DATASET_DIR)

    return {key: Track(key, data_home=data_home) for key in track_ids()}


def load_chords(chords_path):
    """Load chord annotations from a tab-separated file.

    Each non-blank row is read as (start time, end time, chord label).

    Args:
        chords_path (str): path of the chord annotation file

    Returns:
        utils.ChordData: intervals as an (n, 2) array of start/end times,
        together with the list of chord labels

    Raises:
        IOError: if `chords_path` does not exist
    """
    if not os.path.exists(chords_path):
        raise IOError("chords_path {} does not exist".format(chords_path))

    begs = []  # timestamps of chord beginnings
    ends = []  # timestamps of chord endings
    chords = []  # chord labels

    with open(chords_path, 'r') as fhandle:
        reader = csv.reader(fhandle, delimiter='\t')
        for line in reader:
            if not line:
                # csv yields [] for blank lines; nothing to parse
                continue
            begs.append(float(line[0]))
            ends.append(float(line[1]))
            chords.append(line[2])

    return utils.ChordData(np.array([begs, ends]).T, chords)


def load_voca_inst(voca_inst_path):
    """Load vocal/instrumental activity annotations from a tab-separated file.

    Each row holds (timestamp, label). Consecutive rows are turned into
    intervals: an event starts at its own timestamp and ends at the timestamp
    of the following row, so the final row only closes the last interval.

    Args:
        voca_inst_path (str): path of the vocal/instrumental annotation file

    Returns:
        utils.EventData: arrays of start times, end times and event labels

    Raises:
        IOError: if `voca_inst_path` does not exist
    """
    if not os.path.exists(voca_inst_path):
        raise IOError("voca_inst_path {} does not exist".format(voca_inst_path))

    with open(voca_inst_path, 'r') as fhandle:
        reader = csv.reader(fhandle, delimiter='\t')
        # Skip blank rows and any 'Piece No.' header row
        raw_data = [line for line in reader if line and line[0] != 'Piece No.']

    begs = []  # timestamps of vocal-instrument activity beginnings
    ends = []  # timestamps of vocal-instrument activity endings
    events = []  # vocal-instrument activity labels

    # Pair rows by position. Comparing row *content* against the last row
    # would silently drop any earlier row that happens to equal it.
    for current, following in zip(raw_data, raw_data[1:]):
        begs.append(float(current[0]))
        ends.append(float(following[0]))
        events.append(current[1])

    return utils.EventData(np.array(begs), np.array(ends), np.array(events))


def cite():
    """Print the references to cite when using the RWC-Popular annotations."""
    cite_data = """
=========== MLA ===========

If using beat and section annotations please cite:

Goto, Masataka, et al.,
"RWC Music Database: Popular, Classical and Jazz Music Databases.",
3rd International Society for Music Information Retrieval Conference (2002)

If using chord annotations please cite:

Cho, Taemin, and Juan P. Bello.,
"A feature smoothing method for chord recognition using recurrence plots.",
12th International Society for Music Information Retrieval Conference (2011)

If using vocal-instrument activity annotations please cite:

Mauch, Matthias, et al.,
"Timbre and Melody Features for the Recognition of Vocal Activity and Instrumental Solos in Polyphonic Music.",
12th International Society for Music Information Retrieval Conference (2011)

========== Bibtex ==========

If using beat and section annotations please cite:

@inproceedings{goto2002rwc,
  title={RWC Music Database: Popular, Classical and Jazz Music Databases.},
  author={Goto, Masataka and Hashiguchi, Hiroki and Nishimura, Takuichi and Oka, Ryuichi},
  booktitle={3rd International Society for Music Information Retrieval Conference},
  year={2002},
  series={ISMIR},
}

If using chord annotations please cite:

@inproceedings{cho2011feature,
  title={A feature smoothing method for chord recognition using recurrence plots},
  author={Cho, Taemin and Bello, Juan P},
  booktitle={12th International Society for Music Information Retrieval Conference},
  year={2011},
  series={ISMIR},
}

If using vocal-instrument activity annotations please cite:

@inproceedings{mauch2011timbre,
  title={Timbre and Melody Features for the Recognition of Vocal Activity and Instrumental Solos in Polyphonic Music.},
  author={Mauch, Matthias and Fujihara, Hiromasa and Yoshii, Kazuyoshi and Goto, Masataka},
  booktitle={ISMIR},
  year={2011},
  series={ISMIR},
}
"""

    print(cite_data)