Source code for mirdata.datasets.rwc_classical

# -*- coding: utf-8 -*-
"""RWC Classical Dataset Loader

.. admonition:: Dataset Info
    :class: dropdown

    The Classical Music Database consists of 50 pieces

    * Symphonies: 4 pieces
    * Concerti: 2 pieces
    * Orchestral music: 4 pieces
    * Chamber music: 10 pieces
    * Solo performances: 24 pieces
    * Vocal performances: 6 pieces

    **A note about the Beat annotations:**

    - 48 corresponds to the duration of a quarter note (crotchet)
    - 24 corresponds to the duration of an eighth note (quaver)
    - 384 corresponds to the position of a downbeat

    In 4/4 time signature, they correspond as follows:

    .. code-block:: latex

        384: 1st beat in a measure (i.e., downbeat position)
        48: 2nd beat
        96: 3rd beat
        144 4th beat

    In 3/4 time signature, they correspond as follows:

    .. code-block:: latex

        384: 1st beat in a measure (i.e., downbeat position)
        48: 2nd beat
        96: 3rd beat

    In 6/8 time signature, they correspond as follows:
    
    .. code-block:: latex

        384: 1st beat in a measure (i.e., downbeat position)
        24: 2nd beat
        48: 3rd beat
        72: 4th beat
        96: 5th beat
        120: 6th beat

    For more details, please visit: https://staff.aist.go.jp/m.goto/RWC-MDB/rwc-mdb-c.html

"""
import csv
import logging
import os

import librosa
import numpy as np

from mirdata import download_utils
from mirdata import jams_utils
from mirdata import core
from mirdata import annotations

BIBTEX = """@inproceedings{goto2002rwc,
  title={RWC Music Database: Popular, Classical and Jazz Music Databases.},
  author={Goto, Masataka and Hashiguchi, Hiroki and Nishimura, Takuichi and Oka, Ryuichi},
  booktitle={3rd International Society for Music Information Retrieval Conference},
  year={2002},
  series={ISMIR},
}"""
REMOTES = {
    "annotations_beat": download_utils.RemoteFileMetadata(
        filename="AIST.RWC-MDB-C-2001.BEAT.zip",
        url="https://staff.aist.go.jp/m.goto/RWC-MDB/AIST-Annotation/AIST.RWC-MDB-C-2001.BEAT.zip",
        checksum="e8ee05854833cbf5eb7280663f71c29b",
        destination_dir="annotations",
    ),
    "annotations_sections": download_utils.RemoteFileMetadata(
        filename="AIST.RWC-MDB-C-2001.CHORUS.zip",
        url="https://staff.aist.go.jp/m.goto/RWC-MDB/AIST-Annotation/AIST.RWC-MDB-C-2001.CHORUS.zip",
        checksum="f77bd527510376f59f5a2eed8fd7feb3",
        destination_dir="annotations",
    ),
    "metadata": download_utils.RemoteFileMetadata(
        filename="master.zip",
        url="https://github.com/magdalenafuentes/metadata/archive/master.zip",
        checksum="7dbe87fedbaaa1f348625a2af1d78030",
        destination_dir="",
    ),
}
DOWNLOAD_INFO = """
    Unfortunately the audio files of the RWC-Classical dataset are not available
    for download. If you have the RWC-Classical dataset, place the contents into a
    folder called RWC-Classical with the following structure:
        > RWC-Classical/
            > annotations/
            > audio/rwc-c-m0i with i in [1 .. 6]
            > metadata-master/
    and copy the RWC-Classical folder to {}
"""

LICENSE_INFO = """
From the dataset's owner webpage:

'Users who have submitted the Pledge and received authorization may freely use the database for research purposes 
without facing the usual copyright restrictions, but all of the copyrights and neighboring rights connected with 
this database belong to the National Institute of Advanced Industrial Science and Technology and are managed by the 
RWC Music Database Administrator. Persons or organizations that have not submitted a Pledge and that have not 
received authorization may not use the database.'

See https://staff.aist.go.jp/m.goto/RWC-MDB/ for more details.
"""


def _load_metadata(data_home):

    metadata_path = os.path.join(data_home, "metadata-master", "rwc-c.csv")

    if not os.path.exists(metadata_path):
        logging.info(
            "Metadata file {} not found.".format(metadata_path)
            + "You can download the metadata file by running download()"
        )
        return None

    with open(metadata_path, "r") as fhandle:
        dialect = csv.Sniffer().sniff(fhandle.read(1024))
        fhandle.seek(0)
        reader = csv.reader(fhandle, dialect)
        raw_data = []
        for line in reader:
            if line[0] != "Piece No.":
                raw_data.append(line)

    metadata_index = {}
    for line in raw_data:
        if line[0] == "Piece No.":
            continue
        p = "00" + line[0].split(".")[1][1:]
        track_id = "RM-C{}".format(p[len(p) - 3 :])

        metadata_index[track_id] = {
            "piece_number": line[0],
            "suffix": line[1],
            "track_number": line[2],
            "title": line[3],
            "composer": line[4],
            "artist": line[5],
            "duration": _duration_to_sec(line[6]),
            "category": line[7],
        }

    metadata_index["data_home"] = data_home

    return metadata_index


DATA = core.LargeData("rwc_classical_index.json", _load_metadata)


[docs]class Track(core.Track):
    """rwc_classical Track class

    Args:
        track_id (str): track id of the track

    Attributes:
        artist (str): the track's artist
        audio_path (str): path of the audio file
        beats_path (str): path of the beat annotation file
        category (str): One of 'Symphony', 'Concerto', 'Orchestral',
            'Solo', 'Chamber', 'Vocal', or blank.
        composer (str): Composer of this Track.
        duration (float): Duration of the track in seconds
        piece_number (str): Piece number of this Track, [1-50]
        sections_path (str): path of the section annotation file
        suffix (str): string within M01-M06
        title (str): Title of The track.
        track_id (str): track id
        track_number (str): CD track number of this Track

    Cached Properties:
        sections (SectionData): human-labeled section annotations
        beats (BeatData): human-labeled beat annotations

    """

    def __init__(self, track_id, data_home):
        if track_id not in DATA.index["tracks"]:
            raise ValueError(
                "{} is not a valid track ID in rwc_classical".format(track_id)
            )

        self.track_id = track_id
        self._data_home = data_home
        self._track_paths = DATA.index["tracks"][track_id]
        self.sections_path = os.path.join(
            self._data_home, self._track_paths["sections"][0]
        )
        self.beats_path = os.path.join(self._data_home, self._track_paths["beats"][0])

        metadata = DATA.metadata(data_home)
        if metadata is not None and track_id in metadata:
            self._track_metadata = metadata[track_id]
        else:
            self._track_metadata = {
                "piece_number": None,
                "suffix": None,
                "track_number": None,
                "title": None,
                "composer": None,
                "artist": None,
                "duration": None,
                "category": None,
            }

        self.audio_path = os.path.join(self._data_home, self._track_paths["audio"][0])

        self.piece_number = self._track_metadata["piece_number"]
        self.suffix = self._track_metadata["suffix"]
        self.track_number = self._track_metadata["track_number"]
        self.title = self._track_metadata["title"]
        self.composer = self._track_metadata["composer"]
        self.artist = self._track_metadata["artist"]
        self.duration = self._track_metadata["duration"]
        self.category = self._track_metadata["category"]

    @core.cached_property
    def sections(self):
        return load_sections(self.sections_path)

    @core.cached_property
    def beats(self):
        return load_beats(self.beats_path)

    @property
    def audio(self):
        """The track's audio

        Returns:
           * np.ndarray - audio signal
           * float - sample rate

        """
        return load_audio(self.audio_path)

[docs]    def to_jams(self):
        """Get the track's data in jams format

        Returns:
            jams.JAMS: the track's data in jams format

        """
        return jams_utils.jams_converter(
            audio_path=self.audio_path,
            beat_data=[(self.beats, None)],
            section_data=[(self.sections, None)],
            metadata=self._track_metadata,
        )


[docs]def load_audio(audio_path):
    """Load a RWC audio file.

    Args:
        audio_path (str): path to audio file

    Returns:
        * np.ndarray - the mono audio signal
        * float - The sample rate of the audio file

    """
    if not os.path.exists(audio_path):
        raise IOError("audio_path {} does not exist".format(audio_path))

    return librosa.load(audio_path, sr=None, mono=True)


[docs]def load_sections(sections_path):
    """Load rwc section data from a file

    Args:
        sections_path (str): path to sections annotation file

    Returns:
        SectionData: section data

    """
    if not os.path.exists(sections_path):
        raise IOError("sections_path {} does not exist".format(sections_path))

    begs = []  # timestamps of section beginnings
    ends = []  # timestamps of section endings
    secs = []  # section labels

    with open(sections_path, "r") as fhandle:
        reader = csv.reader(fhandle, delimiter="\t")
        for line in reader:
            begs.append(float(line[0]) / 100.0)
            ends.append(float(line[1]) / 100.0)
            secs.append(line[2])

    if not begs:  # some files are empty
        return None

    return annotations.SectionData(np.array([begs, ends]).T, secs)


def _position_in_bar(beat_positions, beat_times):
    """Map raw rwc eat data to beat position in bar (e.g. 1, 2, 3, 4).

    Args:
        beat_positions (np.ndarray): raw rwc beat positions
        beat_times (np.ndarray): raw rwc time stamps

    Returns:
        * np.ndarray: normalized beat positions
        * np.ndarray: normalized time stamps

    """
    # Remove -1
    _beat_positions = np.delete(beat_positions, np.where(beat_positions == -1))
    beat_times_corrected = np.delete(beat_times, np.where(beat_positions == -1))

    # Create corrected array with downbeat positions
    beat_positions_corrected = np.zeros((len(_beat_positions),))
    downbeat_positions = np.where(_beat_positions == 384)[0]
    _beat_positions[downbeat_positions] = 1
    beat_positions_corrected[downbeat_positions] = 1

    # Propagate positions
    for b in range(0, len(_beat_positions)):
        if _beat_positions[b] > _beat_positions[b - 1]:
            beat_positions_corrected[b] = beat_positions_corrected[b - 1] + 1

    if not downbeat_positions[0] == 0:
        timesig_next_bar = beat_positions_corrected[downbeat_positions[1] - 1]
        for b in range(1, downbeat_positions[0] + 1):
            beat_positions_corrected[downbeat_positions[0] - b] = (
                timesig_next_bar - b + 1
            )

    return beat_positions_corrected, beat_times_corrected


[docs]def load_beats(beats_path):
    """Load rwc beat data from a file

    Args:
        beats_path (str): path to beats annotation file

    Returns:
        BeatData: beat data

    """
    if not os.path.exists(beats_path):
        raise IOError("beats_path {} does not exist".format(beats_path))

    beat_times = []  # timestamps of beat interval beginnings
    beat_positions = []  # beat position inside the bar

    with open(beats_path, "r") as fhandle:
        reader = csv.reader(fhandle, delimiter="\t")
        for line in reader:
            beat_times.append(float(line[0]) / 100.0)
            beat_positions.append(int(line[2]))
    beat_positions, beat_times = _position_in_bar(
        np.array(beat_positions), np.array(beat_times)
    )

    return annotations.BeatData(beat_times, beat_positions.astype(int))


def _duration_to_sec(duration):
    """Convert min:sec duration values to seconds

    Args:
        duration (str): duration in form min:sec

    Returns:
        float: duration in seconds

    """
    if type(duration) == str:
        if ":" in duration:
            if len(duration.split(":")) <= 2:
                minutes, secs = duration.split(":")
            else:
                minutes, secs, _ = duration.split(
                    ":"
                )  # mistake in annotation in RM-J044
            total_secs = float(minutes) * 60 + float(secs)
            return total_secs
    else:
        raise ValueError(
            "Expected duration to have type str, got {}".format(type(duration))
        )


[docs]@core.docstring_inherit(core.Dataset)
class Dataset(core.Dataset):
    """
    The rwc_classical dataset
    """

    def __init__(self, data_home=None):
        super().__init__(
            data_home,
            index=DATA.index,
            name="rwc_classical",
            track_object=Track,
            bibtex=BIBTEX,
            remotes=REMOTES,
            download_info=DOWNLOAD_INFO,
            license_info=LICENSE_INFO,
        )

[docs]    @core.copy_docs(load_audio)
    def load_audio(self, *args, **kwargs):
        return load_audio(*args, **kwargs)

[docs]    @core.copy_docs(load_sections)
    def load_sections(self, *args, **kwargs):
        return load_sections(*args, **kwargs)

[docs]    @core.copy_docs(load_beats)
    def load_beats(self, *args, **kwargs):
        return load_beats(*args, **kwargs)