Source code for mirdata.datasets.tonality_classicaldb

"""Tonality classicalDB Dataset Loader

.. admonition:: Dataset Info
    :class: dropdown

    The Tonality classicalDB Dataset includes 881 classical musical pieces across different styles from s.XVII to s.XX
    annotated with single-key labels.

    Tonality classicalDB Dataset was created as part of:

    .. code-block:: latex

        Gómez, E. (2006). PhD Thesis. Tonal description of music audio signals.
        Department of Information and Communication Technologies.

    This dataset is mainly intended to assess the performance of computational key estimation algorithms in classical music.

    2020 note: The audio is privates. If you don't have the original audio collection, you could create it from your private collection
    because most of the recordings are well known. To this end, we provide musicbrainz metadata. Moreover, we have added the spectrum and
    HPCP chromagram of each audio.

    This dataset can be used with mirdata library:
    https://github.com/mir-dataset-loaders/mirdata

    Spectrum features have been computed as is shown here:
    https://github.com/mir-dataset-loaders/mirdata-notebooks/blob/master/Tonality_classicalDB/ClassicalDB_spectrum_features.ipynb

    HPCP chromagram has been computed as is shown here:
    https://github.com/mir-dataset-loaders/mirdata-notebooks/blob/master/Tonality_classicalDB/ClassicalDB_HPCP_features.ipynb

    Musicbrainz metadata has been computed as is shown here:
    https://github.com/mir-dataset-loaders/mirdata-notebooks/blob/master/Tonality_classicalDB/ClassicalDB_musicbrainz_metadata.ipynb

"""

import csv
import json
from typing import Any, BinaryIO, Dict, Optional, TextIO, Tuple

from deprecated.sphinx import deprecated
import librosa
import numpy as np

from mirdata import core, download_utils, io

BIBTEX = """@article{gomez2006tonal,
  title={Tonal description of music audio signals},
  author={G{\'o}mez, Emilia},
  journal={Department of Information and Communication Technologies},
  year={2006}
}"""

INDEXES = {
    "default": "1.0",
    "test": "sample",
    "1.0": core.Index(
        filename="tonality_classicaldb_index_1.0.json",
        url="https://zenodo.org/records/13993012/files/tonality_classicaldb_index_1.0.json?download=1",
        checksum="5f73a3bf0beb43d3ecae414888d5bdf5",
    ),
    "sample": core.Index(filename="tonality_classicaldb_index_1.0_sample.json"),
}
REMOTES = {
    "keys": download_utils.RemoteFileMetadata(
        filename="keys.zip",
        url="https://zenodo.org/record/4283868/files/keys.zip?download=1",
        checksum="5d58978783de846f9cb337352e7d2612",
        destination_dir=".",
    ),
    "musicbrainz_metadata": download_utils.RemoteFileMetadata(
        filename="musicbrainz_metadata.zip",
        url="https://zenodo.org/record/4283868/files/musicbrainz_metadata.zip?download=1",
        checksum="4a77ecc6a9410a59feeffa1152cb6edc",
        destination_dir=".",
    ),
    "HPCPs": download_utils.RemoteFileMetadata(
        filename="HPCPs.zip",
        url="https://zenodo.org/record/4283868/files/HPCPs.zip?download=1",
        checksum="66d1ca70376109a42d0bac1306691599",
        destination_dir=".",
    ),
    "spectrums": download_utils.RemoteFileMetadata(
        filename="spectrums.zip",
        url="https://zenodo.org/record/4283868/files/spectrums.zip?download=1",
        checksum="63a79033d608ba95fb559a33e2f70d3a",
        destination_dir=".",
    ),
}
DOWNLOAD_INFO = """
    Unfortunately the audio files of the Tonality classicalDB dataset are not available
    for download. If you have the tonality classicalDB audio dataset, place the contents into
    a folder called classicaldb with the following structure:
        > classicaldb/
            > audio/
            > keys/
            > spectrums/
            > HPCPs/
            > musicbrainz_metadata/
    and copy the folder to {} directory
"""

LICENSE_INFO = (
    "Creative Commons Attribution Non Commercial Share Alike 4.0 International."
)



[docs]
class Track(core.Track):
    """tonality_classicaldb track class

    Args:
        track_id (str): track id of the track

    Attributes:
        audio_path (str): track audio path
        key_path (str): key annotation path
        title (str): title of the track
        track_id (str): track id

    Cached Properties:
        key (str): key annotation
        spectrum (np.array): computed audio spectrum
        hpcp (np.array): computed hpcp
        musicbrainz_metadata (dict): MusicBrainz metadata

    """

    def __init__(self, track_id, data_home, dataset_name, index, metadata):
        super().__init__(track_id, data_home, dataset_name, index, metadata)

        self.key_path = self.get_path("key")
        self.spectrum_path = self.get_path("spectrum")
        self.musicbrainz_path = self.get_path("mb")
        self.hpcp_path = self.get_path("HPCP")

        self.audio_path = self.get_path("audio")

        self.title = self.audio_path.replace(".wav", "").split("/")[-1]

    @core.cached_property
    def key(self) -> Optional[str]:
        return load_key(self.key_path)

    @core.cached_property
    def spectrum(self) -> Optional[np.ndarray]:
        return load_spectrum(self.spectrum_path)

    @core.cached_property
    def hpcp(self) -> Optional[np.ndarray]:
        return load_hpcp(self.hpcp_path)

    @core.cached_property
    def musicbrainz_metadata(self) -> Optional[Dict[Any, Any]]:
        return load_musicbrainz(self.musicbrainz_path)

    @property
    def audio(self) -> Optional[Tuple[np.ndarray, float]]:
        """The track's audio

        Returns:
            * np.ndarray - audio signal
            * float - sample rate

        """
        return load_audio(self.audio_path)




[docs]
@io.coerce_to_bytes_io
def load_audio(fhandle: BinaryIO) -> Tuple[np.ndarray, float]:
    """Load a Tonality classicalDB audio file.

    Args:
        fhandle (str or file-like): File-like object or path to audio file

    Returns:
        * np.ndarray - the mono audio signal
        * float - The sample rate of the audio file

    """
    return librosa.load(fhandle, sr=None, mono=True)




[docs]
@io.coerce_to_string_io
def load_key(fhandle: TextIO) -> str:
    """Load Tonality classicalDB format key data from a file

    Args:
        fhandle (str or file-like): File-like object or path to key annotation file

    Returns:
        str: musical key data
    """
    reader = csv.reader(fhandle, delimiter="\n")
    key = next(reader)[0]

    return key.replace("\t", " ")




[docs]
@io.coerce_to_string_io
def load_spectrum(fhandle: TextIO) -> np.ndarray:
    """Load Tonality classicalDB spectrum data from a file

    Args:
        fhandle (str or file-like): File-like object or path to spectrum file

    Returns:
        np.ndarray: spectrum data

    """
    data = json.load(fhandle)
    spectrum = [list(map(complex, x)) for x in data["spectrum"]]
    return np.array(spectrum)




[docs]
@io.coerce_to_string_io
def load_hpcp(fhandle: TextIO) -> np.ndarray:
    """Load Tonality classicalDB HPCP feature from a file

    Args:
        fhandle (str or file-like): File-like object or path to HPCP file

    Returns:
        np.ndarray: loaded HPCP data

    """
    data = json.load(fhandle)
    return np.array(data["hpcp"])




[docs]
@io.coerce_to_string_io
def load_musicbrainz(fhandle: TextIO) -> Dict[Any, Any]:
    """Load Tonality classicalDB musicbraiz metadata from a file

    Args:
        fhandle (str or file-like): File-like object or path to musicbrainz metadata file

    Returns:
        dict: musicbrainz metadata

    """
    return json.load(fhandle)




[docs]
@core.docstring_inherit(core.Dataset)
class Dataset(core.Dataset):
    """
    The tonality_classicaldb dataset
    """

    def __init__(self, data_home=None, version="default"):
        super().__init__(
            data_home,
            version,
            name="tonality_classicaldb",
            track_class=Track,
            bibtex=BIBTEX,
            indexes=INDEXES,
            remotes=REMOTES,
            download_info=DOWNLOAD_INFO,
            license_info=LICENSE_INFO,
        )


[docs]
    @deprecated(
        reason="Use mirdata.datasets.tonality_classicaldb.load_audio", version="0.3.4"
    )
    def load_audio(self, *args, **kwargs):
        return load_audio(*args, **kwargs)



[docs]
    @deprecated(
        reason="Use mirdata.datasets.tonality_classicaldb.load_key", version="0.3.4"
    )
    def load_key(self, *args, **kwargs):
        return load_key(*args, **kwargs)



[docs]
    @deprecated(
        reason="Use mirdata.datasets.tonality_classicaldb.load_spectrum",
        version="0.3.4",
    )
    def load_spectrum(self, *args, **kwargs):
        return load_spectrum(*args, **kwargs)



[docs]
    @deprecated(
        reason="Use mirdata.datasets.tonality_classicaldb.load_hpcp", version="0.3.4"
    )
    def load_hpcp(self, *args, **kwargs):
        return load_hpcp(*args, **kwargs)



[docs]
    @deprecated(
        reason="Use mirdata.datasets.tonality_classicaldb.load_musicbrainz",
        version="0.3.4",
    )
    def load_musicbrainz(self, *args, **kwargs):
        return load_musicbrainz(*args, **kwargs)