Source code for mirdata.datasets.mridangam_stroke

"""Mridangam Stroke Dataset Loader

.. admonition:: Dataset Info
    :class: dropdown

    The Mridangam Stroke dataset is a collection of individual strokes of
    the Mridangam in various tonics. The dataset comprises 10 different
    strokes played on Mridangams with 6 different tonic values. The audio
    examples were recorded from a professional Carnatic percussionist,
    Akshay Anantapadmanabhan, under semi-anechoic studio conditions.

    Total audio samples: 6977

    Recording equipment:

    * SM-58 microphones
    * H4n ZOOM recorder

    Audio specifications:

    * Sampling frequency: 44.1 kHz
    * Bit-depth: 16 bit
    * Audio format: .wav

    The dataset can be used for training models for each Mridangam stroke. The
    dataset was presented at the IEEE International Conference on Acoustics,
    Speech and Signal Processing (ICASSP 2013) in May 2013. You can read the
    full publication here: https://repositori.upf.edu/handle/10230/25756

    The Mridangam Stroke dataset is annotated by encoding the information for
    each track in its filename. The structure of the filename is:

    .. code-block:: bash

        <TrackID>__<AuthorName>__<StrokeName>-<Tonic>-<InstanceNum>.wav

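    For example, the stroke name can be recovered from a filename that follows
    this pattern (a minimal sketch, using a hypothetical filename):

    .. code-block:: python

        # Hypothetical filename following the pattern above
        filename = "10001__author__bheem-b-001.wav"

        # <StrokeName> is the third "__"-separated field, up to the first "-"
        stroke_name = filename.split("__")[2].split("-")[0]  # -> "bheem"
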
    The dataset is made available by CompMusic under a Creative Commons
    Attribution 3.0 Unported (CC BY 3.0) License.

    For more details, please visit: https://compmusic.upf.edu/mridangam-stroke-dataset

"""

import os

from deprecated.sphinx import deprecated
import librosa
import numpy as np
from typing import BinaryIO, Optional, Tuple

from mirdata import core, download_utils, io, jams_utils

BIBTEX = """@article{Anantapadmanabhan2013,
    author = {Anantapadmanabhan, Akshay and Bellur, Ashwin and Murthy, Hema A.},
    doi = {10.1109/ICASSP.2013.6637633},
    isbn = {9781479903566},
    issn = {15206149},
    journal = {ICASSP, IEEE International Conference on Acoustics, Speech and Signal Processing - Proceedings},
    keywords = {Hidden Markov models, Modal Analysis, Mridangam, Non-negative Matrix Factorization,
    automatic transcription},
    pages = {181--185},
    title = {{Modal analysis and transcription of strokes of the mridangam using non-negative matrix factorization}},
    year = {2013}
}"""

INDEXES = {
    "default": "1.5",
    "test": "1.5",
    "1.5": core.Index(filename="mridangam_stroke_index_1.5.json"),
}

REMOTES = {
    "remote_data": download_utils.RemoteFileMetadata(
        filename="mridangam_stroke_1.5.zip",
        url="https://zenodo.org/record/4068196/files/mridangam_stroke_1.5.zip?download=1",
        checksum="39af55b2476b94c7946bec24331ec01a",  # the md5 checksum
    )
}


STROKE_DICT = {
    "bheem",
    "cha",
    "dheem",
    "dhin",
    "num",
    "ta",
    "tha",
    "tham",
    "thi",
    "thom",
}


TONIC_DICT = {"B", "C", "C#", "D", "D#", "E"}

LICENSE_INFO = "Creative Commons Attribution 3.0 Unported (CC BY 3.0) License."


class Track(core.Track):
    """Mridangam Stroke track class

    Args:
        track_id (str): track id of the track
        data_home (str): Local path where the dataset is stored.

    Attributes:
        track_id (str): track id
        audio_path (str): audio path
        stroke_name (str): name of the Mridangam stroke present in Track
        tonic (str): tonic of the stroke in the Track

    """

    def __init__(self, track_id, data_home, dataset_name, index, metadata):
        super().__init__(track_id, data_home, dataset_name, index, metadata)

        self.audio_path = self.get_path("audio")

        # Parse stroke name annotation from the audio file name
        self.stroke_name = self.audio_path.split("__")[2].split("-")[0]
        assert (
            self.stroke_name in STROKE_DICT
        ), "Stroke {} not in stroke dictionary".format(self.stroke_name)

        # Parse tonic annotation from the name of the folder containing the audio file
        self.tonic = os.path.basename(os.path.dirname(self.audio_path))
        assert self.tonic in TONIC_DICT, "Tonic {} not in tonic dictionary".format(
            self.tonic
        )

    @property
    def audio(self) -> Optional[Tuple[np.ndarray, float]]:
        """The track's audio

        Returns:
            * np.ndarray - audio signal
            * float - sample rate

        """
        return load_audio(self.audio_path)

    def to_jams(self):
        """Get the track's data in jams format

        Returns:
            jams.JAMS: the track's data in jams format

        """
        return jams_utils.jams_converter(
            audio_path=self.audio_path,
            tags_open_data=[(self.stroke_name, "stroke_name")],
            metadata={"tonic": self.tonic},
        )


@io.coerce_to_bytes_io
def load_audio(fhandle: BinaryIO) -> Tuple[np.ndarray, float]:
    """Load a Mridangam Stroke Dataset audio file.

    Args:
        fhandle (str or file-like): File-like object or path to audio file

    Returns:
        * np.ndarray - the mono audio signal
        * float - The sample rate of the audio file

    """
    return librosa.load(fhandle, sr=44100, mono=True)


@core.docstring_inherit(core.Dataset)
class Dataset(core.Dataset):
    """
    The mridangam_stroke dataset
    """

    def __init__(self, data_home=None, version="default"):
        super().__init__(
            data_home,
            version,
            name="mridangam_stroke",
            track_class=Track,
            bibtex=BIBTEX,
            indexes=INDEXES,
            remotes=REMOTES,
            license_info=LICENSE_INFO,
        )

    @deprecated(
        reason="Use mirdata.datasets.mridangam_stroke.load_audio",
        version="0.3.4",
    )
    def load_audio(self, *args, **kwargs):
        return load_audio(*args, **kwargs)
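

# Example usage (a minimal sketch): initialize the dataset, download it if
# needed, and read the stroke label, tonic, and audio for one track. Assumes
# network access and the default data_home location.
if __name__ == "__main__":
    dataset = Dataset()
    dataset.download()  # fetch and unpack mridangam_stroke_1.5.zip
    track = dataset.choice_track()  # pick an arbitrary track from the index
    print(track.track_id, track.stroke_name, track.tonic)
    y, sr = track.audio  # mono signal at 44.1 kHz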