Source code for mirdata.mridangam_stroke

# -*- coding: utf-8 -*-

"""
Mridangam Stroke Dataset Loader

The Mridangam Stroke dataset is a collection of individual strokes of
the Mridangam in various tonics. The dataset comprises 10 different
strokes played on Mridangams with 6 different tonic values. The audio
examples were recorded from a professional Carnatic percussionist in
semi-anechoic studio conditions by Akshay Anantapadmanabhan.

Total audio samples: 6977

Used microphones:
* SM-58 microphones
* H4n ZOOM recorder.

Audio specifications
* Sampling frequency: 44.1 kHz
* Bit-depth: 16 bit
* Audio format: .wav

The dataset can be used for training models for each Mridangam stroke. The
dataset was presented at the IEEE International Conference on Acoustics,
Speech and Signal Processing (ICASSP 2013) in May 2013.
You can read the full publication here: https://repositori.upf.edu/handle/10230/25756

The Mridangam Stroke Dataset is annotated by storing the information of each track in its filename.
The structure of the filename is:
<TrackID>__<AuthorName>__<StrokeName>-<Tonic>-<InstanceNum>.wav

The dataset is made available by CompMusic under a Creative Commons
Attribution 3.0 Unported (CC BY 3.0) License.

For more details, please visit: https://compmusic.upf.edu/mridangam-stroke-dataset
"""

import os
import librosa

from mirdata import download_utils
from mirdata import jams_utils
from mirdata import track
from mirdata import utils


# Folder name handed to `utils.get_default_dataset_path` when no explicit
# `data_home` is supplied.
DATASET_DIR = 'Mridangam-Stroke'


# Remote archive(s) that make up the dataset, keyed by a short label.
REMOTES = dict(
    remote_data=download_utils.RemoteFileMetadata(
        url='https://zenodo.org/record/4068196/files/mridangam_stroke_1.5.zip?download=1',
        filename='mridangam_stroke_1.5.zip',
        # md5 checksum of the archive
        checksum='39af55b2476b94c7946bec24331ec01a',
        # relative path for where to unzip the data, or None
        destination_dir=None,
    ),
)

# Dataset index; its `index` attribute maps track ids to their file entries.
DATA = utils.LargeData('mridangam_stroke_index.json')


# Stroke labels accepted when parsing a track's file name.
# NOTE: despite the `_DICT` suffix this is a plain set used for membership tests.
STROKE_DICT = {
    'bheem',
    'cha',
    'dheem',
    'dhin',
    'num',
    'ta',
    'tha',
    'tham',
    'thi',
    'thom',
}


# Tonic labels accepted when parsing a track's parent directory name.
# NOTE: also a set, not a dict.
TONIC_DICT = {
    'B',
    'C',
    'C#',
    'D',
    'D#',
    'E',
}


class Track(track.Track):
    """Mridangam Stroke track class

    Args:
        track_id (str): track id of the track
        data_home (str): Local path where the dataset is stored.
            If `None`, looks for the data in the default directory, `~/mir_datasets/Mridangam-Stroke`

    Attributes:
        track_id (str): track id
        audio_path (str): audio path
        stroke_name (str): name of the Mridangam stroke present in Track
        tonic (str): tonic of the stroke in the Track

    Raises:
        ValueError: if `track_id` is not a key of the dataset index
        AssertionError: if the parsed stroke name or tonic is not one of the
            known values in `STROKE_DICT` / `TONIC_DICT`
    """

    def __init__(self, track_id, data_home=None):
        if track_id not in DATA.index:
            raise ValueError(
                '{} is not a valid track ID in Mridangam Stroke'.format(track_id)
            )

        self.track_id = track_id

        if data_home is None:
            data_home = utils.get_default_dataset_path(DATASET_DIR)

        self._data_home = data_home
        self._track_paths = DATA.index[track_id]

        self.audio_path = os.path.join(self._data_home, self._track_paths['audio'][0])

        # Parse stroke name annotation from the audio file name
        # (<TrackID>__<AuthorName>__<StrokeName>-<Tonic>-<InstanceNum>.wav).
        # Only the base name is split: a `data_home` that itself contains
        # '__' must not shift the position of the stroke field.
        file_name = os.path.basename(self.audio_path)
        self.stroke_name = file_name.split('__')[2].split('-')[0]
        assert self.stroke_name in STROKE_DICT, "Stroke {} not in stroke dictionary".format(self.stroke_name)

        # Parse tonic annotation from the name of the directory holding the audio file
        self.tonic = os.path.basename(os.path.dirname(self.audio_path))
        assert self.tonic in TONIC_DICT, "Tonic {} not in tonic dictionary".format(self.tonic)

    @property
    def audio(self):
        """(tuple): audio signal and sample rate, as returned by `load_audio`"""
        return load_audio(self.audio_path)

    def to_jams(self):
        """Jams: the track's data in jams format

        The stroke name is stored as an open tag and the tonic as metadata.
        """
        return jams_utils.jams_converter(
            audio_path=self.audio_path,
            tags_open_data=[(self.stroke_name, 'stroke_name')],
            metadata={
                'tonic': self.tonic
            },
        )


def load_audio(audio_path):
    """Load a Mridangam Stroke Dataset audio file.

    The file is loaded through `librosa.load`, resampled to 44100 Hz and
    mixed down to mono.

    Args:
        audio_path (str): path to audio file

    Returns:
        y (np.ndarray): the mono audio signal
        sr (float): The sample rate of the audio file

    Raises:
        IOError: if `audio_path` does not exist
    """
    # Fail early with a message naming the path instead of relying on
    # whatever error the audio backend would produce.
    if not os.path.exists(audio_path):
        raise IOError("audio_path {} does not exist".format(audio_path))
    return librosa.load(audio_path, sr=44100, mono=True)


def download(data_home=None, force_overwrite=False, cleanup=True):
    """Download the Mridangam Stroke Dataset.

    Args:
        data_home (str):
            Local path where the dataset is stored.
            If `None`, uses the default directory for this dataset, i.e.
            `utils.get_default_dataset_path(DATASET_DIR)`
        force_overwrite (bool):
            Whether to overwrite the existing downloaded data
        cleanup (bool):
            Whether to delete the zip/tar file after extracting.
    """
    if data_home is None:
        data_home = utils.get_default_dataset_path(DATASET_DIR)

    # All remotes declared in REMOTES are handed to the shared downloader;
    # this dataset has no extra download instructions to print.
    download_utils.downloader(
        data_home,
        remotes=REMOTES,
        info_message=None,
        force_overwrite=force_overwrite,
        cleanup=cleanup,
    )


def validate(data_home=None, silence=False):
    """Validate if the stored dataset is a valid version

    Args:
        data_home (str): Local path where the dataset is stored.
            If `None`, uses the default directory for this dataset, i.e.
            `utils.get_default_dataset_path(DATASET_DIR)`
        silence (bool): forwarded unchanged to `utils.validator`
            (presumably suppresses its printed report -- confirm against
            `utils.validator`)

    Returns:
        missing_files (list): List of file paths that are in the dataset index
            but missing locally
        invalid_checksums (list): List of file paths that file exists in the dataset
            index but has a different checksum compare to the reference checksum
    """
    if data_home is None:
        data_home = utils.get_default_dataset_path(DATASET_DIR)

    missing_files, invalid_checksums = utils.validator(
        DATA.index, data_home, silence=silence
    )
    return missing_files, invalid_checksums


def track_ids():
    """Return track ids

    Returns:
        (list): A list of track ids, i.e. the keys of the dataset index
    """
    return list(DATA.index.keys())


def load(data_home=None):
    """Load Mridangam Stroke dataset

    Args:
        data_home (str): Local path where the dataset is stored.
            If `None`, uses the default directory for this dataset, i.e.
            `utils.get_default_dataset_path(DATASET_DIR)`

    Returns:
        (dict): {`track_id`: track data}, with one `Track` per id in the
            dataset index
    """
    if data_home is None:
        data_home = utils.get_default_dataset_path(DATASET_DIR)

    return {
        track_id: Track(track_id, data_home=data_home)
        for track_id in DATA.index.keys()
    }


def cite():
    """Print the reference

    Writes the citation for the dataset's publication to standard output,
    first as a plain-text reference and then as a BibTeX entry.
    Returns nothing.
    """

    cite_data = """
=========== MLA ===========
Anantapadmanabhan, A., Bellur, A., & Murthy, H. A. 
"Modal analysis and transcription of strokes of the mridangam using non-negative matrix factorization" (2013)
IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP 2013)
========== Bibtex ==========
@article{Anantapadmanabhan2013,
    author = {Anantapadmanabhan, Akshay and Bellur, Ashwin and Murthy, Hema A.},
    doi = {10.1109/ICASSP.2013.6637633},
    isbn = {9781479903566},
    issn = {15206149},
    journal = {ICASSP, IEEE International Conference on Acoustics, Speech and Signal Processing - Proceedings},
    keywords = {Hidden Markov models, Modal Analysis, Mridangam, Non-negative Matrix Factorization,
    automatic transcription},
    pages = {181--185},
    title = {{Modal analysis and transcription of strokes of the mridangam using non-negative matrix factorization}},
    year = {2013}
}
"""
    print(cite_data)