Source code for mirdata.datasets.compmusic_raga

"""CompMusic Raga Dataset Loader

.. admonition:: Dataset Info
    :class: dropdown

    The CompMusic Rāga datasets comprise two sizable datasets, one for each music tradition,
    Carnatic and Hindustani. Each dataset contains full-length audio recordings and their
    associated rāga labels, and can be used to develop and evaluate approaches for automatic
    rāga recognition in Indian art music.

    These datasets are derived from the CompMusic corpora of Indian Art Music, and were
    compiled at the Music Technology Group by researchers working on the computational
    analysis of Carnatic and Hindustani music within the framework of the ERC-funded
    CompMusic project.
    
    Each recording is associated with a MusicBrainz ID (MBID). Using the MBID, additional
    information can be obtained through the Dunya API or the pycompmusic library.
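
    As a rough illustration (the token handling and exact pycompmusic calls below are
    assumptions and may differ between versions), recording-level metadata could be fetched
    as follows:

    .. code-block:: python

        from compmusic import dunya

        # A personal API token from the Dunya website is assumed to be required.
        dunya.set_token("<your-dunya-api-token>")

        # Fetch editorial metadata for a recording by its MBID (hypothetical MBID).
        recording_info = dunya.carnatic.get_recording("<recording-mbid>")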

    The Carnatic subset comprises 124 hours of audio recordings and editorial metadata that 
    includes carefully curated and verified rāga labels. It contains 480 recordings belonging 
    to 40 rāgas with 12 recordings per rāga.

    The Hindustani subset comprises 116 hours of audio recordings and editorial metadata that 
    includes carefully curated and verified rāga labels. It contains 300 recordings belonging 
    to 30 rāgas with 10 recordings per rāga. 

    The dataset also includes the following features for each file (accessed through the
    track attributes, as shown in the sketch below):

    * Tonic: float indicating the recording tonic
    * Tonic fine tuned: float indicating the manually fine-tuned recording tonic
    * Predominant pitch: automatically-extracted predominant pitch time-series (timestamps and freq. values)
    * Post-processed pitch: automatically-extracted and post-processed predominant pitch time-series
    * Nyas segments: KNN-extracted segments of Nyas (start and end times provided)
    * Tani segments: KNN-extracted segments of Tanis (start and end times provided)

    The dataset includes both txt and json files that contain, for each audio recording,
    its MBID, the path of the audio/feature files and the associated rāga identifier. Each
    rāga is assigned a unique identifier by Dunya, which serves a purpose similar to that of
    the MBID. A mapping from each rāga id to its transliterated name is also provided, as
    sketched below.
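
    For illustration, the metadata files are laid out roughly as follows (keys and values
    are placeholders; the loader only reads the ``path``, ``mbid`` and ``ragaid`` fields of
    ``path_mbid_ragaid.json`` and the id-to-name entries of
    ``ragaId_to_ragaName_mapping.json``):

    .. code-block:: text

        path_mbid_ragaid.json
        {
            "<index>": {
                "path": "<relative path to the audio/feature files>",
                "mbid": "<recording MBID>",
                "ragaid": "<Dunya raga id>"
            }
        }

        ragaId_to_ragaName_mapping.json
        {
            "<Dunya raga id>": "<transliterated raga name>"
        }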

    For more information about the dataset please refer to: https://compmusic.upf.edu/node/328

"""

import os
import csv
import json

import librosa
import numpy as np

from mirdata import annotations, core, download_utils, io, jams_utils
from smart_open import open


BIBTEX = """
@inproceedings{gulati_2016,
  author       = {Gulati, Sankalp and Serrà, Joan and Ganguli, Kaustuv Kanti
                    and Şentürk, Sertan and Serra, Xavier},
  title        = {{Time-delayed melody surfaces for raga recognition}},
  year         = 2016,
  pages        = {751--757},
  booktitle    = {Proceedings of the 17th International Society for Music Information
                    Retrieval Conference (ISMIR), New York, USA},
}
"""

INDEXES = {
    "default": "1.0",
    "test": "1.0",
    "1.0": core.Index(filename="compmusic_raga_index_1.0.json"),
}

REMOTES = {
    "features": download_utils.RemoteFileMetadata(
        filename="Indian Art Music Raga Recognition Dataset (features).zip",
        url="https://zenodo.org/record/7278506/files/Indian%20Art%20Music%20Raga%20Recognition%20Dataset%20%28features%29.zip?download=1",
        checksum="5dfc26dd1c2652ab75a62faec7f45f08",
    )
}

DOWNLOAD_INFO = """While annotations and metadata are freely downloadable, the audio of this 
    dataset has restricted access. Please access: https://zenodo.org/record/7278511 and request 
    access to the audio, specifying your purpose. The audio will be shared for research purposes. 
    In such case, when access to the audio is granted, please organize the dataset as specified 
    in the ``directory_structure.txt`` file found when you download the features and metadata using
    the .download() method of this dataloader. 
"""

LICENSE_INFO = "Creative Commons Attribution 4.0 International"


class Track(core.Track):
    """CompMusic Raga Dataset class

    Args:
        track_id (str): track id of the track
        data_home (str): Local path where the dataset is stored. default=None
            If `None`, looks for the data in the default directory, `~/mir_datasets`

    Attributes:
        audio_path (str): path to audio file
        tonic_path (str): path to tonic annotation
        tonic_fine_tuned_path (str): path to tonic fine-tuned annotation
        pitch_path (str): path to pitch annotation
        pitch_post_processed_path (str): path to post-processed pitch annotation
        nyas_segments_path (str): path to nyas segments annotation
        tani_segments_path (str): path to tani segments annotation

    Cached Properties:
        tonic (float): tonic annotation
        tonic_fine_tuned (float): tonic fine-tuned annotation
        pitch (F0Data): pitch annotation
        pitch_post_processed (F0Data): post-processed pitch annotation
        nyas_segments (EventData): nyas segments annotation
        tani_segments (EventData): tani segments annotation
        recording (str): name of the recording
        concert (str): name of the concert
        artist (str): name of the artist
        mbid (str): mbid of the recording
        raga (str): raga in the recording
        ragaid (str): id of the raga in the recording
        tradition (str): tradition name (carnatic or hindustani)

    """

    def __init__(
        self,
        track_id,
        data_home,
        dataset_name,
        index,
        metadata,
    ):
        super().__init__(
            track_id,
            data_home,
            dataset_name,
            index,
            metadata,
        )

        # Audio path
        self.audio_path = self.get_path("audio")

        # Annotation and feature paths
        self.tonic_path = self.get_path("tonic")
        self.tonic_fine_tuned_path = self.get_path("tonic_fine_tuned")
        self.pitch_path = self.get_path("pitch")
        self.pitch_post_processed_path = self.get_path("pitch_post_processed")
        self.nyas_segments_path = self.get_path("nyas_segments")
        self.tani_segments_path = self.get_path("tani_segments")

    @core.cached_property
    def tonic(self):
        return load_tonic(self.tonic_path)

    @core.cached_property
    def tonic_fine_tuned(self):
        return load_tonic(self.tonic_fine_tuned_path)

    @core.cached_property
    def pitch(self):
        return load_pitch(self.pitch_path)

    @core.cached_property
    def pitch_post_processed(self):
        return load_pitch(self.pitch_post_processed_path)

    @core.cached_property
    def nyas_segments(self):
        return load_nyas_segments(self.nyas_segments_path)

    @core.cached_property
    def tani_segments(self):
        return load_tani_segments(self.tani_segments_path)

    @core.cached_property
    def recording(self):
        return self._track_metadata.get("recording")

    @core.cached_property
    def concert(self):
        return self._track_metadata.get("concert")

    @core.cached_property
    def artist(self):
        return self._track_metadata.get("artist")

    @core.cached_property
    def mbid(self):
        return self._track_metadata.get("mbid")

    @core.cached_property
    def raga(self):
        return self._track_metadata.get("raga")

    @core.cached_property
    def ragaid(self):
        return self._track_metadata.get("ragaid")

    @core.cached_property
    def tradition(self):
        return self._track_metadata.get("tradition")

    @property
    def audio(self):
        """The track's audio

        Returns:
            * np.ndarray - audio signal
            * float - sample rate

        """
        return load_audio(self.audio_path)

    def to_jams(self):
        """Get the track's data in jams format

        Returns:
            jams.JAMS: the track's data in jams format

        """
        return jams_utils.jams_converter(
            audio_path=self.audio_path,
            f0_data=[
                (self.pitch, "pitch"),
                (self.pitch_post_processed, "pitch_post_processed"),
            ],
            event_data=[
                (self.nyas_segments, "nyas_segments"),
                (self.tani_segments, "tani_segments"),
            ],
            metadata={
                "tonic": self.tonic,
                "tonic_fine_tuned": self.tonic_fine_tuned,
                "recording": self.recording,
                "concert": self.concert,
                "artist": self.artist,
                "raga": self.raga,
                "mbid": self.mbid,
                "ragaid": self.ragaid,
                "tradition": self.tradition,
            },
        )

# no decorator here because of https://github.com/librosa/librosa/issues/1267
def load_audio(audio_path):
    """Load an audio file.

    Args:
        audio_path (str): path to audio file

    Returns:
        * np.ndarray - the audio signal
        * float - the sample rate of the audio file

    """
    if audio_path is None:
        return None
    return librosa.load(audio_path, sr=44100, mono=False)


@io.coerce_to_string_io
def load_tonic(fhandle):
    """Load track absolute tonic

    Args:
        fhandle (str or file-like): Local path where the tonic annotation is stored.

    Returns:
        float: Tonic annotation in Hz

    """
    reader = csv.reader(fhandle, delimiter="\t")
    tonic = float(next(reader)[0])
    return tonic


@io.coerce_to_string_io
def load_pitch(fhandle):
    """Load pitch

    Args:
        fhandle (str or file-like): Local path where the pitch annotation is stored.

    Returns:
        F0Data: pitch annotation

    """
    times = []
    freqs = []
    reader = csv.reader(fhandle, delimiter="\t")
    for line in reader:
        times.append(float(line[0]))
        freqs.append(float(line[1]))

    if not times:
        return None

    times = np.array(times)
    freqs = np.array(freqs)
    voicing = (freqs > 0).astype(float)
    return annotations.F0Data(times, "s", freqs, "hz", voicing, "binary")


@io.coerce_to_string_io
def load_nyas_segments(fhandle):
    """Load nyas segments

    Args:
        fhandle (str or file-like): Local path where the nyas segments annotation is stored.

    Returns:
        EventData: segment annotation

    """
    intervals = []
    events = []
    reader = csv.reader(fhandle, delimiter="\t")
    for line in reader:
        if len(line) == 1:
            line = line[0].split(" ")
        intervals.append([float(line[0]), float(line[1])])
        events.append("nyas")

    if not intervals:
        return None

    intervals = np.array(intervals)
    return annotations.EventData(intervals, "s", events, "open")


@io.coerce_to_string_io
def load_tani_segments(fhandle):
    """Load tani segments

    Args:
        fhandle (str or file-like): Local path where the tani segments annotation is stored.

    Returns:
        EventData: segment annotation

    """
    intervals = []
    events = []
    reader = csv.reader(fhandle, delimiter="\t")
    for line in reader:
        if len(line) == 1:
            line = line[0].split(" ")
        intervals.append([float(line[0]), float(line[1])])
        events.append("tani")

    if not intervals:
        return None

    intervals = np.array(intervals)
    return annotations.EventData(intervals, "s", events, "open")


@core.docstring_inherit(core.Dataset)
class Dataset(core.Dataset):
    """
    The compmusic_raga dataset
    """

    def __init__(self, data_home=None, version="default"):
        super().__init__(
            data_home,
            version,
            name="compmusic_raga",
            track_class=Track,
            bibtex=BIBTEX,
            indexes=INDEXES,
            remotes=REMOTES,
            download_info=DOWNLOAD_INFO,
            license_info=LICENSE_INFO,
        )

    @core.cached_property
    def _metadata(self):
        carnatic_metadata_path = os.path.join(
            self.data_home,
            "RagaDataset",
            "Carnatic",
            "_info_",
            "path_mbid_ragaid.json",
        )
        hindustani_metadata_path = os.path.join(
            self.data_home,
            "RagaDataset",
            "Hindustani",
            "_info_",
            "path_mbid_ragaid.json",
        )
        carnatic_mapping_path = os.path.join(
            self.data_home,
            "RagaDataset",
            "Carnatic",
            "_info_",
            "ragaId_to_ragaName_mapping.json",
        )
        hindustani_mapping_path = os.path.join(
            self.data_home,
            "RagaDataset",
            "Hindustani",
            "_info_",
            "ragaId_to_ragaName_mapping.json",
        )

        metadata = {}
        metadata = self.get_metadata(
            metadata, carnatic_metadata_path, carnatic_mapping_path, "carnatic"
        )
        metadata = self.get_metadata(
            metadata, hindustani_metadata_path, hindustani_mapping_path, "hindustani"
        )
        return metadata

    @staticmethod
    def get_metadata(metadata, metadata_path, mapping_path, tradition):
        try:
            with open(mapping_path, "r", errors="ignore") as fhandle:
                mapping = json.load(fhandle)
        except FileNotFoundError:
            raise FileNotFoundError("Metadata not found. Did you run .download()?")

        try:
            with open(metadata_path, "r", errors="ignore") as fhandle:
                meta = json.load(fhandle)
                for song in list(meta.keys()):
                    song_name = meta[song]["path"].split("/")[-1]
                    concert_name = meta[song]["path"].split("/")[-3]
                    artist_name = meta[song]["path"].split("/")[-4]
                    song_mbid = meta[song]["mbid"]
                    ragaid = meta[song]["ragaid"]
                    metadata[artist_name + "." + song_name] = {
                        "recording": song_name,
                        "concert": concert_name,
                        "artist": artist_name,
                        "mbid": song_mbid,
                        "raga": mapping[ragaid],
                        "ragaid": ragaid,
                        "tradition": tradition,
                    }
        except FileNotFoundError:
            raise FileNotFoundError("Metadata not found. Did you run .download()?")

        return metadata