Source code for mirdata.datasets.compmusic_raga

"""CompMusic Raga Dataset Loader

.. admonition:: Dataset Info
    :class: dropdown

    The CompMusic Rāga datasets comprise two sizable datasets, one for each music tradition,
    Carnatic and Hindustani. Each dataset contains full-length audio recordings and their
    associated rāga labels, and can be used to develop and evaluate approaches for automatic
    rāga recognition in Indian art music.

    These datasets are derived from the CompMusic corpora of Indian Art Music, and were
    compiled at the Music Technology Group by researchers working on the computational
    analysis of Carnatic and Hindustani music within the framework of the ERC-funded
    CompMusic project.
    
    Each recording is associated with a MusicBrainz ID (MBID). Using the MBID, additional
    information can be obtained through the Dunya API or the pycompmusic library.
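
    As a rough illustration (the token handling and exact pycompmusic calls below are
    assumptions and may differ between versions), recording-level metadata could be fetched
    as follows:

    .. code-block:: python

        from compmusic import dunya

        # A personal API token from the Dunya website is assumed to be required.
        dunya.set_token("<your-dunya-api-token>")

        # Fetch editorial metadata for a recording by its MBID (hypothetical MBID).
        recording_info = dunya.carnatic.get_recording("<recording-mbid>")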

    The Carnatic subset comprises 124 hours of audio recordings and editorial metadata that 
    includes carefully curated and verified rāga labels. It contains 480 recordings belonging 
    to 40 rāgas with 12 recordings per rāga.

    The Hindustani subset comprises 116 hours of audio recordings and editorial metadata that 
    includes carefully curated and verified rāga labels. It contains 300 recordings belonging 
    to 30 rāgas with 10 recordings per rāga. 

    The dataset also includes the following features for each file (accessed through the
    track attributes, as shown in the sketch below):

    * Tonic: float indicating the recording tonic
    * Tonic fine tuned: float indicating the manually fine-tuned recording tonic
    * Predominant pitch: automatically-extracted predominant pitch time-series (timestamps and freq. values)
    * Post-processed pitch: automatically-extracted and post-processed predominant pitch time-series
    * Nyas segments: KNN-extracted segments of Nyas (start and end times provided)
    * Tani segments: KNN-extracted segments of Tanis (start and end times provided)

    The dataset includes both txt and json files that contain, for each audio recording,
    its MBID, the path of the audio/feature files and the associated rāga identifier. Each
    rāga is assigned a unique identifier by Dunya, which serves a purpose similar to that of
    the MBID. A mapping from each rāga id to its transliterated name is also provided, as
    sketched below.
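
    For illustration, the metadata files are laid out roughly as follows (keys and values
    are placeholders; the loader only reads the ``path``, ``mbid`` and ``ragaid`` fields of
    ``path_mbid_ragaid.json`` and the id-to-name entries of
    ``ragaId_to_ragaName_mapping.json``):

    .. code-block:: text

        path_mbid_ragaid.json
        {
            "<index>": {
                "path": "<relative path to the audio/feature files>",
                "mbid": "<recording MBID>",
                "ragaid": "<Dunya raga id>"
            }
        }

        ragaId_to_ragaName_mapping.json
        {
            "<Dunya raga id>": "<transliterated raga name>"
        }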

    For more information about the dataset please refer to: https://compmusic.upf.edu/node/328

"""

import os
import csv
import json

import librosa
import numpy as np

from mirdata import annotations, core, download_utils, io, jams_utils
from smart_open import open


BIBTEX = """
@inproceedings{gulati_2016,
  author       = {Gulati, Sankalp and Serrà, Joan and Ganguli, Kaustuv Kanti
                    and Şentürk, Sertan and Serra, Xavier},
  title        = {{Time-delayed melody surfaces for raga recognition}},
  year         = 2016,
  pages        = {751--757},
  booktitle    = {Proceedings of the 17th International Society for Music Information
                    Retrieval Conference (ISMIR), New York, USA},
}
"""

INDEXES = {
    "default": "1.0",
    "test": "1.0",
    "1.0": core.Index(filename="compmusic_raga_index_1.0.json"),
}

REMOTES = {
    "features": download_utils.RemoteFileMetadata(
        filename="Indian Art Music Raga Recognition Dataset (features).zip",
        url="https://zenodo.org/record/7278506/files/Indian%20Art%20Music%20Raga%20Recognition%20Dataset%20%28features%29.zip?download=1",
        checksum="5dfc26dd1c2652ab75a62faec7f45f08",
    )
}

DOWNLOAD_INFO = """While annotations and metadata are freely downloadable, the audio of this 
    dataset has restricted access. Please access: https://zenodo.org/record/7278511 and request 
    access to the audio, specifying your purpose. The audio will be shared for research purposes. 
    In such case, when access to the audio is granted, please organize the dataset as specified 
    in the ``directory_structure.txt`` file found when you download the features and metadata using
    the .download() method of this dataloader. 
"""

LICENSE_INFO = "Creative Commons Attribution 4.0 International"


class Track(core.Track):
    """CompMusic Raga Dataset class

    Args:
        track_id (str): track id of the track
        data_home (str): Local path where the dataset is stored. default=None
            If `None`, looks for the data in the default directory, `~/mir_datasets`

    Attributes:
        audio_path (str): path to audio file
        tonic_path (str): path to tonic annotation
        tonic_fine_tuned_path (str): path to tonic fine-tuned annotation
        pitch_path (str): path to pitch annotation
        pitch_post_processed_path (str): path to post-processed pitch annotation
        nyas_segments_path (str): path to nyas segments annotation
        tani_segments_path (str): path to tani segments annotation

    Cached Properties:
        tonic (float): tonic annotation
        tonic_fine_tuned (float): tonic fine-tuned annotation
        pitch (F0Data): pitch annotation
        pitch_post_processed (F0Data): post-processed pitch annotation
        nyas_segments (EventData): nyas segments annotation
        tani_segments (EventData): tani segments annotation
        recording (str): name of the recording
        concert (str): name of the concert
        artist (str): name of the artist
        mbid (str): mbid of the recording
        raga (str): raga in the recording
        ragaid (str): id of the raga in the recording
        tradition (str): tradition name (carnatic or hindustani)

    """

    def __init__(
        self,
        track_id,
        data_home,
        dataset_name,
        index,
        metadata,
    ):
        super().__init__(
            track_id,
            data_home,
            dataset_name,
            index,
            metadata,
        )

        # Audio path
        self.audio_path = self.get_path("audio")

        # Annotation and feature paths
        self.tonic_path = self.get_path("tonic")
        self.tonic_fine_tuned_path = self.get_path("tonic_fine_tuned")
        self.pitch_path = self.get_path("pitch")
        self.pitch_post_processed_path = self.get_path("pitch_post_processed")
        self.nyas_segments_path = self.get_path("nyas_segments")
        self.tani_segments_path = self.get_path("tani_segments")

    @core.cached_property
    def tonic(self):
        return load_tonic(self.tonic_path)

    @core.cached_property
    def tonic_fine_tuned(self):
        return load_tonic(self.tonic_fine_tuned_path)

    @core.cached_property
    def pitch(self):
        return load_pitch(self.pitch_path)

    @core.cached_property
    def pitch_post_processed(self):
        return load_pitch(self.pitch_post_processed_path)

    @core.cached_property
    def nyas_segments(self):
        return load_nyas_segments(self.nyas_segments_path)

    @core.cached_property
    def tani_segments(self):
        return load_tani_segments(self.tani_segments_path)

    @core.cached_property
    def recording(self):
        return self._track_metadata.get("recording")

    @core.cached_property
    def concert(self):
        return self._track_metadata.get("concert")

    @core.cached_property
    def artist(self):
        return self._track_metadata.get("artist")

    @core.cached_property
    def mbid(self):
        return self._track_metadata.get("mbid")

    @core.cached_property
    def raga(self):
        return self._track_metadata.get("raga")

    @core.cached_property
    def ragaid(self):
        return self._track_metadata.get("ragaid")

    @core.cached_property
    def tradition(self):
        return self._track_metadata.get("tradition")

    @property
    def audio(self):
        """The track's audio

        Returns:
            * np.ndarray - audio signal
            * float - sample rate

        """
        return load_audio(self.audio_path)

    def to_jams(self):
        """Get the track's data in jams format

        Returns:
            jams.JAMS: the track's data in jams format

        """
        return jams_utils.jams_converter(
            audio_path=self.audio_path,
            f0_data=[
                (self.pitch, "pitch"),
                (self.pitch_post_processed, "pitch_post_processed"),
            ],
            event_data=[
                (self.nyas_segments, "nyas_segments"),
                (self.tani_segments, "tani_segments"),
            ],
            metadata={
                "tonic": self.tonic,
                "tonic_fine_tuned": self.tonic_fine_tuned,
                "recording": self.recording,
                "concert": self.concert,
                "artist": self.artist,
                "raga": self.raga,
                "mbid": self.mbid,
                "ragaid": self.ragaid,
                "tradition": self.tradition,
            },
        )

# no decorator here because of https://github.com/librosa/librosa/issues/1267
def load_audio(audio_path):
    """Load an audio file.

    Args:
        audio_path (str): path to audio file

    Returns:
        * np.ndarray - the audio signal
        * float - the sample rate of the audio file

    """
    if audio_path is None:
        return None
    return librosa.load(audio_path, sr=44100, mono=False)


@io.coerce_to_string_io
def load_tonic(fhandle):
    """Load track absolute tonic

    Args:
        fhandle (str or file-like): Local path where the tonic annotation is stored.

    Returns:
        float: Tonic annotation in Hz

    """
    reader = csv.reader(fhandle, delimiter="\t")
    tonic = float(next(reader)[0])
    return tonic


@io.coerce_to_string_io
def load_pitch(fhandle):
    """Load pitch

    Args:
        fhandle (str or file-like): Local path where the pitch annotation is stored.

    Returns:
        F0Data: pitch annotation

    """
    times = []
    freqs = []
    reader = csv.reader(fhandle, delimiter="\t")
    for line in reader:
        times.append(float(line[0]))
        freqs.append(float(line[1]))

    if not times:
        return None

    times = np.array(times)
    freqs = np.array(freqs)
    voicing = (freqs > 0).astype(float)
    return annotations.F0Data(times, "s", freqs, "hz", voicing, "binary")


@io.coerce_to_string_io
def load_nyas_segments(fhandle):
    """Load nyas segments

    Args:
        fhandle (str or file-like): Local path where the nyas segments annotation is stored.

    Returns:
        EventData: segment annotation

    """
    intervals = []
    events = []
    reader = csv.reader(fhandle, delimiter="\t")
    for line in reader:
        if len(line) == 1:
            line = line[0].split(" ")
        intervals.append([float(line[0]), float(line[1])])
        events.append("nyas")

    if not intervals:
        return None

    intervals = np.array(intervals)
    return annotations.EventData(intervals, "s", events, "open")


@io.coerce_to_string_io
def load_tani_segments(fhandle):
    """Load tani segments

    Args:
        fhandle (str or file-like): Local path where the tani segments annotation is stored.

    Returns:
        EventData: segment annotation

    """
    intervals = []
    events = []
    reader = csv.reader(fhandle, delimiter="\t")
    for line in reader:
        if len(line) == 1:
            line = line[0].split(" ")
        intervals.append([float(line[0]), float(line[1])])
        events.append("tani")

    if not intervals:
        return None

    intervals = np.array(intervals)
    return annotations.EventData(intervals, "s", events, "open")


@core.docstring_inherit(core.Dataset)
class Dataset(core.Dataset):
    """
    The compmusic_raga dataset
    """

    def __init__(self, data_home=None, version="default"):
        super().__init__(
            data_home,
            version,
            name="compmusic_raga",
            track_class=Track,
            bibtex=BIBTEX,
            indexes=INDEXES,
            remotes=REMOTES,
            download_info=DOWNLOAD_INFO,
            license_info=LICENSE_INFO,
        )

    @core.cached_property
    def _metadata(self):
        carnatic_metadata_path = os.path.join(
            self.data_home,
            "RagaDataset",
            "Carnatic",
            "_info_",
            "path_mbid_ragaid.json",
        )
        hindustani_metadata_path = os.path.join(
            self.data_home,
            "RagaDataset",
            "Hindustani",
            "_info_",
            "path_mbid_ragaid.json",
        )
        carnatic_mapping_path = os.path.join(
            self.data_home,
            "RagaDataset",
            "Carnatic",
            "_info_",
            "ragaId_to_ragaName_mapping.json",
        )
        hindustani_mapping_path = os.path.join(
            self.data_home,
            "RagaDataset",
            "Hindustani",
            "_info_",
            "ragaId_to_ragaName_mapping.json",
        )

        metadata = {}
        metadata = self.get_metadata(
            metadata, carnatic_metadata_path, carnatic_mapping_path, "carnatic"
        )
        metadata = self.get_metadata(
            metadata, hindustani_metadata_path, hindustani_mapping_path, "hindustani"
        )
        return metadata

    @staticmethod
    def get_metadata(metadata, metadata_path, mapping_path, tradition):
        try:
            with open(mapping_path, "r", errors="ignore") as fhandle:
                mapping = json.load(fhandle)
        except FileNotFoundError:
            raise FileNotFoundError("Metadata not found. Did you run .download()?")

        try:
            with open(metadata_path, "r", errors="ignore") as fhandle:
                meta = json.load(fhandle)
                for song in list(meta.keys()):
                    song_name = meta[song]["path"].split("/")[-1]
                    concert_name = meta[song]["path"].split("/")[-3]
                    artist_name = meta[song]["path"].split("/")[-4]
                    song_mbid = meta[song]["mbid"]
                    ragaid = meta[song]["ragaid"]
                    metadata[artist_name + "." + song_name] = {
                        "recording": song_name,
                        "concert": concert_name,
                        "artist": artist_name,
                        "mbid": song_mbid,
                        "raga": mapping[ragaid],
                        "ragaid": ragaid,
                        "tradition": tradition,
                    }
        except FileNotFoundError:
            raise FileNotFoundError("Metadata not found. Did you run .download()?")

        return metadata