Source code for mirdata.datasets.cante100

"""
cante100 Loader

.. admonition:: Dataset Info
    :class: dropdown

    The cante100 dataset contains 100 tracks taken from the COFLA corpus. We defined 10 style
    families of which 10 tracks each are included. Apart from the style family, we manually
    annotated the sections of the track in which the vocals are present. In addition, we
    provide a number of low-level descriptors and the fundamental frequency corresponding to
    the predominant melody for each track. The meta-information includes editorial meta-data
    and the musicBrainz ID.

    Total tracks: 100

    cante100 audio is only available upon request. To download the audio, request access at
    this link: https://zenodo.org/record/1324183. Then unzip the audio into the general
    cante100 dataset folder, alongside the rest of the annotations and files.

    Audio specifications:

    - Sampling frequency: 44.1 kHz
    - Bit-depth: 16 bit
    - Audio format: .mp3

    The cante100 dataset also provides the spectrogram of each track in csv format. The spectrograms
    can be downloaded without a request, so by default the cante100 loader uses the spectrograms of the tracks.

    The available annotations are:

    - F0 (predominant melody)
    - Automatic transcription of notes (of singing voice)

    CANTE100 LICENSE (COPIED FROM ZENODO PAGE)

    .. code-block:: latex

        The provided datasets are offered free of charge for internal non-commercial use.
        We do not grant any rights for redistribution or modification. All data collections were gathered
        by the COFLA team.
        © COFLA 2015. All rights reserved.

    For more details, please visit: http://www.cofla-project.com/?page_id=134

"""

import csv
import os
import xml.etree.ElementTree as ET
from typing import Optional, TextIO, Tuple

from deprecated.sphinx import deprecated
import librosa
import numpy as np
from smart_open import open

from mirdata import download_utils
from mirdata import jams_utils
from mirdata import core
from mirdata import annotations
from mirdata import io


# BibTeX entries for the two Zenodo records cited by this loader:
# "cante100 Metadata" (zenodo.1322542) and "cante100 Audio" (zenodo.1324183).
BIBTEX = """@dataset{nadine_kroher_2018_1322542,
  author       = {Nadine Kroher and
                  José Miguel Díaz-Báñez and
                  Joaquin Mora and
                  Emilia Gómez},
  title        = {cante100 Metadata},
  month        = jul,
  year         = 2018,
  publisher    = {Zenodo},
  version      = {1.0},
  doi          = {10.5281/zenodo.1322542},
  url          = {https://doi.org/10.5281/zenodo.1322542}
},
@dataset{nadine_kroher_2018_1324183,
  author       = {Nadine Kroher and
                  José Miguel Díaz-Báñez and
                  Joaquin Mora and
                  Emilia Gómez},
  title        = {cante100 Audio},
  month        = jul,
  year         = 2018,
  publisher    = {Zenodo},
  version      = {1.0},
  doi          = {10.5281/zenodo.1324183},
  url          = {https://doi.org/10.5281/zenodo.1324183}
}
"""

# Index versions known to this loader. The "default" and "test" keys both
# refer to version "1.0", whose index file is cante100_index_1.0.json.
INDEXES = {
    "default": "1.0",
    "test": "1.0",
    "1.0": core.Index(filename="cante100_index_1.0.json"),
}

# Remote files hosted on the Zenodo metadata record (1322542). Each entry gives
# the file name, its download URL, its md5 checksum and, for the zip archives,
# the directory (relative path) the archive is unzipped into.
# The audio is not listed here: it is only available upon request.
REMOTES = {
    "spectrogram": download_utils.RemoteFileMetadata(
        filename="cante100_spectrum.zip",
        url="https://zenodo.org/record/1322542/files/cante100_spectrum.zip?download=1",
        checksum="0b81fe0fd7ab2c1adc1ad789edb12981",  # the md5 checksum
        destination_dir="cante100_spectrum",  # relative path for where to unzip the data, or None
    ),
    "melody": download_utils.RemoteFileMetadata(
        filename="cante100midi_f0.zip",
        url="https://zenodo.org/record/1322542/files/cante100midi_f0.zip?download=1",
        checksum="cce543b5125eda5a984347b55fdcd5e8",  # the md5 checksum
        destination_dir="cante100midi_f0",  # relative path for where to unzip the data, or None
    ),
    "notes": download_utils.RemoteFileMetadata(
        filename="cante100_automaticTranscription.zip",
        url=(
            "https://zenodo.org/record/1322542/files/cante100_automaticTranscription.zip?download=1"
        ),
        checksum="47fea64c744f9fe678ae5642a8f0ee8e",  # the md5 checksum
        destination_dir="cante100_automaticTranscription",  # relative path for where to unzip the data, or None
    ),
    "metadata": download_utils.RemoteFileMetadata(
        filename="cante100Meta.xml",
        url="https://zenodo.org/record/1322542/files/cante100Meta.xml?download=1",
        checksum="6cce186ce77a06541cdb9f0a671afb46",  # the md5 checksum
    ),
    "README": download_utils.RemoteFileMetadata(
        filename="cante100_README.txt",
        url="https://zenodo.org/record/1322542/files/cante100_README.txt?download=1",
        checksum="184209b7e7d816fa603f0c7f481c0aae",  # the md5 checksum
    ),
}


# Instructions shown to the user for obtaining the request-only audio and where
# to place it relative to the downloaded annotation folders.
# NOTE(review): the trailing "{}" is presumably filled in with the dataset's
# data_home by the code that consumes this string -- confirm against core.Dataset.
DOWNLOAD_INFO = """
        This loader is designed to load the spectrum, as it is available for download.
        However, the loader supports audio as well. Unfortunately the audio files of the
        cante100 dataset are not available for free download, but upon request. However,
        you can request de audio in both links here:
        ==> http://www.cofla-project.com/?page_id=208
        ==> https://zenodo.org/record/1324183
        Then, locate the downloaded the cante100audio folder like this:
            > cante100/
                > cante100_spectrum/
                ... (rest of the annotation folders)
                > cante100audio/
        Remember to locate the cante100 folder to {}
"""

# License text for the dataset (non-commercial internal use only; no
# redistribution or modification rights), passed to the Dataset as license_info.
LICENSE_INFO = """
The provided datasets are offered free of charge for internal non-commercial use.
We do not grant any rights for redistribution or modification. All data collections
were gathered by the COFLA team. COFLA 2015. All rights reserved.
"""


class Track(core.Track):
    """cante100 track class

    Args:
        track_id (str): track id of the track
        data_home (str): Local path where the dataset is stored.
            If `None`, looks for the data in the default directory, `~/mir_datasets/cante100`

    Attributes:
        track_id (str): track id
        identifier (str): musicbrainz id of the track
        artist (str): performing artists
        title (str): title of the track song
        release (str): release where the track can be found
        duration (float): duration in seconds of the track

    Cached Properties:
        melody (F0Data): annotated melody
        notes (NoteData): annotated notes

    """

    def __init__(self, track_id, data_home, dataset_name, index, metadata):
        super().__init__(track_id, data_home, dataset_name, index, metadata)

        # Resolve the location of every file associated with this track
        # from the dataset index.
        self.spectrogram_path = self.get_path("spectrum")
        self.f0_path = self.get_path("f0")
        self.notes_path = self.get_path("notes")
        self.audio_path = self.get_path("audio")

    @property
    def identifier(self):
        """MusicBrainz id of the track, read from the track metadata."""
        track_info = self._track_metadata
        return track_info.get("musicBrainzID")

    @property
    def artist(self):
        """Performing artist, read from the track metadata."""
        track_info = self._track_metadata
        return track_info.get("artist")

    @property
    def title(self):
        """Title of the track, read from the track metadata."""
        track_info = self._track_metadata
        return track_info.get("title")

    @property
    def release(self):
        """Release where the track can be found, read from the track metadata."""
        track_info = self._track_metadata
        return track_info.get("release")

    @property
    def duration(self):
        """Duration of the track in seconds, read from the track metadata."""
        track_info = self._track_metadata
        return track_info.get("duration")

    @property
    def audio(self) -> Tuple[np.ndarray, float]:
        """The track's audio

        Returns:
            * np.ndarray - audio signal
            * float - sample rate

        """
        # Not cached: the audio is loaded from disk on every access.
        return load_audio(self.audio_path)

    @property
    def spectrogram(self) -> Optional[np.ndarray]:
        """Spectrogram of the track's audio

        Returns:
            np.ndarray: spectrogram

        """
        # Not cached: the spectrogram file is parsed on every access.
        return load_spectrogram(self.spectrogram_path)

    @core.cached_property
    def melody(self) -> Optional[annotations.F0Data]:
        """F0 (predominant melody) annotation loaded from ``f0_path``."""
        return load_melody(self.f0_path)

    @core.cached_property
    def notes(self) -> Optional[annotations.NoteData]:
        """Note annotation loaded from ``notes_path``."""
        return load_notes(self.notes_path)

    def to_jams(self):
        """Get the track's data in jams format

        Returns:
            jams.JAMS: the track's data in jams format

        """
        # Each annotation is paired with the label handed to the converter.
        melody_annotations = [(self.melody, "pitch_contour")]
        note_annotations = [(self.notes, "note_hz")]
        return jams_utils.jams_converter(
            audio_path=self.audio_path,
            spectrogram_path=self.spectrogram_path,
            f0_data=melody_annotations,
            note_data=note_annotations,
            metadata=self._track_metadata,
        )
@io.coerce_to_string_io
def load_spectrogram(fhandle: TextIO) -> np.ndarray:
    """Load a cante100 dataset spectrogram file.

    Args:
        fhandle (str or file-like): path or file-like object pointing to a spectrogram file

    Returns:
        np.ndarray: spectrogram, as a float64 array

    """
    # The file is parsed as text with a single space as the field delimiter.
    return np.genfromtxt(fhandle, delimiter=" ").astype(np.float64)
# no decorator here because of https://github.com/librosa/librosa/issues/1267
def load_audio(fpath: str) -> Tuple[np.ndarray, float]:
    """Load a cante100 audio file.

    The file is loaded with librosa at a sample rate of 22050 Hz and with
    ``mono=False``, i.e. without asking librosa to downmix the channels.

    Args:
        fpath (str): path to audio file

    Returns:
        * np.ndarray - the audio signal
        * float - The sample rate of the audio file

    """
    signal, sample_rate = librosa.load(fpath, sr=22050, mono=False)
    return signal, sample_rate
@io.coerce_to_string_io
def load_melody(fhandle: TextIO) -> Optional[annotations.F0Data]:
    """Load cante100 f0 annotations

    Each row of the comma-separated file holds a time stamp followed by a
    frequency value. The sign of the frequency encodes voicing: strictly
    positive values are marked as voiced (1.0), everything else as unvoiced
    (0.0), and the absolute value is kept as the frequency.

    Args:
        fhandle (str or file-like): path or file-like object pointing to melody annotation file

    Returns:
        F0Data: predominant melody

    """
    # Parse every row once into a (time, signed frequency) pair.
    samples = [
        (float(row[0]), float(row[1])) for row in csv.reader(fhandle, delimiter=",")
    ]

    time_stamps = np.array([time for time, _ in samples])
    frequencies = np.array([np.abs(signed_freq) for _, signed_freq in samples])
    voicing_flags = np.array([float(signed_freq > 0) for _, signed_freq in samples])

    return annotations.F0Data(
        time_stamps, "s", frequencies, "hz", voicing_flags, "binary"
    )
@io.coerce_to_string_io
def load_notes(fhandle: TextIO) -> annotations.NoteData:
    """Load note data from the annotation files

    Each row of the comma-separated file is read as
    ``onset, duration, midi_pitch``: the note interval is
    ``[onset, onset + duration]`` in seconds and the MIDI pitch is converted
    to a frequency in Hz. Every note gets a confidence of 1.0.

    Args:
        fhandle (str or file-like): path or file-like object pointing to a notes annotation file

    Returns:
        NoteData: note annotations

    """
    intervals = []
    pitches = []

    for row in csv.reader(fhandle, delimiter=","):
        # Parse the onset once and store it as a float, so interval rows are
        # homogeneous instead of mixing the raw string with a float.
        onset = float(row[0])
        intervals.append([onset, onset + float(row[1])])

        # Convert midi value to frequency (MIDI note 69 maps to 440 Hz).
        midi_pitch = int(row[2])
        pitches.append((440 / 32) * (2 ** ((midi_pitch - 9) / 12)))

    return annotations.NoteData(
        np.array(intervals, dtype="float"),
        "s",
        np.array(pitches, dtype="float"),
        "hz",
        # The files carry no confidence values: every note is fully confident.
        np.ones(len(pitches), dtype="float"),
        "binary",
    )
@core.docstring_inherit(core.Dataset)
class Dataset(core.Dataset):
    """
    The cante100 dataset
    """

    def __init__(self, data_home=None, version="default"):
        super().__init__(
            data_home,
            version,
            name="cante100",
            track_class=Track,
            bibtex=BIBTEX,
            indexes=INDEXES,
            remotes=REMOTES,
            download_info=DOWNLOAD_INFO,
            license_info=LICENSE_INFO,
        )

    @core.cached_property
    def _metadata(self):
        """Parse ``cante100Meta.xml`` into a per-track metadata dictionary.

        Track ids come from the ``id`` attribute of the root's children,
        left-padded with zeros to three characters. The remaining fields are
        collected in document order from the ``musicBrainzID``, ``artist``,
        ``title``, ``anthology``, ``duration_m`` and ``duration_s`` elements
        and matched to the track ids by position.

        Returns:
            dict: maps each track id to a dict with the keys ``musicBrainzID``,
            ``artist``, ``title``, ``release`` and ``duration`` (in seconds)

        Raises:
            FileNotFoundError: if the metadata file is not found in data_home

        """
        metadata_path = os.path.join(self.data_home, "cante100Meta.xml")

        try:
            with open(metadata_path, "r") as fhandle:
                tree = ET.parse(fhandle)
        except FileNotFoundError as err:
            raise FileNotFoundError(
                "Metadata not found. Did you run .download()?"
            ) from err

        root = tree.getroot()

        # ids, zero-padded to three characters ("7" -> "007", "42" -> "042")
        track_ids = [child.attrib.get("id").zfill(3) for child in root]

        # musicBrainzID
        identifiers = [ident.text for ident in root.iter("musicBrainzID")]

        # artist
        artists = [artist.text for artist in root.iter("artist")]

        # titles
        titles = [title.text for title in root.iter("title")]

        # releases
        releases = [release.text for release in root.iter("anthology")]

        # duration: minutes and seconds are stored separately; combine to seconds
        minutes = [float(minute.text) * 60 for minute in root.iter("duration_m")]
        seconds = [float(second.text) for second in root.iter("duration_s")]
        durations = [m + s for (m, s) in zip(minutes, seconds)]

        # Fields are matched to track ids by their position in the document.
        metadata = {}
        for track_id, position in zip(track_ids, range(len(artists))):
            metadata[track_id] = {
                "musicBrainzID": identifiers[position],
                "artist": artists[position],
                "title": titles[position],
                "release": releases[position],
                "duration": durations[position],
            }

        return metadata

    @deprecated(reason="Use mirdata.datasets.cante100.load_audio", version="0.3.4")
    def load_audio(self, *args, **kwargs):
        """Deprecated wrapper around the module-level ``load_audio``."""
        return load_audio(*args, **kwargs)

    @deprecated(
        reason="Use mirdata.datasets.cante100.load_spectrogram", version="0.3.4"
    )
    def load_spectrogram(self, *args, **kwargs):
        """Deprecated wrapper around the module-level ``load_spectrogram``."""
        return load_spectrogram(*args, **kwargs)

    @deprecated(reason="Use mirdata.datasets.cante100.load_melody", version="0.3.4")
    def load_melody(self, *args, **kwargs):
        """Deprecated wrapper around the module-level ``load_melody``."""
        return load_melody(*args, **kwargs)

    @deprecated(reason="Use mirdata.datasets.cante100.load_notes", version="0.3.4")
    def load_notes(self, *args, **kwargs):
        """Deprecated wrapper around the module-level ``load_notes``."""
        return load_notes(*args, **kwargs)