# Source code for mirdata.datasets.vocadito

"""vocadito Dataset Loader

.. admonition:: Dataset Info
    :class: dropdown

    vocadito is a dataset of 40 short excerpts of solo, monophonic singing. The excerpts are sung in 7 different languages by singers with varying levels of training, and are recorded on a variety of devices.

    Annotations are labeled by trained musicians. For each excerpt, we provide:

    * frame-level f0 annotations
    * 2 versions of note annotations (from 2 different annotators)
    * lyrics
    * language

    For more details, please visit: https://zenodo.org/record/5578807

"""
import csv
import os
from typing import BinaryIO, List, Optional, TextIO, Tuple

import librosa
import numpy as np
from smart_open import open

from mirdata import annotations, core, download_utils, jams_utils, io


# BibTeX entry for the technical report describing the dataset; handed to
# core.Dataset by the Dataset class in this module.
BIBTEX = """
@techreport{bittner2021vocadito,
      title={vocadito: A dataset of solo vocals with $f_0$, note, and lyric annotations}, 
      author={Rachel M. Bittner and Katherine Pasalo and Juan José Bosch and Gabriel Meseguer-Brocal and David Rubinstein},
      year={2021},
      institution={Spotify},
      number={2110.05580},
      note={https://arxiv.org/abs/2110.05580}
}
"""

# Index table keyed by version name. "default" and "test" both name version
# "1", whose index is described by vocadito_index_1.json.
# NOTE(review): the alias entries are presumably resolved by core.Dataset —
# confirm against mirdata.core.
INDEXES = {
    "default": "1",
    "test": "1",
    "1": core.Index(filename="vocadito_index_1.json"),
}

# Remote archive(s) for the dataset: a single zip hosted on Zenodo.
# NOTE(review): the checksum is 32 hex digits, presumably an MD5 digest —
# confirm against download_utils.RemoteFileMetadata.
REMOTES = {
    "zenodo": download_utils.RemoteFileMetadata(
        filename="Vocadito.zip",
        url="https://zenodo.org/record/5578807/files/vocadito.zip?download=1",
        checksum="dea40fd18f14d899643c4ba221b33a46",
    )
}

# Human-readable license string passed to core.Dataset as license_info.
LICENSE_INFO = "Creative Commons Attribution 4.0 International"


class Track(core.Track):
    """vocadito Track class

    Args:
        track_id (str): track id of the track

    Attributes:
        audio_path (str): path to the track's audio file
        f0_path (str): path to the track's f0 annotation file
        lyrics_path (str): path to the track's lyric annotation file
        notes_a1_path (str): path to the track's note annotation file for annotator A1
        notes_a2_path (str): path to the track's note annotation file for annotator A2
        track_id (str): track id
        singer_id (str): singer id
        average_pitch_midi (int): Average pitch in midi, computed from the f0 annotation
        language (str): The track's language. May contain multiple languages.

    Cached Properties:
        f0 (F0Data): human-annotated singing voice pitch
        lyrics (List[List[str]]): human-annotated lyrics
        notes_a1 (NoteData): human-annotated notes by annotator A1
        notes_a2 (NoteData): human-annotated notes by annotator A2
    """

    def __init__(self, track_id, data_home, dataset_name, index, metadata):
        super().__init__(track_id, data_home, dataset_name, index, metadata)

        # Annotation file paths, looked up by their keys in the track index.
        self.f0_path = self.get_path("f0")
        self.lyrics_path = self.get_path("lyrics")
        self.notes_a1_path = self.get_path("notesA1")
        self.notes_a2_path = self.get_path("notesA2")

        # Audio file path, looked up the same way.
        self.audio_path = self.get_path("audio")

    @property
    def singer_id(self) -> Optional[str]:
        """Singer id from the track metadata (None if the key is absent)."""
        return self._track_metadata.get("singer_id")

    @property
    def average_pitch_midi(self) -> Optional[int]:
        """Average pitch in midi from the track metadata (None if the key is absent)."""
        return self._track_metadata.get("average_pitch_midi")

    @property
    def language(self) -> Optional[str]:
        """Language entry from the track metadata (None if the key is absent)."""
        return self._track_metadata.get("language")

    @core.cached_property
    def f0(self) -> Optional[annotations.F0Data]:
        """F0 annotation loaded from ``f0_path``."""
        return load_f0(self.f0_path)

    @core.cached_property
    def lyrics(self) -> Optional[List[List[str]]]:
        """Lyrics annotation loaded from ``lyrics_path``."""
        return load_lyrics(self.lyrics_path)

    @core.cached_property
    def notes_a1(self) -> Optional[annotations.NoteData]:
        """Note annotation by annotator A1, loaded from ``notes_a1_path``."""
        return load_notes(self.notes_a1_path)

    @core.cached_property
    def notes_a2(self) -> Optional[annotations.NoteData]:
        """Note annotation by annotator A2, loaded from ``notes_a2_path``."""
        return load_notes(self.notes_a2_path)

    @property
    def audio(self) -> Optional[Tuple[np.ndarray, float]]:
        """solo vocal audio (mono)

        Returns:
            * np.ndarray - audio signal
            * float - sample rate

        """
        return load_audio(self.audio_path)

    def to_jams(self):
        """Get the track's data in jams format

        Returns:
            jams.JAMS: the track's data in jams format

        """
        # The metadata properties use ``.get`` and can therefore yield None.
        # Only coerce to int when a value is present, so a missing entry is
        # exported as None instead of raising TypeError from ``int(None)``.
        average_pitch = self.average_pitch_midi
        return jams_utils.jams_converter(
            audio_path=self.audio_path,
            f0_data=[(self.f0, None)],
            note_data=[
                (self.notes_a1, "notes - Annotator 1"),
                (self.notes_a2, "notes - Annotator 2"),
            ],
            metadata={
                "singer_id": self.singer_id,
                "average_pitch_midi": (
                    int(average_pitch) if average_pitch is not None else None
                ),
                "language": self.language,
                "track_id": self.track_id,
                "lyrics": self.lyrics,
            },
        )


@io.coerce_to_bytes_io
def load_audio(fhandle: BinaryIO) -> Tuple[np.ndarray, float]:
    """Load vocadito vocal audio

    Args:
        fhandle (str or file-like): File-like object or path to audio file

    Returns:
        * np.ndarray - audio signal
        * float - sample rate

    """
    # sr=None asks librosa not to resample (keep the file's own sample rate);
    # mono=True requests a single-channel signal.
    audio, sample_rate = librosa.load(fhandle, sr=None, mono=True)
    return audio, sample_rate


@io.coerce_to_string_io
def load_f0(fhandle: TextIO) -> annotations.F0Data:
    """Load a vocadito f0 annotation

    The file is read as comma-separated values: column 0 holds the time
    stamps (seconds) and column 1 the frequencies (Hz). A frame is marked as
    voiced when its frequency is strictly positive.

    Args:
        fhandle (str or file-like): File-like object or path to f0 annotation file

    Raises:
        IOError: If the f0 annotation file does not exist

    Returns:
        F0Data: the f0 annotation data

    """
    # genfromtxt collapses a single-row file to a 1-D array, which would make
    # the column indexing below fail; force a (rows, columns) layout.
    times_frequencies = np.atleast_2d(np.genfromtxt(fhandle, delimiter=","))
    times = times_frequencies[:, 0]
    frequencies = times_frequencies[:, 1]
    return annotations.F0Data(
        times=times,
        time_unit="s",
        frequencies=frequencies,
        frequency_unit="hz",
        # Binary voicing flag stored as float64: 1.0 where frequency > 0.
        voicing=(frequencies > 0).astype(np.float64),
        voicing_unit="binary",
    )


@io.coerce_to_string_io
def load_notes(fhandle: TextIO) -> Optional[annotations.NoteData]:
    """Load a note annotation file

    The file is read as comma-separated values: column 0 is the note start
    time (seconds), column 1 the pitch (Hz) and column 2 the duration
    (seconds). Each interval is built as ``(start, start + duration)``.

    Args:
        fhandle (str or file-like): str or file-like to note annotation file

    Raises:
        IOError: if file doesn't exist

    Returns:
        NoteData: note annotation

    """
    # genfromtxt collapses a single-note file to a 1-D array, which would make
    # the column indexing below fail; force a (rows, columns) layout.
    notes = np.atleast_2d(np.genfromtxt(fhandle, delimiter=","))
    onsets = notes[:, 0]
    durations = notes[:, 2]
    return annotations.NoteData(
        intervals=np.column_stack((onsets, onsets + durations)),
        interval_unit="s",
        pitches=notes[:, 1],
        pitch_unit="hz",
    )


@io.coerce_to_string_io
def load_lyrics(fhandle: TextIO) -> List[List[str]]:
    """Load a lyrics annotation

    Args:
        fhandle (str or file-like): File-like object or path to lyric annotation file

    Raises:
        IOError: if the lyrics annotation file does not exist

    Returns:
        List[List[str]]: one list per row of the file, holding that row's
        space-separated fields as parsed by ``csv.reader``

    """
    # A space delimiter splits each row into word-level fields.
    rows = csv.reader(fhandle, delimiter=" ")
    return list(rows)


@core.docstring_inherit(core.Dataset)
class Dataset(core.Dataset):
    """
    The vocadito dataset
    """

    def __init__(self, data_home=None, version="default"):
        super().__init__(
            data_home,
            version,
            name="vocadito",
            track_class=Track,
            bibtex=BIBTEX,
            indexes=INDEXES,
            remotes=REMOTES,
            license_info=LICENSE_INFO,
        )

    @core.cached_property
    def _metadata(self):
        """Per-track metadata read from ``vocadito_metadata.csv`` in ``data_home``.

        Returns:
            dict: maps each ``track_id`` to a dict with the keys ``singer_id``,
            ``average_pitch_midi`` (the CSV's ``average_pitch`` column
            converted to int) and ``language``

        Raises:
            FileNotFoundError: if the metadata file is not found

        """
        metadata_path = os.path.join(self.data_home, "vocadito_metadata.csv")
        try:
            # ``open`` is the smart_open implementation imported at module top.
            with open(metadata_path, "r") as fhandle:
                return {
                    row["track_id"]: {
                        "singer_id": row["singer_id"],
                        "average_pitch_midi": int(row["average_pitch"]),
                        "language": row["language"],
                    }
                    for row in csv.DictReader(fhandle)
                }
        except FileNotFoundError as err:
            # Same exception type with a more helpful message; chain the
            # original error explicitly so the failing path stays visible.
            raise FileNotFoundError(
                "Metadata not found. Did you run .download()?"
            ) from err