"""vocadito Dataset Loader
.. admonition:: Dataset Info
:class: dropdown
vocadito is a dataset of 40 short excerpts of solo, monophonic singing. The excerpts are sung in 7 different languages by singers with varying levels of training, and are recorded on a variety of devices.
Annotations are labeled by trained musicians. For each excerpt, we provide:
frame-level f0 annotations
2 versions of note annotations (from 2 different annotators)
lyrics
language
For more details, please visit: https://zenodo.org/record/5578807
"""
import csv
import os
from typing import BinaryIO, List, Optional, TextIO, Tuple
import librosa
import numpy as np
from smart_open import open
from mirdata import annotations, core, download_utils, jams_utils, io
# BibTeX entry for citing the vocadito technical report.
BIBTEX = """
@techreport{bittner2021vocadito,
title={vocadito: A dataset of solo vocals with $f_0$, note, and lyric annotations},
author={Rachel M. Bittner and Katherine Pasalo and Juan José Bosch and Gabriel Meseguer-Brocal and David Rubinstein},
year={2021},
institution={Spotify},
number={2110.05580},
note={https://arxiv.org/abs/2110.05580}
}
"""
# Available index versions. "default" and "test" are aliases that both point
# at version "1", which is backed by the vocadito_index_1.json index file.
INDEXES = {
"default": "1",
"test": "1",
"1": core.Index(filename="vocadito_index_1.json"),
}
# Remote archives for the dataset, keyed by a short source name. Each entry
# records the filename, the download URL and the expected checksum.
REMOTES = {
"zenodo": download_utils.RemoteFileMetadata(
filename="Vocadito.zip",
url="https://zenodo.org/record/5578807/files/vocadito.zip?download=1",
checksum="dea40fd18f14d899643c4ba221b33a46",
)
}
# Human-readable license name, passed to core.Dataset as license_info.
LICENSE_INFO = "Creative Commons Attribution 4.0 International"
class Track(core.Track):
    """vocadito Track class

    Args:
        track_id (str): track id of the track

    Attributes:
        audio_path (str): path to the track's audio file
        f0_path (str): path to the track's f0 annotation file
        lyrics_path (str): path to the track's lyric annotation file
        notes_a1_path (str): path to the track's note annotation file for annotator A1
        notes_a2_path (str): path to the track's note annotation file for annotator A2
        track_id (str): track id
        singer_id (str): singer id
        average_pitch_midi (int): Average pitch in midi, computed from the f0 annotation
        language (str): The track's language. May contain multiple languages.

    Cached Properties:
        f0 (F0Data): human-annotated singing voice pitch
        lyrics (List[List[str]]): human-annotated lyrics
        notes_a1 (NoteData): human-annotated notes by annotator A1
        notes_a2 (NoteData): human-annotated notes by annotator A2

    """

    def __init__(self, track_id, data_home, dataset_name, index, metadata):
        super().__init__(track_id, data_home, dataset_name, index, metadata)

        # Each path is looked up by its key through the inherited get_path().
        self.f0_path = self.get_path("f0")
        self.lyrics_path = self.get_path("lyrics")
        self.notes_a1_path = self.get_path("notesA1")
        self.notes_a2_path = self.get_path("notesA2")
        self.audio_path = self.get_path("audio")

    @property
    def singer_id(self):
        """Singer id from the track metadata, or None when the key is absent."""
        return self._track_metadata.get("singer_id")

    @property
    def average_pitch_midi(self):
        """Average pitch from the track metadata, or None when the key is absent."""
        return self._track_metadata.get("average_pitch_midi")

    @property
    def language(self):
        """Language from the track metadata, or None when the key is absent."""
        return self._track_metadata.get("language")

    @core.cached_property
    def f0(self) -> Optional[annotations.F0Data]:
        """f0 annotation loaded from ``f0_path``."""
        return load_f0(self.f0_path)

    @core.cached_property
    def lyrics(self) -> Optional[List[List[str]]]:
        """Lyrics annotation loaded from ``lyrics_path``."""
        return load_lyrics(self.lyrics_path)

    @core.cached_property
    def notes_a1(self) -> Optional[annotations.NoteData]:
        """Note annotation by annotator A1, loaded from ``notes_a1_path``."""
        return load_notes(self.notes_a1_path)

    @core.cached_property
    def notes_a2(self) -> Optional[annotations.NoteData]:
        """Note annotation by annotator A2, loaded from ``notes_a2_path``."""
        return load_notes(self.notes_a2_path)

    @property
    def audio(self) -> Optional[Tuple[np.ndarray, float]]:
        """solo vocal audio (mono)

        Returns:
            * np.ndarray - audio signal
            * float - sample rate

        """
        return load_audio(self.audio_path)

    def to_jams(self):
        """Get the track's data in jams format

        Returns:
            jams.JAMS: the track's data in jams format

        """
        # The metadata accessor uses .get() and may therefore yield None;
        # int(None) would raise TypeError, so only coerce a real value.
        average_pitch = self.average_pitch_midi
        if average_pitch is not None:
            average_pitch = int(average_pitch)

        return jams_utils.jams_converter(
            audio_path=self.audio_path,
            f0_data=[(self.f0, None)],
            note_data=[
                (self.notes_a1, "notes - Annotator 1"),
                (self.notes_a2, "notes - Annotator 2"),
            ],
            metadata={
                "singer_id": self.singer_id,
                "average_pitch_midi": average_pitch,
                "language": self.language,
                "track_id": self.track_id,
                "lyrics": self.lyrics,
            },
        )
@io.coerce_to_bytes_io
def load_audio(fhandle: BinaryIO) -> Tuple[np.ndarray, float]:
    """Load vocadito vocal audio

    Args:
        fhandle (str or file-like): File-like object or path to audio file

    Returns:
        * np.ndarray - audio signal
        * float - sample rate

    """
    # sr=None asks librosa not to resample; mono=True requests a mono signal.
    return librosa.load(fhandle, sr=None, mono=True)
@io.coerce_to_string_io
def load_f0(fhandle: TextIO) -> annotations.F0Data:
    """Load a vocadito f0 annotation

    The file is parsed as comma-separated values: the first column is used
    as the time stamps (seconds) and the second as the frequencies (Hz).
    A frame counts as voiced when its frequency is greater than zero.

    Args:
        fhandle (str or file-like): File-like object or path to f0 annotation file

    Raises:
        IOError: If f0_path does not exist

    Returns:
        F0Data: the f0 annotation data

    """
    # genfromtxt collapses a one-row file into a 1-D array, which would make
    # the column indexing below fail; atleast_2d restores the row axis.
    times_frequencies = np.atleast_2d(np.genfromtxt(fhandle, delimiter=","))
    times = times_frequencies[:, 0]
    frequencies = times_frequencies[:, 1]

    return annotations.F0Data(
        times=times,
        time_unit="s",
        frequencies=frequencies,
        frequency_unit="hz",
        voicing=(frequencies > 0).astype(np.float64),
        voicing_unit="binary",
    )
@io.coerce_to_string_io
def load_notes(fhandle: TextIO) -> Optional[annotations.NoteData]:
    """Load a note annotation file

    The file is parsed as comma-separated values: the first column is used
    as the note start (seconds), the second as the pitch (Hz) and the third
    is added to the start to obtain the note end.

    Args:
        fhandle (str or file-like): str or file-like to note annotation file

    Raises:
        IOError: if file doesn't exist

    Returns:
        NoteData: note annotation, or None if the file contains no notes

    """
    notes = np.genfromtxt(fhandle, delimiter=",")

    # An empty file yields an empty array; there is nothing to annotate.
    if notes.size == 0:
        return None

    # genfromtxt collapses a one-row file into a 1-D array, which would make
    # the column indexing below fail; atleast_2d restores the row axis.
    notes = np.atleast_2d(notes)
    onsets = notes[:, 0]
    offsets = onsets + notes[:, 2]

    return annotations.NoteData(
        intervals=np.column_stack((onsets, offsets)),
        interval_unit="s",
        pitches=notes[:, 1],
        pitch_unit="hz",
    )
@io.coerce_to_string_io
def load_lyrics(fhandle: TextIO) -> List[List[str]]:
    """Load a lyrics annotation

    The file is read with ``csv.reader`` using a single space as delimiter,
    so every line becomes one list of space-separated fields.

    Args:
        fhandle (str or file-like): File-like object or path to lyric annotation file

    Raises:
        IOError: if lyrics_path does not exist

    Returns:
        List[List[str]]: one list of fields per line of the annotation file

    """
    return list(csv.reader(fhandle, delimiter=" "))
@core.docstring_inherit(core.Dataset)
class Dataset(core.Dataset):
    """
    The vocadito dataset
    """

    def __init__(self, data_home=None, version="default"):
        super().__init__(
            data_home,
            version,
            name="vocadito",
            track_class=Track,
            bibtex=BIBTEX,
            indexes=INDEXES,
            remotes=REMOTES,
            license_info=LICENSE_INFO,
        )

    @core.cached_property
    def _metadata(self):
        """Per-track metadata read from ``vocadito_metadata.csv`` in ``data_home``.

        Returns:
            dict: maps each ``track_id`` to a dict with the keys ``singer_id``,
            ``average_pitch_midi`` (the ``average_pitch`` column converted to
            int) and ``language``

        Raises:
            FileNotFoundError: if the metadata file is not found

        """
        metadata_path = os.path.join(self.data_home, "vocadito_metadata.csv")

        try:
            with open(metadata_path, "r") as fhandle:
                return {
                    row["track_id"]: {
                        "singer_id": row["singer_id"],
                        "average_pitch_midi": int(row["average_pitch"]),
                        "language": row["language"],
                    }
                    for row in csv.DictReader(fhandle)
                }
        except FileNotFoundError:
            # Re-raise with a hint, since the usual cause is a missing download.
            raise FileNotFoundError("Metadata not found. Did you run .download()?")