"""Dagstuhl ChoirSet Dataset Loader
.. admonition:: Dataset Info
:class: dropdown
Dagstuhl ChoirSet (DCS) is a multitrack dataset of a cappella choral music.
The dataset includes recordings of an amateur vocal ensemble performing two
choir pieces in full choir and quartet settings (total duration 55min 30sec).
The audio data was recorded during an MIR seminar at Schloss Dagstuhl using
different close-up microphones to capture the individual singers’ voices:
* Larynx microphone (LRX): contact microphone attached to the singer's throat.
* Dynamic microphone (DYN): handheld dynamic microphone.
* Headset microphone (HSM): microphone close to the singer's mouth.
LRX, DYN and HSM recordings are provided on the Track level.
All tracks in the dataset have a LRX recording, while only a subset has DYN and HSM recordings.
In addition to the close-up microphone tracks, the dataset also provides the following recordings:
* Room microphone mixdown (STM): mixdown of the stereo room microphone.
* Room microphone left (STL): left channel of the stereo microphone.
* Room microphone right (STR): right channel of the stereo microphone.
* Room microphone mixdown with reverb (StereoReverb_STM): STM signal with artificial reverb.
* Piano left (SPL): left channel of the piano accompaniment.
* Piano right (SPR): right channel of the piano accompaniment.
All room microphone and piano recordings are provided on the Multitrack level.
All multitracks have room microphone signals, while only a subset has piano recordings.
For more details, we refer to:
Sebastian Rosenzweig (1), Helena Cuesta (2), Christof Weiß (1),
Frank Scherbaum (3), Emilia Gómez (2,4), and Meinard Müller (1):
Dagstuhl ChoirSet: A Multitrack Dataset for MIR Research on Choral Singing.
Transactions of the International Society for Music Information Retrieval,
3(1), pp. 98–110, 2020.
DOI: https://doi.org/10.5334/tismir.48
(1) International Audio Laboratories Erlangen, DE
(2) Music Technology Group, Universitat Pompeu Fabra, Barcelona, ES
(3) University of Potsdam, DE
(4) Joint Research Centre, European Commission, Seville, ES
"""
import csv
from typing import BinaryIO, Optional, TextIO, Tuple, List
from deprecated.sphinx import deprecated
import librosa
import numpy as np
from mirdata import download_utils, jams_utils, core, annotations, io
# BibTeX entry for the dataset paper.
# The literal must be a raw string: it contains LaTeX accent macros
# ({\ss}, {\'o}, {\"u}) whose backslashes would otherwise be consumed
# (or flagged as invalid escapes) by Python's string-escape processing.
BIBTEX = r"""
@article{RosenzweigCWSGM20_DCS_TISMIR,
author = {Sebastian Rosenzweig and Helena Cuesta and Christof Wei{\ss} and Frank Scherbaum and Emilia G{\'o}mez and Meinard M{\"u}ller},
title = {{D}agstuhl {ChoirSet}: {A} Multitrack Dataset for {MIR} Research on Choral Singing},
journal = {Transactions of the International Society for Music Information Retrieval ({TISMIR})},
volume = {3},
number = {1},
year = {2020},
pages = {98--110},
publisher = {Ubiquity Press},
doi = {10.5334/tismir.48},
url = {http://doi.org/10.5334/tismir.48},
url-demo = {https://www.audiolabs-erlangen.de/resources/MIR/2020-DagstuhlChoirSet}
}
"""
# Index registry: the "default" and "test" entries both point at version
# "1.2.3", whose index is described by the JSON file named below.
INDEXES = {
    "default": "1.2.3",
    "test": "1.2.3",
    "1.2.3": core.Index(filename="dagstuhl_choirset_index_1.2.3.json"),
}

# Remote file metadata for the dataset archive: archive filename, download
# URL, expected checksum, and the directory name(s) listed for unpacking.
REMOTES = {
    "full_dataset": download_utils.RemoteFileMetadata(
        filename="DagstuhlChoirSet_V1.2.3.zip",
        url="https://zenodo.org/record/4618287/files/DagstuhlChoirSet_V1.2.3.zip?download=1",
        checksum="82b95faa634d0c9fc05c81e0868f0217",
        unpack_directories=["DagstuhlChoirSet_V1.2.3"],
    )
}

# License text handed to the Dataset constructor as ``license_info``.
LICENSE_INFO = """
Creative Commons Attribution 4.0 International
"""
class Track(core.Track):
    """Dagstuhl ChoirSet Track class

    Args:
        track_id (str): track id of the track

    Attributes:
        audio_dyn_path (str): dynamic microphone audio path
        audio_hsm_path (str): headset microphone audio path
        audio_lrx_path (str): larynx microphone audio path
        f0_crepe_dyn_path (str): crepe f0 annotation for dynamic microphone path
        f0_crepe_hsm_path (str): crepe f0 annotation for headset microphone path
        f0_crepe_lrx_path (str): crepe f0 annotation for larynx microphone path
        f0_pyin_dyn_path (str): pyin f0 annotation for dynamic microphone path
        f0_pyin_hsm_path (str): pyin f0 annotation for headset microphone path
        f0_pyin_lrx_path (str): pyin f0 annotation for larynx microphone path
        f0_manual_lrx_path (str): manual f0 annotation for larynx microphone path
        score_path (str): score annotation path

    Cached Properties:
        f0_crepe_dyn (F0Data): algorithm-labeled (crepe) f0 annotations for dynamic microphone
        f0_crepe_hsm (F0Data): algorithm-labeled (crepe) f0 annotations for headset microphone
        f0_crepe_lrx (F0Data): algorithm-labeled (crepe) f0 annotations for larynx microphone
        f0_pyin_dyn (F0Data): algorithm-labeled (pyin) f0 annotations for dynamic microphone
        f0_pyin_hsm (F0Data): algorithm-labeled (pyin) f0 annotations for headset microphone
        f0_pyin_lrx (F0Data): algorithm-labeled (pyin) f0 annotations for larynx microphone
        f0_manual_lrx (F0Data): manually labeled f0 annotations for larynx microphone
        score (NoteData): time-aligned score representation

    """

    def __init__(self, track_id, data_home, dataset_name, index, metadata):
        super().__init__(
            track_id=track_id,
            data_home=data_home,
            dataset_name=dataset_name,
            index=index,
            metadata=metadata,
        )

        # Resolve every per-track file path from the index once, up front.
        self.audio_dyn_path = self.get_path("audio_dyn")
        self.audio_hsm_path = self.get_path("audio_hsm")
        self.audio_lrx_path = self.get_path("audio_lrx")
        self.f0_crepe_dyn_path = self.get_path("f0_crepe_dyn")
        self.f0_crepe_hsm_path = self.get_path("f0_crepe_hsm")
        self.f0_crepe_lrx_path = self.get_path("f0_crepe_lrx")
        self.f0_pyin_dyn_path = self.get_path("f0_pyin_dyn")
        self.f0_pyin_hsm_path = self.get_path("f0_pyin_hsm")
        self.f0_pyin_lrx_path = self.get_path("f0_pyin_lrx")
        self.f0_manual_lrx_path = self.get_path("f0_manual_lrx")
        self.score_path = self.get_path("score")

    @core.cached_property
    def f0_crepe_dyn(self) -> Optional[annotations.F0Data]:
        """F0 annotation loaded from ``f0_crepe_dyn_path``"""
        return load_f0(self.f0_crepe_dyn_path)

    @core.cached_property
    def f0_crepe_hsm(self) -> Optional[annotations.F0Data]:
        """F0 annotation loaded from ``f0_crepe_hsm_path``"""
        return load_f0(self.f0_crepe_hsm_path)

    @core.cached_property
    def f0_crepe_lrx(self) -> Optional[annotations.F0Data]:
        """F0 annotation loaded from ``f0_crepe_lrx_path``"""
        return load_f0(self.f0_crepe_lrx_path)

    @core.cached_property
    def f0_pyin_dyn(self) -> Optional[annotations.F0Data]:
        """F0 annotation loaded from ``f0_pyin_dyn_path``"""
        return load_f0(self.f0_pyin_dyn_path)

    @core.cached_property
    def f0_pyin_hsm(self) -> Optional[annotations.F0Data]:
        """F0 annotation loaded from ``f0_pyin_hsm_path``"""
        return load_f0(self.f0_pyin_hsm_path)

    @core.cached_property
    def f0_pyin_lrx(self) -> Optional[annotations.F0Data]:
        """F0 annotation loaded from ``f0_pyin_lrx_path``"""
        return load_f0(self.f0_pyin_lrx_path)

    @core.cached_property
    def f0_manual_lrx(self) -> Optional[annotations.F0Data]:
        """F0 annotation loaded from ``f0_manual_lrx_path``"""
        return load_f0(self.f0_manual_lrx_path)

    @core.cached_property
    def score(self) -> Optional[annotations.NoteData]:
        """Score annotation loaded from ``score_path``"""
        return load_score(self.score_path)

    @property
    def audio_dyn(self) -> Optional[Tuple[np.ndarray, float]]:
        """The audio for the track's dynamic microphone (if available)

        Returns:
            * np.ndarray - audio signal
            * float - sample rate

        """
        return load_audio(self.audio_dyn_path)

    @property
    def audio_hsm(self) -> Optional[Tuple[np.ndarray, float]]:
        """The audio for the track's headset microphone (if available)

        Returns:
            * np.ndarray - audio signal
            * float - sample rate

        """
        return load_audio(self.audio_hsm_path)

    @property
    def audio_lrx(self) -> Optional[Tuple[np.ndarray, float]]:
        """The audio for the track's larynx microphone (if available)

        Returns:
            * np.ndarray - audio signal
            * float - sample rate

        """
        return load_audio(self.audio_lrx_path)

    def to_jams(self):
        """Jams: the track's data in jams format

        Collects every available f0 annotation and the score, picks one audio
        path (headset first, then dynamic, then larynx) and hands them to
        ``jams_utils.jams_converter``.
        """
        f0_candidates = [
            (self.f0_crepe_dyn, "crepe - DYN"),
            (self.f0_crepe_hsm, "crepe - HSM"),
            (self.f0_crepe_lrx, "crepe - LRX"),
            (self.f0_pyin_dyn, "pyin - DYN"),
            (self.f0_pyin_hsm, "pyin - HSM"),
            (self.f0_pyin_lrx, "pyin - LRX"),
            (self.f0_manual_lrx, "manual - LRX"),
        ]
        # Remove missing annotations from the list. The test must look at the
        # annotation itself (first element); the label (second element) is a
        # non-empty string for every entry and would never filter anything.
        f0_data = [
            (f0, label) for f0, label in f0_candidates if f0 is not None
        ]

        score_data = [(self.score, "score")] if self.score else None

        # Use the first audio path that is set, in order HSM, DYN, LRX.
        if self.audio_hsm_path:
            audio_path = self.audio_hsm_path
        elif self.audio_dyn_path:
            audio_path = self.audio_dyn_path
        else:
            audio_path = self.audio_lrx_path

        return jams_utils.jams_converter(
            audio_path=audio_path, f0_data=f0_data, note_data=score_data
        )
class MultiTrack(core.MultiTrack):
    """Dagstuhl ChoirSet multitrack class

    Args:
        mtrack_id (str): multitrack id
        data_home (str): Local path where the dataset is stored.
            If `None`, looks for the data in the default directory, `~/mir_datasets/dagstuhl_choirset`

    Attributes:
        audio_stm_path (str): path to room mic (mono mixdown) audio file
        audio_str_path (str): path to room mic (right channel) audio file
        audio_stl_path (str): path to room mic (left channel) audio file
        audio_rev_path (str): path to room mic with artificial reverb (mono mixdown) audio file
        audio_spl_path (str): path to piano accompaniment (left channel) audio file
        audio_spr_path (str): path to piano accompaniment (right channel) audio file
        beat_path (str): path to beat annotation file

    Cached Properties:
        beat (annotations.BeatData): Beat annotation
        notes (annotations.NoteData): Note annotation
        multif0 (annotations.MultiF0Data): Aggregate of f0 annotations for tracks

    """

    def __init__(
        self, mtrack_id, data_home, dataset_name, index, track_class, metadata
    ):
        super().__init__(
            mtrack_id=mtrack_id,
            data_home=data_home,
            dataset_name=dataset_name,
            index=index,
            track_class=track_class,
            metadata=metadata,
        )

        # Resolve every multitrack-level file path from the index.
        self.audio_stm_path = self.get_path("audio_stm")
        self.audio_str_path = self.get_path("audio_str")
        self.audio_stl_path = self.get_path("audio_stl")
        self.audio_rev_path = self.get_path("audio_rev")
        self.audio_spl_path = self.get_path("audio_spl")
        self.audio_spr_path = self.get_path("audio_spr")
        self.beat_path = self.get_path("beat")

    @property
    def track_audio_property(self):
        """Name of the Track attribute that provides each track's audio"""
        # NOTE(review): the module docstring says only a subset of tracks has
        # a DYN recording while every track has LRX - confirm "audio_dyn" is
        # the intended choice here.
        return "audio_dyn"

    @core.cached_property
    def beat(self) -> Optional[annotations.BeatData]:
        """Beat annotation loaded from ``beat_path``"""
        return load_beat(self.beat_path)

    @core.cached_property
    def notes(self) -> Optional[annotations.NoteData]:
        """Scores of all tracks that have one, combined with ``+=``

        Returns ``None`` when no track of this multitrack has a score.
        """
        tracks_with_notes = [t for t in self.tracks.values() if t.score is not None]
        if len(tracks_with_notes) == 0:
            return None

        notes = tracks_with_notes[0].score
        for track in tracks_with_notes[1:]:
            notes += track.score
        return notes

    @core.cached_property
    def multif0(self) -> Optional[annotations.MultiF0Data]:
        """Aggregate of one f0 annotation per track

        For each track the first available annotation in a fixed priority
        order is used. Tracks without any f0 annotation are skipped; if no
        track contributes an annotation the result is ``None``.
        """
        # Priority order used to pick a single annotation per track: manual
        # first, then crepe before pyin, with LRX before HSM before DYN.
        f0_priority = [
            "f0_manual_lrx",
            "f0_crepe_lrx",
            "f0_pyin_lrx",
            "f0_crepe_hsm",
            "f0_pyin_hsm",
            "f0_crepe_dyn",
            "f0_pyin_dyn",
        ]

        multif0 = None
        for track in self.tracks.values():
            f0_data: Optional[annotations.F0Data] = None
            # get the best f0 annotation we can for this track
            for f0_attr in f0_priority:
                candidate = getattr(track, f0_attr)
                if candidate is not None:
                    f0_data = candidate
                    break

            if f0_data is None:
                # This track has no f0 annotation at all; without this guard
                # the aggregation below would fail on a None value.
                continue

            if multif0 is None:
                multif0 = f0_data.to_multif0()
            else:
                multif0 += f0_data

        return multif0

    @property
    def audio_stm(self) -> Optional[Tuple[np.ndarray, float]]:
        """The audio for the room mic (mono mixdown)

        Returns:
            * np.ndarray - audio signal
            * float - sample rate

        """
        return load_audio(self.audio_stm_path)

    @property
    def audio_str(self) -> Optional[Tuple[np.ndarray, float]]:
        """The audio for the room mic (right channel)

        Returns:
            * np.ndarray - audio signal
            * float - sample rate

        """
        return load_audio(self.audio_str_path)

    @property
    def audio_stl(self) -> Optional[Tuple[np.ndarray, float]]:
        """The audio for the room mic (left channel)

        Returns:
            * np.ndarray - audio signal
            * float - sample rate

        """
        return load_audio(self.audio_stl_path)

    @property
    def audio_rev(self) -> Optional[Tuple[np.ndarray, float]]:
        """The audio for the room mic with artificial reverb (mono mixdown)

        Returns:
            * np.ndarray - audio signal
            * float - sample rate

        """
        return load_audio(self.audio_rev_path)

    @property
    def audio_spl(self) -> Optional[Tuple[np.ndarray, float]]:
        """The audio for the piano accompaniment DI (left channel)

        Returns:
            * np.ndarray - audio signal
            * float - sample rate

        """
        return load_audio(self.audio_spl_path)

    @property
    def audio_spr(self) -> Optional[Tuple[np.ndarray, float]]:
        """The audio for the piano accompaniment DI (right channel)

        Returns:
            * np.ndarray - audio signal
            * float - sample rate

        """
        return load_audio(self.audio_spr_path)

    def to_jams(self):
        """Jams: the multitrack's data in jams format

        Passes the room-mic mixdown path and, when present, the beat
        annotation to ``jams_utils.jams_converter``.
        """
        beat_data = [(self.beat, "beat")] if self.beat else None
        return jams_utils.jams_converter(
            audio_path=self.audio_stm_path, beat_data=beat_data
        )
@io.coerce_to_bytes_io
def load_audio(fhandle: BinaryIO) -> Tuple[np.ndarray, float]:
    """Load a Dagstuhl ChoirSet audio file.

    Args:
        fhandle (str or file-like): File-like object or path to audio file

    Returns:
        * np.ndarray - the audio signal
        * float - The sample rate of the audio file

    """
    # Ask librosa for a mono signal at a fixed rate of 22050 Hz.
    signal, sample_rate = librosa.load(fhandle, sr=22050, mono=True)
    return signal, sample_rate
@io.coerce_to_string_io
def load_f0(fhandle: TextIO) -> annotations.F0Data:
    """Load a Dagstuhl ChoirSet F0-trajectory.

    Each CSV row is read as ``time, frequency[, confidence]``. A frequency
    greater than zero marks the frame as voiced; the absolute value is stored
    as the frequency. Confidence is only reported when at least one row
    carries a third column.

    Args:
        fhandle (str or file-like): File-like object or path to F0 file

    Returns:
        F0Data Object - the F0-trajectory

    """
    times: List[float] = []
    freqs: List[float] = []
    voicings: List[float] = []
    confs: List[Optional[float]] = []

    for row in csv.reader(fhandle, delimiter=","):
        if not row:
            # csv.reader yields an empty list for a blank line; skip it
            # instead of failing on row[0].
            continue
        times.append(float(row[0]))
        freq_val = float(row[1])
        voicings.append(float(freq_val > 0))
        freqs.append(np.abs(freq_val))
        # Only rows with exactly three columns carry a confidence value.
        confs.append(float(row[2]) if len(row) == 3 else None)

    conf_array: Optional[np.ndarray]
    conf_unit: Optional[str]
    # Test for None explicitly: a confidence of 0.0 is a real value and must
    # not be mistaken for "no confidence column".
    if all(c is None for c in confs):
        conf_array = None
        conf_unit = None
    else:
        conf_array = np.array(confs)
        conf_unit = "likelihood"

    return annotations.F0Data(
        np.array(times),
        "s",
        np.array(freqs),
        "hz",
        np.array(voicings),
        "binary",
        conf_array,
        conf_unit,
    )
@io.coerce_to_string_io
def load_score(fhandle: TextIO) -> annotations.NoteData:
    """Load a Dagstuhl ChoirSet time-aligned score representation.

    Each CSV row supplies an interval (columns 0 and 1) and a pitch value
    (column 2) that is converted with ``librosa.midi_to_hz``.

    Args:
        fhandle (str or file-like): File-like object or path to score representation file

    Returns:
        NoteData Object - the time-aligned score representation

    """
    note_intervals = []
    midi_pitches = []
    for row in csv.reader(fhandle, delimiter=","):
        start, end = float(row[0]), float(row[1])
        note_intervals.append([start, end])
        midi_pitches.append(float(row[2]))

    interval_array = np.array(note_intervals)
    pitches_hz = librosa.midi_to_hz(midi_pitches)
    return annotations.NoteData(interval_array, "s", pitches_hz, "hz")
@io.coerce_to_string_io
def load_beat(fhandle: TextIO) -> annotations.BeatData:
    """Load a Dagstuhl ChoirSet beat annotation.

    Column 0 of each CSV row is the beat time. Column 1 decides the beat
    position: a whole number restarts the count at 1, any other value
    increments the running count by one.

    Args:
        fhandle (str or file-like): File-like object or path to beat annotation file

    Returns:
        BeatData Object - the beat annotation

    """
    beat_times = []
    beat_positions = []
    current = 0  # running position; restarts at 1 on whole-number rows
    for row in csv.reader(fhandle, delimiter=","):
        beat_times.append(float(row[0]))
        raw_value = float(row[1])
        # presumably column 1 is "measure.fraction", so a whole number marks
        # the first beat of a measure - TODO confirm against the data files
        is_whole = np.floor(raw_value) == raw_value
        current = 1 if is_whole else current + 1
        beat_positions.append(current)

    return annotations.BeatData(
        np.array(beat_times), "s", np.array(beat_positions), "bar_index"
    )
@core.docstring_inherit(core.Dataset)
class Dataset(core.Dataset):
    """
    The Dagstuhl ChoirSet dataset
    """

    def __init__(self, data_home=None, version="default"):
        # Dataset-specific configuration, forwarded as keyword arguments.
        dataset_config = dict(
            name="dagstuhl_choirset",
            track_class=Track,
            multitrack_class=MultiTrack,
            bibtex=BIBTEX,
            indexes=INDEXES,
            remotes=REMOTES,
            license_info=LICENSE_INFO,
        )
        super().__init__(data_home, version, **dataset_config)

    # The methods below only forward to the module-level loaders of the same
    # name and are marked as deprecated.

    @deprecated(
        version="0.3.4", reason="Use mirdata.datasets.dagstuhl_choirset.load_audio"
    )
    def load_audio(self, *args, **kwargs):
        """Forward all arguments to the module-level ``load_audio``"""
        return load_audio(*args, **kwargs)

    @deprecated(
        version="0.3.4", reason="Use mirdata.datasets.dagstuhl_choirset.load_f0"
    )
    def load_f0(self, *args, **kwargs):
        """Forward all arguments to the module-level ``load_f0``"""
        return load_f0(*args, **kwargs)

    @deprecated(
        version="0.3.4", reason="Use mirdata.datasets.dagstuhl_choirset.load_score"
    )
    def load_score(self, *args, **kwargs):
        """Forward all arguments to the module-level ``load_score``"""
        return load_score(*args, **kwargs)

    @deprecated(
        version="0.3.4", reason="Use mirdata.datasets.dagstuhl_choirset.load_beat"
    )
    def load_beat(self, *args, **kwargs):
        """Forward all arguments to the module-level ``load_beat``"""
        return load_beat(*args, **kwargs)