"""Jingju A Cappella Singing Dataset Loader
.. admonition:: Dataset Info
:class: dropdown
Description:
This dataset is a collection of boundary annotations of a cappella singing performed by
Beijing Opera (Jingju, 京剧) professional and amateur singers.
Contents:
1. wav.zip: audio files in .wav format, mono or stereo.
2. pycode.zip: util code for parsing the .textgrid annotation
3. catalogue*.csv: recording metadata; source separation recordings are not included.
4. annotation_txt.zip: phrase, syllable and phoneme time boundaries (second) and labels in .txt format
The annotation_txt.zip folder annotations are represented as follows:
1. phrase_char: phrase-level time boundaries, labeled in Mandarin characters
2. phrase: phrase-level time boundaries, labeled in Mandarin pinyin
3. syllable: syllable-level time boundaries, labeled in Mandarin pinyin
4. phoneme: phoneme-level time boundaries, labeled in X-SAMPA
The boundaries (onset and offset) have been annotated hierarchically:
1. phrase (line)
2. syllable
3. phoneme
Annotation details:
Singing units in pinyin and X-SAMPA have been annotated to a jingju a cappella singing audio dataset.
Audio details:
The corresponding audio files are the a cappella singing arias recordings, which are stereo or mono,
sampled at 44.1 kHz, and stored as .wav files. The .wav files are recorded by two institutes: those file
names ending with ‘qm’ are recorded by C4DM, Queen Mary University of London; other file names ending with
‘upf’ or ‘lon’ are recorded by MTG-UPF. Additionally, another collection of 15 clean singing recordings is
included in this dataset. They are extracted from commercial recordings which originally contain karaoke
accompaniment and mixed versions.
Additional details:
For the annotation format, units, parsing code and other information, please refer to:
https://github.com/MTG/jingjuPhonemeAnnotation
License information:
Textgrid annotations are licensed under Creative Commons Attribution-NonCommercial 4.0 International License.
Wav audio ending with ‘upf’ or ‘lon’ is licensed under Creative Commons Attribution-NonCommercial 4.0 International.
For the license of .wav audio ending with ‘qm’ from C4DM Queen Mary University of London, please refer to
this page http://isophonics.org/SingingVoiceDataset
"""
import csv
import os
from typing import BinaryIO, Optional, TextIO, Tuple
from deprecated.sphinx import deprecated
import librosa
import numpy as np
from smart_open import open
from mirdata import annotations, core, download_utils, io, jams_utils
# BibTeX entries to cite when using this dataset (the Zenodo dataset record and
# a related article); handed to core.Dataset as `bibtex` in Dataset.__init__.
BIBTEX = """
@dataset{rong_gong_2018_1323561,
author = {Rong Gong and
Rafael Caro Repetto and
Yile Yang and
Xavier Serra},
title = {Jingju a cappella singing dataset part1},
month = jul,
year = 2018,
publisher = {Zenodo},
version = 7,
doi = {10.5281/zenodo.1323561},
url = {https://doi.org/10.5281/zenodo.1323561}
}
@article{black2014automatic,
title={Automatic identification of emotional cues in Chinese opera singing},
author={Black, Dawn AA and Li, Ma and Tian, Mi},
journal={ICMPC, Seoul, South Korea},
year={2014}
}
"""
# Index registry handed to core.Dataset as `indexes`: the "default" and "test"
# entries both name version "7.0", which maps to the JSON index file below.
INDEXES = {
"default": "7.0",
"test": "7.0",
"7.0": core.Index(filename="compmusic_jingju_acappella_index_7.0.json"),
}
# Remote files handed to core.Dataset as `remotes`: the annotation archive, the
# two catalogue CSVs (dan and laosheng) and the audio archive, all hosted on
# Zenodo record 1323561. Every entry targets destination_dir=".".
# NOTE(review): the checksums look like MD5 digests - confirm against download_utils.
REMOTES = {
"annotation_txt": download_utils.RemoteFileMetadata(
filename="annotation_txt.zip",
url="https://zenodo.org/record/1323561/files/annotation_txt.zip?download=1",
checksum="851c9c3fe195fd20bec42d32ddd9deb7",
destination_dir=".",
),
"catalogue_dan": download_utils.RemoteFileMetadata(
filename="catalogue - dan.csv",
url="https://zenodo.org/record/1323561/files/catalogue%20-%20dan.csv?download=1",
checksum="82ce90bd8508b1ae12c6a1fe489618a4",
destination_dir=".",
),
"catalogue_laosheng": download_utils.RemoteFileMetadata(
filename="catalogue - laosheng.csv",
url="https://zenodo.org/record/1323561/files/catalogue%20-%20laosheng.csv?download=1",
checksum="768fa00ce1f8880ae5480fae103ecc06",
destination_dir=".",
),
"wav": download_utils.RemoteFileMetadata(
filename="wav.zip",
url="https://zenodo.org/record/1323561/files/wav.zip?download=1",
checksum="4722abda831c20b169a62b2754b15bea",
destination_dir=".",
),
}
# One-line license summary handed to core.Dataset as `license_info`; the audio
# license depends on the suffix of the file name (upf/lon vs. qm).
LICENSE_INFO = (
    "audio files ending with upf or lon: Creative Commons Attribution Non-Commercial 4.0"
    " International, "
    "audio files ending with qm: http://isophonics.org/SingingVoiceDataset"
)
[docs]
class Track(core.Track):
    """Jingju A Cappella Singing Track class

    Args:
        track_id (str): track id of the track
        data_home (str): Local path where the dataset is stored. default=None
            If `None`, looks for the data in the default directory, `~/mir_datasets`

    Attributes:
        audio_path (str): local path where the audio is stored
        phoneme_path (str): local path where the phoneme annotation is stored
        phrase_char_path (str): local path where the lyric phrase annotation in chinese is stored
        phrase_path (str): local path where the lyric phrase annotation in western characters is stored
        syllable_path (str): local path where the syllable annotation is stored
        work (str): string referring to the work where the track belongs
        details (str): string referring to additional details about the track

    Cached Properties:
        phoneme (LyricData): phoneme annotation
        phrase_char (LyricData): lyric phrase annotation in chinese
        phrase (LyricData): lyric phrase annotation in western characters
        syllable (LyricData): syllable annotation

    """

    def __init__(self, track_id, data_home, dataset_name, index, metadata):
        super().__init__(track_id, data_home, dataset_name, index, metadata)

        # Resolve every file of the track through the index keys.
        self.audio_path = self.get_path("audio")
        self.phoneme_path = self.get_path("phoneme")
        self.phrase_char_path = self.get_path("phrase_char")
        self.phrase_path = self.get_path("phrase")
        self.syllable_path = self.get_path("syllable")

    @core.cached_property
    def phoneme(self):
        """LyricData: phoneme annotation read from ``phoneme_path``"""
        return load_phonemes(self.phoneme_path)

    @core.cached_property
    def phrase(self):
        """LyricData: phrase annotation read from ``phrase_path``"""
        return load_phrases(self.phrase_path)

    @core.cached_property
    def phrase_char(self):
        """LyricData: phrase annotation read from ``phrase_char_path``"""
        # Same file layout as `phrase`, hence the same loader.
        return load_phrases(self.phrase_char_path)

    @core.cached_property
    def syllable(self):
        """LyricData: syllable annotation read from ``syllable_path``"""
        return load_syllable(self.syllable_path)

    @property
    def work(self):
        """The "work" entry of the track metadata, or None when absent"""
        return self._track_metadata.get("work")

    @property
    def details(self):
        """The "details" entry of the track metadata, or None when absent"""
        return self._track_metadata.get("details")

    @property
    def audio(self) -> Optional[Tuple[np.ndarray, float]]:
        """The track's audio

        Returns:
            * np.ndarray - audio signal
            * float - sample rate

        """
        return load_audio(self.audio_path)

    def to_jams(self):
        """Get the track's data in jams format

        Returns:
            jams.JAMS: the track's data in jams format

        """
        audio_path = self.audio_path
        # All four annotation levels are exported as lyrics annotations.
        lyrics = [
            (self.phrase, "phrases"),
            (self.phrase_char, "phrases_char"),
            (self.phoneme, "phoneme"),
            (self.syllable, "syllable"),
        ]
        track_metadata = {"work": self.work, "details": self.details}
        return jams_utils.jams_converter(
            audio_path=audio_path,
            lyrics_data=lyrics,
            metadata=track_metadata,
        )
[docs]
@io.coerce_to_bytes_io
def load_audio(fhandle: BinaryIO) -> Tuple[np.ndarray, float]:
    """Read a Jingju A Cappella Singing recording.

    Args:
        fhandle (str or file-like): path to the audio file, or a file-like object

    Returns:
        * np.ndarray - the audio signal, loaded as mono
        * float - the sample rate of the returned signal

    """
    # The loader is asked for a 44.1 kHz mono signal regardless of the file.
    signal, sample_rate = librosa.load(fhandle, sr=44100, mono=True)
    return signal, sample_rate
[docs]
@io.coerce_to_string_io
def load_phonemes(fhandle: TextIO) -> annotations.LyricData:
    """Read a phoneme annotation file.

    The file is parsed as tab-separated rows whose first three fields are the
    start time, the end time and the label. A label equal to ``sil`` is stored
    as an empty string.

    Args:
        fhandle (str or file-like): path or file-like object pointing to a phoneme annotation file

    Returns:
        LyricData: phoneme annotation, with the intervals in seconds

    """
    # One pass over the rows; fields are converted in column order per row.
    rows = [
        (float(row[0]), float(row[1]), "" if row[2] == "sil" else row[2])
        for row in csv.reader(fhandle, delimiter="\t")
    ]
    onsets = [entry[0] for entry in rows]
    offsets = [entry[1] for entry in rows]
    labels = [entry[2] for entry in rows]

    # Stack as (2, n) and transpose so each row is one [start, end] pair.
    intervals = np.array([onsets, offsets]).T
    return annotations.LyricData(intervals, "s", labels, "pronunciations_open")
[docs]
@io.coerce_to_string_io
def load_phrases(fhandle: TextIO) -> annotations.LyricData:
    """Read a lyric phrase annotation file.

    The file is parsed as tab-separated rows whose first three fields are the
    start time, the end time and the phrase text. A text equal to ``sil`` is
    stored as an empty string.

    Args:
        fhandle (str or file-like): path or file-like object pointing to a lyric annotation file

    Returns:
        LyricData: lyric phrase annotation, with the intervals in seconds

    """
    # One pass over the rows; fields are converted in column order per row.
    rows = [
        (float(row[0]), float(row[1]), "" if row[2] == "sil" else row[2])
        for row in csv.reader(fhandle, delimiter="\t")
    ]
    onsets = [entry[0] for entry in rows]
    offsets = [entry[1] for entry in rows]
    phrases = [entry[2] for entry in rows]

    # Stack as (2, n) and transpose so each row is one [start, end] pair.
    intervals = np.array([onsets, offsets]).T
    return annotations.LyricData(intervals, "s", phrases, "words")
[docs]
@io.coerce_to_string_io
def load_syllable(fhandle: TextIO) -> annotations.LyricData:
    """Read a syllable annotation file.

    The file is parsed as tab-separated rows whose first three fields are the
    start time, the end time and the label. A label equal to ``sil`` is stored
    as an empty string.

    Args:
        fhandle (str or file-like): path or file-like object pointing to a syllable annotation file

    Returns:
        LyricData: syllable annotation, with the intervals in seconds

    """
    # One pass over the rows; fields are converted in column order per row.
    rows = [
        (float(row[0]), float(row[1]), "" if row[2] == "sil" else row[2])
        for row in csv.reader(fhandle, delimiter="\t")
    ]
    onsets = [entry[0] for entry in rows]
    offsets = [entry[1] for entry in rows]
    labels = [entry[2] for entry in rows]

    # Stack as (2, n) and transpose so each row is one [start, end] pair.
    intervals = np.array([onsets, offsets]).T
    return annotations.LyricData(intervals, "s", labels, "syllable_open")
[docs]
@core.docstring_inherit(core.Dataset)
class Dataset(core.Dataset):
    """
    The compmusic_jingju_acappella dataset
    """

    def __init__(self, data_home=None, version="default"):
        super().__init__(
            data_home,
            version,
            name="compmusic_jingju_acappella",
            track_class=Track,
            bibtex=BIBTEX,
            indexes=INDEXES,
            remotes=REMOTES,
            license_info=LICENSE_INFO,
        )

    @core.cached_property
    def _metadata(self):
        """Parse the laosheng catalogue into a per-recording metadata dict.

        Returns:
            dict: maps the first column of each catalogue row to
            ``{"work": ..., "details": ...}`` (second and fourth columns, with
            empty fields stored as None), plus a ``"data_home"`` entry holding
            the directory that contains the catalogue.

        Raises:
            FileNotFoundError: if the laosheng catalogue is not found

        """
        metadata_path_laosheng = os.path.join(
            self.data_home, "catalogue - laosheng.csv"
        )
        # NOTE: only the laosheng catalogue is parsed here; "catalogue - dan.csv"
        # is listed in REMOTES but is not read by this property.

        metadata = {}
        try:
            with open(metadata_path_laosheng, "r", encoding="utf-8") as fhandle:
                reader = csv.reader(fhandle, delimiter=",")
                # Skip the first row (presumably the column header); the
                # default keeps an empty file from raising StopIteration.
                next(reader, None)
                for line in reader:
                    if not line:
                        # csv.reader yields [] for blank lines; nothing to index.
                        continue
                    work = line[1] if line[1] else None
                    details = line[3] if line[3] else None
                    metadata[line[0]] = {"work": work, "details": details}

                data_home = os.path.dirname(metadata_path_laosheng)
                metadata["data_home"] = data_home
        except FileNotFoundError as err:
            raise FileNotFoundError(
                "laosheng metadata not found. Did you run .download()?"
            ) from err

        return metadata

    # The deprecated wrappers below forward to the module-level loaders. Their
    # hints name the module after the dataset name used in __init__
    # (compmusic_jingju_acappella) rather than the misspelled "jingju_acapella".
    @deprecated(
        reason="Use mirdata.datasets.compmusic_jingju_acappella.load_phonemes",
        version="0.3.4",
    )
    def load_phonemes(self, *args, **kwargs):
        """Deprecated wrapper around the module-level ``load_phonemes``."""
        return load_phonemes(*args, **kwargs)

    @deprecated(
        reason="Use mirdata.datasets.compmusic_jingju_acappella.load_phrases",
        version="0.3.4",
    )
    def load_phrases(self, *args, **kwargs):
        """Deprecated wrapper around the module-level ``load_phrases``."""
        return load_phrases(*args, **kwargs)

    @deprecated(
        reason="Use mirdata.datasets.compmusic_jingju_acappella.load_syllable",
        version="0.3.4",
    )
    def load_syllable(self, *args, **kwargs):
        """Deprecated wrapper around the module-level ``load_syllable``."""
        return load_syllable(*args, **kwargs)