"""DALI Dataset Loader
.. admonition:: Dataset Info
:class: dropdown
DALI contains 5358 audio files with their time-aligned vocal melody.
It also contains time-aligned lyrics at four levels of granularity: notes,
words, lines, and paragraphs.
For each song, DALI also provides additional metadata: genre, language, musician,
album covers, or links to video clips.
For more details, please visit: https://github.com/gabolsgabs/DALI
"""
import json
import gzip
import logging
import os
import pickle
from typing import BinaryIO, Optional, Tuple
from deprecated.sphinx import deprecated
import librosa
import numpy as np
from smart_open import open
from mirdata import download_utils
from mirdata import jams_utils
from mirdata import core
from mirdata import annotations
from mirdata import io
# this is the package, needed to load the annotations.
# DALI-dataset is only installed if the user explicitly declares
# they want dali when pip installing.
try:
    import DALI
except ImportError:
    # Log an actionable hint, then re-raise so importing this module still
    # fails when the optional dependency is missing.
    logging.error(
        "In order to use dali you must have dali-dataset installed. "
        "Please reinstall mirdata using `pip install 'mirdata[dali]'`"
    )
    raise
BIBTEX = """@inproceedings{Meseguer-Brocal_2018,
Title = {DALI: a large Dataset of synchronized Audio, LyrIcs and notes, automatically created using teacher-student
machine learning paradigm.},
Author = {Meseguer-Brocal, Gabriel and Cohen-Hadria, Alice and Peeters, Geoffroy},
Booktitle = {19th International Society for Music Information Retrieval Conference},
Editor = {ISMIR}, Month = {September},
Year = {2018}
}"""
# Index versions known to this loader. "default" and "test" both name
# version "1.0", whose index is described by dali_index_1.0.json.
# Passed to core.Dataset as its `indexes` argument.
INDEXES = {
    "default": "1.0",
    "test": "1.0",
    "1.0": core.Index(filename="dali_index_1.0.json"),
}
# Remote files registered for this dataset: only the metadata JSON from the
# DALI GitHub repository, stored locally as dali_metadata.json.
# NOTE(review): the checksum looks like an MD5 hex digest -- confirm against
# download_utils.RemoteFileMetadata.
REMOTES = {
    "metadata": download_utils.RemoteFileMetadata(
        filename="dali_metadata.json",
        url="https://raw.githubusercontent.com/gabolsgabs/DALI/master/code/DALI/files/dali_v1_metadata.json",
        checksum="40af5059e7aa97f81b2654758094d24b",
        destination_dir=".",
    )
}
DOWNLOAD_INFO = """
To download this dataset, visit:
https://zenodo.org/record/2577915 and request access.
Once downloaded, unzip the file DALI_v1.0.zip
and place the result in:
{}
Use the function dali_code.get_audio you can find at:
https://github.com/gabolsgabs/DALI for getting the audio
and place them in "audio" folder with the following structure:
> Dali
> audio
...
"""
# Human-readable license statement; passed to core.Dataset as its
# `license_info` argument.
LICENSE_INFO = (
    "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License."
)
[docs]
class Track(core.Track):
    """DALI melody Track class

    Args:
        track_id (str): track id of the track
        data_home: forwarded unchanged to ``core.Track``
        dataset_name: forwarded unchanged to ``core.Track``
        index: forwarded unchanged to ``core.Track``
        metadata: forwarded unchanged to ``core.Track``

    Attributes:
        album (str): the track's album
        annotation_path (str): path to the track's annotation file
        artist (str): the track's artist
        audio_path (str): path to the track's audio file
        audio_url (str): youtube ID
        dataset_version (int): dataset annotation version
        genres: value of the "genres" entry in the track's metadata
        ground_truth (bool): True if the annotation is verified
        language (str): sung language
        release_date (str): year the track was released
        scores_manual (int): manual score annotations
        scores_ncc (float): ncc score annotations
        title (str): the track's title
        track_id (str): the unique track id
        url_working (bool): True if the youtube url was valid

    Cached Properties:
        notes (NoteData): vocal notes
        words (LyricData): word-level lyrics
        lines (LyricData): line-level lyrics
        paragraphs (LyricData): paragraph-level lyrics
        annotation_object (DALI.Annotations): DALI annotation object

    """

    def __init__(self, track_id, data_home, dataset_name, index, metadata):
        super().__init__(track_id, data_home, dataset_name, index, metadata)

        # Resolve both file locations once, using the index keys "annot"
        # and "audio".
        self.annotation_path = self.get_path("annot")
        self.audio_path = self.get_path("audio")

    # The metadata-backed properties below use ``.get`` at every level, so a
    # missing key yields ``None`` instead of raising ``KeyError``.

    @property
    def audio_url(self):
        return self._track_metadata.get("audio", {}).get("url")

    @property
    def url_working(self):
        return self._track_metadata.get("audio", {}).get("working")

    @property
    def ground_truth(self):
        return self._track_metadata.get("ground-truth")

    @property
    def artist(self):
        return self._track_metadata.get("artist")

    @property
    def title(self):
        return self._track_metadata.get("title")

    @property
    def dataset_version(self):
        return self._track_metadata.get("dataset_version")

    @property
    def scores_ncc(self):
        return self._track_metadata.get("scores", {}).get("NCC")

    @property
    def scores_manual(self):
        return self._track_metadata.get("scores", {}).get("manual")

    @property
    def album(self):
        return self._track_metadata.get("metadata", {}).get("album")

    @property
    def release_date(self):
        return self._track_metadata.get("metadata", {}).get("release_date")

    @property
    def genres(self):
        return self._track_metadata.get("metadata", {}).get("genres")

    @property
    def language(self):
        return self._track_metadata.get("metadata", {}).get("language")

    @core.cached_property
    def notes(self) -> annotations.NoteData:
        return load_annotations_granularity(self.annotation_path, "notes")

    # The three lyric levels come back as LyricData (not NoteData): the
    # loader only builds NoteData for granularity == "notes".

    @core.cached_property
    def words(self) -> annotations.LyricData:
        return load_annotations_granularity(self.annotation_path, "words")

    @core.cached_property
    def lines(self) -> annotations.LyricData:
        return load_annotations_granularity(self.annotation_path, "lines")

    @core.cached_property
    def paragraphs(self) -> annotations.LyricData:
        return load_annotations_granularity(self.annotation_path, "paragraphs")

    @core.cached_property
    def annotation_object(self) -> DALI.Annotations:
        return load_annotations_class(self.annotation_path)

    @property
    def audio(self) -> Optional[Tuple[np.ndarray, float]]:
        """The track's audio

        Returns:
            * np.ndarray - audio signal
            * float - sample rate

        """
        return load_audio(self.audio_path)

    def to_jams(self):
        """Get the track's data in jams format

        Returns:
            jams.JAMS: the track's data in jams format

        """
        return jams_utils.jams_converter(
            audio_path=self.audio_path,
            lyrics_data=[
                (self.words, "word-aligned lyrics"),
                (self.lines, "line-aligned lyrics"),
                (self.paragraphs, "paragraph-aligned lyrics"),
            ],
            note_data=[(self.notes, "annotated vocal notes")],
            metadata=self._track_metadata,
        )
[docs]
@io.coerce_to_bytes_io
def load_audio(fhandle: BinaryIO) -> Optional[Tuple[np.ndarray, float]]:
    """Load a DALI audio file.

    Args:
        fhandle (str or file-like): path or file-like object pointing to an audio file

    Returns:
        * np.ndarray - the mono audio signal
        * float - The sample rate of the audio file

    """
    # sr=None asks librosa not to resample; mono=True requests a mono signal.
    signal, sample_rate = librosa.load(fhandle, sr=None, mono=True)
    return signal, sample_rate
[docs]
def load_annotations_granularity(annotations_path, granularity):
    """Load annotations at the specified level of granularity

    Args:
        annotations_path (str): path to a DALI annotation file
        granularity (str): one of 'notes', 'words', 'lines', 'paragraphs'

    Returns:
        NoteData for granularity='notes' or LyricData otherwise

    Raises:
        FileNotFoundError: if annotations_path does not exist

    """
    # Reuse the module-level loader so both entry points read the file the
    # same way and report a missing file with the same message.
    output = load_annotations_class(annotations_path)
    entries = output.annotations["annot"][granularity]

    # One (start, end) row per entry, rounded to 3 decimals.
    intervals = np.array(
        [
            [round(entry["time"][0], 3) for entry in entries],
            [round(entry["time"][1], 3) for entry in entries],
        ]
    ).T

    if granularity == "notes":
        # Only the first element of "freq" is used as the note's pitch.
        pitches = np.array([round(entry["freq"][0], 3) for entry in entries])
        return annotations.NoteData(intervals, "s", pitches, "hz")

    lyrics = [entry["text"] for entry in entries]
    return annotations.LyricData(intervals, "s", lyrics, "words")
[docs]
def load_annotations_class(annotations_path):
    """Load full annotations into the DALI class object

    Args:
        annotations_path (str): path to a DALI annotation file

    Returns:
        DALI.annotations: DALI annotations object

    Raises:
        FileNotFoundError: if annotations_path does not exist

    """
    # gzip.open treats "r" and "rb" identically (both binary), so a single
    # attempt is enough; retrying with "r" would repeat the same operation.
    try:
        fhandle = gzip.open(annotations_path, "rb")
    except FileNotFoundError as err:
        raise FileNotFoundError(
            "annotations_path {} does not exist".format(annotations_path)
        ) from err

    # NOTE: unpickling can execute arbitrary code. Only load annotation
    # files obtained from a trusted source.
    with fhandle:
        return pickle.load(fhandle)
[docs]
@core.docstring_inherit(core.Dataset)
class Dataset(core.Dataset):
    """
    The dali dataset
    """

    def __init__(self, data_home=None, version="default"):
        super().__init__(
            data_home,
            version,
            name="dali",
            track_class=Track,
            bibtex=BIBTEX,
            indexes=INDEXES,
            remotes=REMOTES,
            download_info=DOWNLOAD_INFO,
            license_info=LICENSE_INFO,
        )

    @core.cached_property
    def _metadata(self):
        """Load the metadata JSON stored directly under ``data_home``.

        Returns:
            the object decoded from dali_metadata.json

        Raises:
            FileNotFoundError: if the metadata file is not present

        """
        metadata_path = os.path.join(self.data_home, "dali_metadata.json")
        try:
            with open(metadata_path, "r") as fhandle:
                return json.load(fhandle)
        except FileNotFoundError as err:
            # Keep the original error as the explicit cause while pointing
            # the user at the likely remedy.
            raise FileNotFoundError(
                "Metadata not found. Did you run .download()?"
            ) from err

    # The methods below are deprecated pass-throughs to the module-level
    # functions of the same name.

    @deprecated(reason="Use mirdata.datasets.dali.load_audio", version="0.3.4")
    def load_audio(self, *args, **kwargs):
        return load_audio(*args, **kwargs)

    @deprecated(
        reason="Use mirdata.datasets.dali.load_annotations_granularity", version="0.3.4"
    )
    def load_annotations_granularity(self, *args, **kwargs):
        return load_annotations_granularity(*args, **kwargs)

    @deprecated(
        reason="Use mirdata.datasets.dali.load_annotations_class", version="0.3.4"
    )
    def load_annotations_class(self, *args, **kwargs):
        return load_annotations_class(*args, **kwargs)