"""haydn op20 Dataset Loader
.. admonition:: Dataset Info
:class: dropdown
This dataset accompanies the Master's thesis of Nestor Napoles. It is a manually annotated corpus of harmonic
analyses in harm syntax.
This dataset contains 30 pieces composed by Joseph Haydn in symbolic format, which have each been manually
annotated with harmonic analyses.
"""
import logging
import os
from typing import Optional, TextIO, List
from deprecated.sphinx import deprecated
import numpy as np
from mirdata import core, io, jams_utils, download_utils
try:
import music21
except ImportError:
logging.error(
"In order to use haydn_op20 you must have music21 installed. "
"Please reinstall mirdata using `pip install 'mirdata[haydn_op20]'"
)
raise ImportError
from mirdata.annotations import KeyData, ChordData
# BibTeX entry for citing the dataset.
# The literal is a raw string so that the LaTeX accent commands (\') are kept
# verbatim: in a regular string Python interprets \' as an escaped quote and
# silently drops the backslash, corrupting the author field.
BIBTEX = r"""
@dataset{nestor_napoles_lopez_2017_1095630,
author={N\'apoles L\'opez, N\'estor},
title={{Joseph Haydn - String Quartets Op.20 - Harmonic Analysis Annotations Dataset}},
month=dec,
year=2017,
publisher={Zenodo},
version={v1.1-alpha},
doi={10.5281/zenodo.1095630},
url={https://doi.org/10.5281/zenodo.1095630}
}"""
# Index registry handed to the Dataset constructor.
# "default" and "test" both point at the "1.3" key, whose value names the JSON
# index file for that release.
INDEXES = {
"default": "1.3",
"test": "1.3",
"1.3": core.Index(filename="haydn_op20_index_1.3.json"),
}
# Remote resources handed to the Dataset constructor: a single zip archive
# (the v1.3 annotated release hosted on GitHub) unpacked into ".".
# NOTE(review): the checksum is 32 hex digits, presumably MD5 -- confirm
# against download_utils.RemoteFileMetadata.
REMOTES = {
"all": download_utils.RemoteFileMetadata(
filename="haydnop20v1.3_annotated.zip",
url="https://github.com/napulen/haydn_op20_harm/releases/download/v1.3/haydnop20v1.3_annotated.zip",
checksum="1c65c8da312e1c9dda681d0496bf527f",
destination_dir=".",
)
}
# Human-readable license statement handed to the Dataset constructor.
LICENSE_INFO = (
"Creative Commons Attribution Non Commercial Share Alike 4.0 International."
)
[docs]
class Track(core.Track):
    """haydn op20 track: one humdrum score with harmonic annotations.

    Args:
        track_id (str): track id of the track

    Attributes:
        title (str): title of the track (annotation path without extension)
        track_id (str): track id
        humdrum_annotated_path (str): path to humdrum annotated score

    Cached Properties:
        keys (KeyData): annotated local keys.
        keys_music21 (list): annotated local keys.
        roman_numerals (list): annotated roman_numerals.
        chords (ChordData): annotated chords.
        chords_music21 (list): annotated chords.
        duration (int): relative duration
        midi_path (str): path to midi
        score (music21.stream.Score): music21 score

    """

    def __init__(self, track_id, data_home, dataset_name, index, metadata):
        super().__init__(track_id, data_home, dataset_name, index, metadata)
        self.humdrum_annotated_path = self.get_path("annotations")

        # The title is the first element of the "annotations" index entry
        # (presumably the relative file path -- confirm against the index)
        # with its file extension stripped.
        annotation_entry = self._track_paths["annotations"][0]
        title, _extension = os.path.splitext(annotation_entry)
        self.title = title

    @core.cached_property
    def score(self) -> music21.stream.Score:
        """music21 score parsed from the annotated humdrum file."""
        return load_score(self.humdrum_annotated_path)

    @core.cached_property
    def keys(self) -> Optional[KeyData]:
        """Local keys as a KeyData object."""
        return load_key(self.humdrum_annotated_path)

    @core.cached_property
    def keys_music21(self) -> Optional[List[dict]]:
        """Local keys as a list of ``{"time", "key"}`` dicts."""
        return load_key_music21(self.humdrum_annotated_path)

    @core.cached_property
    def roman_numerals(self) -> Optional[List[dict]]:
        """Roman numerals as a list of ``{"time", "roman_numeral"}`` dicts."""
        return load_roman_numerals(self.humdrum_annotated_path)

    @core.cached_property
    def chords(self) -> Optional[ChordData]:
        """Chords as a ChordData object."""
        return load_chords(self.humdrum_annotated_path)

    @core.cached_property
    def chords_music21(self) -> Optional[List[dict]]:
        """Chords as a list of ``{"time", "chord"}`` dicts."""
        return load_chords_music21(self.humdrum_annotated_path)

    @core.cached_property
    def duration(self) -> int:
        """Time of the last chord annotation (same units as ``chords_music21``)."""
        final_chord = self.chords_music21[-1]
        return final_chord["time"]

    @core.cached_property
    def midi_path(self) -> Optional[str]:
        """Path of the midi file written by ``convert_and_save_to_midi`` (deprecated)."""
        deprecation_notice = (
            "midi_path is deprecated as of 0.3.4 and will be removed in a future version."
        )
        logging.warning(deprecation_notice)
        return convert_and_save_to_midi(self.humdrum_annotated_path)

    def to_jams(self):
        """Get the track's data in jams format

        Returns:
            jams.JAMS: the track's data in jams format

        """
        # Everything is passed as free-form metadata rather than as typed
        # jams annotations (see the notes on "key" and "chord" below).
        track_metadata = {
            "duration": self.duration,
            "title": self.title,
            "key": self.keys,  # format is not the expected by keydata jams namespace
            "chord": self.chords,  # format is not the expected by chorddata jams namespace
            "keys_music21": self.keys_music21,
            "chords_music21": self.chords_music21,
            "roman_numerals": self.roman_numerals,
            "midi_path": self.midi_path,
            "humdrum_annotated_path": self.humdrum_annotated_path,
        }
        return jams_utils.jams_converter(metadata=track_metadata)
def _split_score_annotations(fhandle: TextIO):
    """Parse a humdrum file and separate the score from its roman numerals.

    Args:
        fhandle (file-like): open annotation file; its ``name`` attribute is
            the path given to music21

    Returns:
        * music21.stream.Score - the parsed score with the collected
          RomanNumeral elements removed
        * list - ``(offset, roman numeral)`` tuples, where offset is the
          element's music21 offset in the flattened score

    """
    score = music21.converter.parse(fhandle.name, format="humdrum")

    # Index the numerals by offset; when several share one offset only the
    # last one encountered is kept.
    numerals = list(score.flat.getElementsByClass("RomanNumeral"))
    by_offset = {}
    for numeral in numerals:
        by_offset[numeral.offset] = numeral

    # Strip the collected annotations out of the score itself.
    score.remove(list(by_offset.values()), recurse=True)

    # Keep only the truthy numerals.
    kept = [entry for entry in by_offset.items() if entry[1]]
    return score, kept
[docs]
@io.coerce_to_string_io
def load_score(fhandle: TextIO):
    """Load a haydn op20 score as a music21 stream.

    The roman numeral annotations collected while parsing are discarded;
    only the score is returned.

    Args:
        fhandle (str or file-like): path to score

    Returns:
        music21.stream.Score: score in music21 format

    """
    parsed = _split_score_annotations(fhandle)
    return parsed[0]
def _load_key_base(fhandle, resolution):
    """Collect the local key at every roman numeral annotation.

    Args:
        fhandle (str or file-like): path to key annotations
        resolution (int): the number of pulses, or ticks, per quarter note (PPQ)

    Returns:
        list: one ``{"time": int, "key": key}`` dict per annotation, where
        time is the music21 offset multiplied by ``resolution`` and rounded
        to an integer

    """
    _, numerals = _split_score_annotations(fhandle)
    return [
        {
            "time": int(round(float(offset * resolution))),
            # Prefer the tonicized (secondary) key when the numeral has one.
            "key": numeral.secondaryRomanNumeralKey or numeral.key,
        }
        for offset, numeral in numerals
    ]
def _format_key_string(key_string):
"""Format a key string to match key_mode format
Args:
key_string (str): unformatted key string
Returns:
str: key_mode format key string
"""
return key_string.replace("-", "b").replace(" ", ":")
[docs]
@io.coerce_to_string_io
def load_key(fhandle: TextIO, resolution=28):
    """Load haydn op20 key data from a file

    Consecutive annotations that share the same key are merged into a single
    interval.

    Args:
        fhandle (str or file-like): path to key annotations
        resolution (int): the number of pulses, or ticks, per quarter note (PPQ)

    Returns:
        KeyData: loaded key data, or None if the file contains no roman
        numeral annotations

    """
    keys = _load_key_base(fhandle, resolution)
    if not keys:
        # Without annotations there is no first key to seed the intervals
        # with; report "no data" instead of failing with an IndexError.
        return None

    # The first interval always starts at tick 0, whatever the time of the
    # first annotation.
    start_times = [0]
    end_times = []
    key_names = [_format_key_string(str(keys[0]["key"]))]
    for annotation in keys:
        this_key_string = _format_key_string(str(annotation["key"]))
        # if this is a new key, close the running interval one tick before
        # the change and open a new one
        if this_key_string != key_names[-1]:
            end_times.append(annotation["time"] - 1)
            start_times.append(annotation["time"])
            key_names.append(this_key_string)
    # The last interval ends at the time of the final annotation.
    end_times.append(keys[-1]["time"])

    intervals = np.array([start_times, end_times]).astype(float).T
    return KeyData(intervals, "ticks", key_names, "key_mode")
[docs]
@io.coerce_to_string_io
def load_key_music21(fhandle: TextIO, resolution=28):
    """Load haydn op20 local keys as music21 key objects.

    Args:
        fhandle (str or file-like): path to key annotations
        resolution (int): the number of pulses, or ticks, per quarter note (PPQ)

    Returns:
        list: one ``{"time": int, "key": key}`` dict per annotation, with
        time equal to the music21 offset multiplied by ``resolution``

    """
    key_annotations = _load_key_base(fhandle, resolution)
    return key_annotations
[docs]
@deprecated(
    reason="convert_and_save_to_midi is deprecated and will be removed in a future version",
    version="0.3.4",
)
@io.coerce_to_string_io
def convert_and_save_to_midi(fpath: TextIO):
    """Write the score as a midi file next to the source and return its path.

    The output path is the input file's path with its extension replaced by
    ``.midi``. The roman numeral annotations are stripped before writing.

    Args:
        fpath (str or file-like): path to score file

    Returns:
        str: midi file path

    """
    stem, _extension = os.path.splitext(fpath.name)
    midi_path = stem + ".midi"
    score = _split_score_annotations(fpath)[0]
    score.write("midi", fp=midi_path)
    return midi_path
[docs]
@io.coerce_to_string_io
def load_roman_numerals(fhandle: TextIO, resolution=28):
    """Load the roman numeral figures annotated in a haydn op20 file.

    Args:
        fhandle (str or file-like): path to roman numeral annotations
        resolution (int): the number of pulses, or ticks, per quarter note (PPQ)

    Returns:
        list: one ``{"time": int, "roman_numeral": figure}`` dict per
        annotation, with time equal to the music21 offset multiplied by
        ``resolution`` and rounded to an integer

    """
    _, numerals = _split_score_annotations(fhandle)
    return [
        {
            "time": int(round(float(offset * resolution))),
            "roman_numeral": numeral.figure,
        }
        for offset, numeral in numerals
    ]
def _load_chords_base(fhandle: TextIO, resolution: int = 28):
    """Collect the pitched chord name at every roman numeral annotation.

    Args:
        fhandle (str or file-like): path to chord annotations
        resolution (int): the number of pulses, or ticks, per quarter note (PPQ)

    Returns:
        list: one ``{"time": int, "chord": name}`` dict per annotation, with
        time equal to the music21 offset multiplied by ``resolution`` and
        rounded to an integer

    """
    _, numerals = _split_score_annotations(fhandle)
    return [
        {
            "time": int(round(float(offset * resolution))),
            "chord": numeral.pitchedCommonName,
        }
        for offset, numeral in numerals
    ]
[docs]
@io.coerce_to_string_io
def load_chords(fhandle: TextIO, resolution: int = 28):
    """Load haydn op20 chords data from a file

    Consecutive annotations that share the same chord name are merged into a
    single interval.

    Args:
        fhandle (str or file-like): path to chord annotations
        resolution (int): the number of pulses, or ticks, per quarter note (PPQ)

    Returns:
        ChordData: chord annotations, or None if the file contains no roman
        numeral annotations

    """
    chords = _load_chords_base(fhandle, resolution)
    if not chords:
        # Without annotations there is no first chord to seed the intervals
        # with; report "no data" instead of failing with an IndexError.
        return None

    # The first interval always starts at tick 0, whatever the time of the
    # first annotation.
    start_times, end_times, chord_names = [0], [], [str(chords[0]["chord"])]
    for annotation in chords:
        chord_name = str(annotation["chord"])
        # On a chord change, close the running interval one tick before the
        # change and open a new one.
        if chord_name != chord_names[-1]:
            end_times.append(annotation["time"] - 1)
            start_times.append(annotation["time"])
            chord_names.append(chord_name)
    # The last interval ends at the time of the final annotation.
    end_times.append(chords[-1]["time"])

    intervals = np.array([start_times, end_times]).astype(float).T
    return ChordData(intervals, "ticks", chord_names, "open")
[docs]
@io.coerce_to_string_io
def load_chords_music21(fhandle: TextIO, resolution: int = 28):
    """Load haydn op20 chords as a plain list of timed chord names.

    Args:
        fhandle (str or file-like): path to chord annotations
        resolution (int): the number of pulses, or ticks, per quarter note (PPQ)

    Returns:
        list: one ``{"time": int, "chord": name}`` dict per annotation, with
        time equal to the music21 offset multiplied by ``resolution``

    """
    chord_annotations = _load_chords_base(fhandle, resolution)
    return chord_annotations
[docs]
@core.docstring_inherit(core.Dataset)
class Dataset(core.Dataset):
    """
    The haydn op20 dataset
    """

    def __init__(self, data_home=None, version="default"):
        # Forward ``version`` to the base class; otherwise the caller's
        # choice is silently ignored.
        super().__init__(
            data_home,
            version=version,
            name="haydn_op20",
            track_class=Track,
            bibtex=BIBTEX,
            indexes=INDEXES,
            remotes=REMOTES,
            license_info=LICENSE_INFO,
        )

    # The methods below are deprecated pass-throughs kept for backward
    # compatibility; each forwards its arguments unchanged to the
    # module-level function named in its deprecation reason.

    @deprecated(reason="Use mirdata.datasets.haydn_op20.load_score", version="0.3.4")
    def load_score(self, *args, **kwargs):
        """Deprecated alias of the module-level ``load_score``."""
        return load_score(*args, **kwargs)

    @deprecated(
        reason="Use mirdata.datasets.haydn_op20.load_key_music21", version="0.3.4"
    )
    def load_key_music21(self, *args, **kwargs):
        """Deprecated alias of the module-level ``load_key_music21``."""
        return load_key_music21(*args, **kwargs)

    @deprecated(reason="Use mirdata.datasets.haydn_op20.load_key", version="0.3.4")
    def load_key(self, *args, **kwargs):
        """Deprecated alias of the module-level ``load_key``."""
        return load_key(*args, **kwargs)

    @deprecated(reason="Use mirdata.datasets.haydn_op20.load_chords", version="0.3.4")
    def load_chords(self, *args, **kwargs):
        """Deprecated alias of the module-level ``load_chords``."""
        return load_chords(*args, **kwargs)

    @deprecated(
        reason="Use mirdata.datasets.haydn_op20.load_chords_music21", version="0.3.4"
    )
    def load_chords_music21(self, *args, **kwargs):
        """Deprecated alias of the module-level ``load_chords_music21``."""
        return load_chords_music21(*args, **kwargs)

    @deprecated(
        reason="Use mirdata.datasets.haydn_op20.load_roman_numerals", version="0.3.4"
    )
    def load_roman_numerals(self, *args, **kwargs):
        """Deprecated alias of the module-level ``load_roman_numerals``."""
        return load_roman_numerals(*args, **kwargs)

    @deprecated(
        reason="Use mirdata.datasets.haydn_op20.convert_and_save_to_midi",
        version="0.3.4",
    )
    def load_midi_path(self, *args, **kwargs):
        """Deprecated alias of the module-level ``convert_and_save_to_midi``."""
        return convert_and_save_to_midi(*args, **kwargs)