"""GuitarSet Loader
.. admonition:: Dataset Info
:class: dropdown
GuitarSet provides audio recordings of a variety of musical excerpts
played on an acoustic guitar, along with time-aligned annotations
including pitch contours, string and fret positions, chords, beats,
downbeats, and keys.
GuitarSet contains 360 excerpts that are close to 30 seconds in length.
The 360 excerpts are the result of the following combinations:
- 6 players
- 2 versions: comping (harmonic accompaniment) and soloing (melodic improvisation)
- 5 styles: Rock, Singer-Songwriter, Bossa Nova, Jazz, and Funk
- 3 Progressions: 12 Bar Blues, Autumn Leaves, and Pachelbel Canon.
- 2 Tempi: slow and fast.
The tonality (key) of each excerpt is sampled uniformly at random.
GuitarSet was recorded with the help of a hexaphonic pickup, which outputs
signals for each string separately, allowing automated note-level annotation.
Excerpts are recorded with both the hexaphonic pickup and a Neumann U-87
condenser microphone as reference.
3 audio recordings are provided with each excerpt with the following suffix:
- hex: original 6 channel wave file from hexaphonic pickup
- hex_cln: hex wave files with interference removal applied
- mic: monophonic recording from reference microphone
- mix: monophonic mixture of original 6 channel file
Each of the 360 excerpts has an accompanying JAMS file which stores 16 annotations.
Pitch:
- 6 pitch_contour annotations (1 per string)
- 6 midi_note annotations (1 per string)
Beat and Tempo:
- 1 beat_position annotation
- 1 tempo annotation
Chords:
- 2 chord annotations: instructed and performed. The instructed chord annotation
is a digital version of the lead sheet that's provided to the player, and the
performed chord annotations are inferred from note annotations, using
segmentation and root from the digital lead sheet annotation.
For more details, please visit: http://github.com/marl/guitarset/
"""
import logging
import os
from typing import BinaryIO, Optional, TextIO, Tuple, Dict, List
from deprecated.sphinx import deprecated
import jams
import librosa
import numpy as np
from smart_open import open
from mirdata import annotations, core, download_utils, io
# Citation for the dataset's accompanying publication (ISMIR 2018).
BIBTEX = """@inproceedings{xi2018guitarset,
title={GuitarSet: A Dataset for Guitar Transcription},
author={Xi, Qingyang and Bittner, Rachel M and Ye, Xuzhou and Pauwels, Johan and Bello, Juan P},
booktitle={International Society of Music Information Retrieval (ISMIR)},
year={2018}
}"""

# Available dataset index versions; "default" and "test" alias a concrete version.
INDEXES = {
    "default": "1.1.0",
    "test": "1.1.0",
    "1.1.0": core.Index(filename="guitarset_index_1.1.0.json"),
}

# Remote archives to download: the JAMS annotations plus the four audio
# variants (hex original, hex debleeded, mono mic, mono pickup mix).
REMOTES = {
    "annotations": download_utils.RemoteFileMetadata(
        filename="annotation.zip",
        url="https://zenodo.org/record/3371780/files/annotation.zip?download=1",
        checksum="b39b78e63d3446f2e54ddb7a54df9b10",
        destination_dir="annotation",
    ),
    "audio_hex_debleeded": download_utils.RemoteFileMetadata(
        filename="audio_hex-pickup_debleeded.zip",
        url="https://zenodo.org/record/3371780/files/audio_hex-pickup_debleeded.zip?download=1",
        checksum="c31d97279464c9a67e640cb9061fb0c6",
        destination_dir="audio_hex-pickup_debleeded",
    ),
    "audio_hex_original": download_utils.RemoteFileMetadata(
        filename="audio_hex-pickup_original.zip",
        url="https://zenodo.org/record/3371780/files/audio_hex-pickup_original.zip?download=1",
        checksum="f9911bf217cb40e9e68edf3726ef86cc",
        destination_dir="audio_hex-pickup_original",
    ),
    "audio_mic": download_utils.RemoteFileMetadata(
        filename="audio_mono-mic.zip",
        url="https://zenodo.org/record/3371780/files/audio_mono-mic.zip?download=1",
        checksum="275966d6610ac34999b58426beb119c3",
        destination_dir="audio_mono-mic",
    ),
    "audio_mix": download_utils.RemoteFileMetadata(
        filename="audio_mono-pickup_mix.zip",
        url="https://zenodo.org/record/3371780/files/audio_mono-pickup_mix.zip?download=1",
        checksum="aecce79f425a44e2055e46f680e10f6a",
        destination_dir="audio_mono-pickup_mix",
    ),
}

# Map from the style abbreviation embedded in track ids to the full style name.
_STYLE_DICT = {
    "Jazz": "Jazz",
    "BN": "Bossa Nova",
    "Rock": "Rock",
    "SS": "Singer-Songwriter",
    "Funk": "Funk",
}
# String names from low E (index 0) to high e (index 5).
_GUITAR_STRINGS = ["E", "A", "D", "G", "B", "e"]
# Pitch contour hop size in seconds: 256 samples at 44.1 kHz.
CONTOUR_HOP = 256.0 / 44100
LICENSE_INFO = "MIT License."
class Track(core.Track):
    """guitarset Track class

    Args:
        track_id (str): track id of the track

    Attributes:
        audio_hex_cln_path (str): path to the debleeded hex wave file
        audio_hex_path (str): path to the original hex wave file
        audio_mic_path (str): path to the mono wave via microphone
        audio_mix_path (str): path to the mono wave via downmixing hex pickup
        jams_path (str): path to the jams file
        mode (str): one of ['solo', 'comp'].
            For each excerpt, players are asked to first play in 'comp' mode
            and later play a 'solo' version on top of the already recorded comp.
        player_id (str): ID of the different players.
            one of ['00', '01', ... , '05']
        style (str): one of ['Jazz', 'Bossa Nova', 'Rock', 'Singer-Songwriter', 'Funk']
        tempo (float): BPM of the track
        track_id (str): track id

    Cached Properties:
        beats (BeatData): beat positions
        leadsheet_chords (ChordData): chords as written in the leadsheet
        inferred_chords (ChordData): chords inferred from played transcription
        key_mode (KeyData): key and mode
        pitch_contours (dict):
            Pitch contours per string (None for a string with no contour)
            - 'E': F0Data(...)
            - 'A': F0Data(...)
            - 'D': F0Data(...)
            - 'G': F0Data(...)
            - 'B': F0Data(...)
            - 'e': F0Data(...)
        multif0 (MultiF0Data): all pitch contour data as one multif0 annotation
        notes (dict):
            Notes per string (None for a string with no notes)
            - 'E': NoteData(...)
            - 'A': NoteData(...)
            - 'D': NoteData(...)
            - 'G': NoteData(...)
            - 'B': NoteData(...)
            - 'e': NoteData(...)
        notes_all (NoteData): all note data as one note annotation

    """

    def __init__(self, track_id, data_home, dataset_name, index, metadata):
        super().__init__(track_id, data_home, dataset_name, index, metadata)

        self.audio_hex_cln_path = self.get_path("audio_hex_cln")
        self.audio_hex_path = self.get_path("audio_hex")
        self.audio_mic_path = self.get_path("audio_mic")
        self.audio_mix_path = self.get_path("audio_mix")
        self.jams_path = self.get_path("jams")

        title_list = track_id.split("_")  # [PID, S-T-K, mode, rec_mode]
        style, tempo, _ = title_list[1].split("-")  # [style, tempo, key]
        self.player_id = title_list[0]
        self.mode = title_list[2]
        self.tempo = float(tempo)
        # the style token ends with a progression digit (e.g. "BN1");
        # strip it before looking up the full style name
        self.style = _STYLE_DICT[style[:-1]]

    @core.cached_property
    def beats(self) -> Optional[annotations.BeatData]:
        return load_beats(self.jams_path)

    @core.cached_property
    def leadsheet_chords(self) -> Optional[annotations.ChordData]:
        if self.mode == "solo":
            logging.info(
                "Chord annotations for solo excerpts are the same with the comp excerpt."
            )
        return load_chords(self.jams_path, True)

    @core.cached_property
    def inferred_chords(self) -> Optional[annotations.ChordData]:
        if self.mode == "solo":
            logging.info(
                "Chord annotations for solo excerpts are the same as the comp excerpt."
            )
        return load_chords(self.jams_path, False)

    @core.cached_property
    def key_mode(self) -> Optional[annotations.KeyData]:
        return load_key_mode(self.jams_path)

    @core.cached_property
    def pitch_contours(self) -> Dict[str, Optional[annotations.F0Data]]:
        contours = {}
        # iterate over the 6 strings, low E (0) to high e (5)
        for i in range(6):
            contours[_GUITAR_STRINGS[i]] = load_pitch_contour(self.jams_path, i)
        return contours

    @core.cached_property
    def multif0(self) -> annotations.MultiF0Data:
        contours: List[annotations.F0Data] = list(self.pitch_contours.values())
        # use the time grid of the longest per-string contour as the common grid
        # NOTE(review): assumes at least one string has a contour; if all six
        # entries are None this raises AttributeError — confirm against dataset.
        max_times = np.argmax(
            [
                0 if contour_data is None else len(contour_data.times)
                for contour_data in contours
            ]
        )  # type: ignore
        times = contours[max_times].times  # type: ignore
        frequency_list: List[list] = [[] for _ in times]
        # collect the voiced (nonzero) frequencies of every string per frame
        for contour in contours:
            if contour is None:
                continue
            for i, f in enumerate(contour.frequencies):
                if f > 0:
                    frequency_list[i].append(f)

        return annotations.MultiF0Data(times, "s", frequency_list, "hz")

    @core.cached_property
    def notes(self) -> Dict[str, Optional[annotations.NoteData]]:
        notes = {}
        # iterate over the 6 strings, low E (0) to high e (5)
        for i in range(6):
            notes[_GUITAR_STRINGS[i]] = load_notes(self.jams_path, i)
        return notes

    @core.cached_property
    def notes_all(self) -> Optional[annotations.NoteData]:
        # merge the per-string note annotations into a single NoteData
        # NOTE(review): relies on NoteData.__add__ accepting a None operand
        # for strings with no notes — confirm in the annotations module.
        all_note_data = None
        for note_data in self.notes.values():
            if all_note_data is None:
                all_note_data = note_data
            else:
                all_note_data += note_data
        return all_note_data

    @property
    def audio_mic(self) -> Optional[Tuple[np.ndarray, float]]:
        """The track's audio

        Returns:
            * np.ndarray - audio signal
            * float - sample rate

        """
        return load_audio(self.audio_mic_path)

    @property
    def audio_mix(self) -> Optional[Tuple[np.ndarray, float]]:
        """Mixture audio (mono)

        Returns:
            * np.ndarray - audio signal
            * float - sample rate

        """
        return load_audio(self.audio_mix_path)

    @property
    def audio_hex(self) -> Optional[Tuple[np.ndarray, float]]:
        """Hexaphonic audio (6-channels) with one channel per string

        Returns:
            * np.ndarray - audio signal
            * float - sample rate

        """
        return load_multitrack_audio(self.audio_hex_path)

    @property
    def audio_hex_cln(self) -> Optional[Tuple[np.ndarray, float]]:
        """Hexaphonic audio (6-channels) with one channel per string
        after bleed removal

        Returns:
            * np.ndarray - audio signal
            * float - sample rate

        """
        return load_multitrack_audio(self.audio_hex_cln_path)

    def to_jams(self):
        """Get the track's data in jams format

        Returns:
            jams.JAMS: the track's data in jams format

        """
        return jams.load(self.jams_path)
@io.coerce_to_bytes_io
def load_audio(fhandle: BinaryIO) -> Tuple[np.ndarray, float]:
    """Load a Guitarset audio file.

    Args:
        fhandle (str or file-like): File-like object or path to audio file

    Returns:
        * np.ndarray - the mono audio signal
        * float - The sample rate of the audio file

    """
    # sr=None preserves the file's native sample rate
    return librosa.load(fhandle, sr=None, mono=True)
@io.coerce_to_bytes_io
def load_multitrack_audio(fhandle: BinaryIO) -> Tuple[np.ndarray, float]:
    """Load a Guitarset multitrack audio file.

    Args:
        fhandle (str or file-like): File-like object or path to audio file

    Returns:
        * np.ndarray - the multichannel audio signal (one channel per string)
        * float - The sample rate of the audio file

    """
    # mono=False keeps all 6 hexaphonic pickup channels separate
    return librosa.load(fhandle, sr=None, mono=False)
@io.coerce_to_string_io
def load_beats(fhandle: TextIO) -> annotations.BeatData:
    """Load a Guitarset beats annotation.

    Args:
        fhandle (str or file-like): File-like object or path
            of the jams annotation file

    Returns:
        BeatData: Beat data

    """
    jam = jams.load(fhandle)
    anno = jam.search(namespace="beat_position")[0]
    times, values = anno.to_event_values()
    # each event value is a dict; "position" is the beat's index within its bar
    positions = [int(v["position"]) for v in values]
    return annotations.BeatData(times, "s", np.array(positions), "bar_index")
# no decorator because of https://github.com/mir-dataset-loaders/mirdata/issues/503
def load_chords(jams_path, leadsheet_version):
    """Load a guitarset chord annotation.

    Args:
        jams_path (str): path to the jams annotation file
        leadsheet_version (bool):
            Whether or not to load the leadsheet version of the chord annotation.
            If False, load the inferred version.

    Returns:
        ChordData: Chord data

    Raises:
        FileNotFoundError: if jams_path does not exist

    """
    try:
        with open(jams_path, "r") as fhandle:
            jam = jams.load(fhandle)
    except FileNotFoundError:
        raise FileNotFoundError("jams_path {} does not exist".format(jams_path))

    # each jams file stores two chord annotations: [0] leadsheet, [1] inferred
    if leadsheet_version:
        anno = jam.search(namespace="chord")[0]
    else:
        anno = jam.search(namespace="chord")[1]
    intervals, values = anno.to_interval_values()
    return annotations.ChordData(intervals, "s", values, "jams")
@io.coerce_to_string_io
def load_key_mode(fhandle: TextIO) -> annotations.KeyData:
    """Load a Guitarset key-mode annotation.

    Args:
        fhandle (str or file-like): File-like object or path of the jams annotation file

    Returns:
        KeyData: Key data

    """
    jam = jams.load(fhandle)
    anno = jam.search(namespace="key_mode")[0]
    intervals, values = anno.to_interval_values()
    return annotations.KeyData(intervals, "s", values, "key_mode")
def _fill_pitch_contour(times, freqs, voicing, max_time, contour_hop, duration=None):
"""Fill a pitch contour with missing time stamps (during unpitched frames)
Args:
times (np.array): array of time stamps in seconds
freqs (np.array): array of pitch values in Hz
voicing (np.array): array of voicings
max_time (float): maximum time stamp
contour_hop (float): hop size in seconds
duration (float, optional): Total duration. Defaults to None.
Returns:
tuple: filled_times, filled_frequencies, filled_voicing
"""
if duration is not None and max_time > duration:
max_time = duration
n_stamps = int(np.floor((max_time / contour_hop)))
filled_times = np.arange(n_stamps) * contour_hop
filled_freqs = np.zeros((len(filled_times),))
filled_voicing = np.zeros((len(filled_times),))
for time, freq, voc in zip(times, freqs, voicing):
t_idx = int(np.round(time / contour_hop))
if time > max_time or t_idx >= n_stamps:
continue
filled_freqs[t_idx] = freq
filled_voicing[t_idx] = voc
return filled_times, filled_freqs, filled_voicing
# no decorator because of https://github.com/mir-dataset-loaders/mirdata/issues/503
def load_pitch_contour(jams_path, string_num):
    """Load a guitarset pitch contour annotation for a given string

    Args:
        jams_path (str): path to the jams annotation file
        string_num (int), in range(6): Which string to load.
            0 is the Low E string, 5 is the high e string.

    Returns:
        F0Data: Pitch contour data for the given string, or None if the
        string has no contour events

    Raises:
        FileNotFoundError: if jams_path does not exist

    """
    try:
        with open(jams_path, "r") as fhandle:
            jam = jams.load(fhandle)
    except FileNotFoundError:
        raise FileNotFoundError("jams_path {} does not exist".format(jams_path))

    # one pitch_contour annotation per string, keyed by data_source
    anno_arr = jam.search(namespace="pitch_contour")
    anno = anno_arr.search(data_source=str(string_num))[0]
    times, values = anno.to_event_values()
    if len(times) == 0:
        return None
    frequencies = np.array([v["frequency"] for v in values])
    voicing = np.array([float(v["voiced"]) for v in values])
    # a zero frequency is never voiced, regardless of the stored flag
    voicing[frequencies == 0] = 0
    # resample onto a uniform CONTOUR_HOP grid, marking gaps as unvoiced
    filled_times, filled_freqs, filled_voicing = _fill_pitch_contour(
        times, frequencies, voicing, np.max(times), CONTOUR_HOP
    )
    return annotations.F0Data(
        filled_times, "s", filled_freqs, "hz", filled_voicing, "binary"
    )
# no decorator because of https://github.com/mir-dataset-loaders/mirdata/issues/503
def load_notes(jams_path, string_num):
    """Load a guitarset note annotation for a given string

    Args:
        jams_path (str): path to the jams annotation file
        string_num (int), in range(6): Which string to load.
            0 is the Low E string, 5 is the high e string.

    Returns:
        NoteData: Note data for the given string, or None if the string
        has no note events

    Raises:
        FileNotFoundError: if jams_path does not exist

    """
    try:
        with open(jams_path) as fhandle:
            jam = jams.load(fhandle)
    except FileNotFoundError:
        raise FileNotFoundError("jams_path {} does not exist".format(jams_path))

    # one note_midi annotation per string, keyed by data_source
    anno_arr = jam.search(namespace="note_midi")
    anno = anno_arr.search(data_source=str(string_num))[0]
    intervals, values = anno.to_interval_values()
    if len(values) == 0:
        return None
    return annotations.NoteData(intervals, "s", np.array(values), "midi")
@core.docstring_inherit(core.Dataset)
class Dataset(core.Dataset):
    """
    The guitarset dataset
    """

    def __init__(self, data_home=None, version="default"):
        super().__init__(
            data_home,
            version,
            name="guitarset",
            track_class=Track,
            bibtex=BIBTEX,
            indexes=INDEXES,
            remotes=REMOTES,
            license_info=LICENSE_INFO,
        )

    # The methods below are deprecated pass-throughs kept for backward
    # compatibility; use the module-level loader functions instead.

    @deprecated(reason="Use mirdata.datasets.guitarset.load_audio", version="0.3.4")
    def load_audio(self, *args, **kwargs):
        return load_audio(*args, **kwargs)

    @deprecated(
        reason="Use mirdata.datasets.guitarset.load_multitrack_audio", version="0.3.4"
    )
    def load_multitrack_audio(self, *args, **kwargs):
        return load_multitrack_audio(*args, **kwargs)

    @deprecated(reason="Use mirdata.datasets.guitarset.load_beats", version="0.3.4")
    def load_beats(self, *args, **kwargs):
        return load_beats(*args, **kwargs)

    @deprecated(reason="Use mirdata.datasets.guitarset.load_chords", version="0.3.4")
    def load_chords(self, *args, **kwargs):
        return load_chords(*args, **kwargs)

    @deprecated(reason="Use mirdata.datasets.guitarset.load_key_mode", version="0.3.4")
    def load_key_mode(self, *args, **kwargs):
        return load_key_mode(*args, **kwargs)

    @deprecated(
        reason="Use mirdata.datasets.guitarset.load_pitch_contour", version="0.3.4"
    )
    def load_pitch_contour(self, *args, **kwargs):
        return load_pitch_contour(*args, **kwargs)

    @deprecated(reason="Use mirdata.datasets.guitarset.load_notes", version="0.3.4")
    def load_notes(self, *args, **kwargs):
        return load_notes(*args, **kwargs)