Source code for mirdata.datasets.guitarset

# -*- coding: utf-8 -*-
"""GuitarSet Loader

.. admonition:: Dataset Info
    :class: dropdown

    GuitarSet provides audio recordings of a variety of musical excerpts
    played on an acoustic guitar, along with time-aligned annotations
    including pitch contours, string and fret positions, chords, beats,
    downbeats, and keys.

    GuitarSet contains 360 excerpts that are close to 30 seconds in length.
    The 360 excerpts are the result of the following combinations:

    - 6 players
    - 2 versions: comping (harmonic accompaniment) and soloing (melodic improvisation)
    - 5 styles: Rock, Singer-Songwriter, Bossa Nova, Jazz, and Funk
    - 3 Progressions: 12 Bar Blues, Autumn Leaves, and Pachelbel Canon.
    - 2 Tempi: slow and fast.

    The tonality (key) of each excerpt is sampled uniformly at random.

    GuitarSet was recorded with the help of a hexaphonic pickup, which outputs
    signals for each string separately, allowing automated note-level annotation.
    Excerpts are recorded with both the hexaphonic pickup and a Neumann U-87
    condenser microphone as reference.
    4 audio recordings are provided with each excerpt with the following suffix:

    - hex: original 6 channel wave file from hexaphonic pickup
    - hex_cln: hex wave files with interference removal applied
    - mic: monophonic recording from reference microphone
    - mix: monophonic mixture of original 6 channel file

    Each of the 360 excerpts has an accompanying JAMS file which stores 16 annotations.

    Pitch:

    - 6 pitch_contour annotations (1 per string)
    - 6 midi_note annotations (1 per string)

    Beat and Tempo:

    - 1 beat_position annotation
    - 1 tempo annotation

    Chords:


    - 2 chord annotations: instructed and performed. The instructed chord annotation
      is a digital version of the lead sheet that's provided to the player, and the
      performed chord annotations are inferred from note annotations, using
      segmentation and root from the digital lead sheet annotation.

    For more details, please visit: http://github.com/marl/guitarset/

"""

import logging
import os
import jams
import librosa
import numpy as np

from mirdata import download_utils
from mirdata import core
from mirdata import annotations

# BibTeX entry for the GuitarSet paper; handed to the Dataset constructor as
# its `bibtex` argument.
BIBTEX = """@inproceedings{xi2018guitarset,
title={GuitarSet: A Dataset for Guitar Transcription},
author={Xi, Qingyang and Bittner, Rachel M and Ye, Xuzhou and Pauwels, Johan and Bello, Juan P},
booktitle={International Society of Music Information Retrieval (ISMIR)},
year={2018}
}"""

    "annotations": download_utils.RemoteFileMetadata(
    "audio_hex_debleeded": download_utils.RemoteFileMetadata(
    "audio_hex_original": download_utils.RemoteFileMetadata(
    "audio_mic": download_utils.RemoteFileMetadata(
    "audio_mix": download_utils.RemoteFileMetadata(
    "Jazz": "Jazz",
    "BN": "Bossa Nova",
    "Rock": "Rock",
    "SS": "Singer-Songwriter",
    "Funk": "Funk",
# Labels for the six guitar strings, indexed by string number
# (0 is the low E string, 5 is the high e string).
_GUITAR_STRINGS = ["E", "A", "D", "G", "B", "e"]
# Handle on the dataset index file; track entries are read from
# DATA.index["tracks"].
DATA = core.LargeData("guitarset_index.json")


class Track(core.Track):
    """guitarset Track class

    Args:
        track_id (str): track id of the track
        data_home (str): directory that the index-relative file paths
            are joined onto

    Attributes:
        audio_hex_cln_path (str): path to the debleeded hex wave file
        audio_hex_path (str): path to the original hex wave file
        audio_mic_path (str): path to the mono wave via microphone
        audio_mix_path (str): path to the mono wave via downmixing hex pickup
        jams_path (str): path to the jams file
        mode (str): one of ['solo', 'comp']
            For each excerpt, players are asked to first play in 'comp' mode
            and later play a 'solo' version on top of the already recorded comp.
        player_id (str): ID of the different players.
            one of ['00', '01', ... , '05']
        style (str): one of ['Jazz', 'Bossa Nova', 'Rock', 'Singer-Songwriter', 'Funk']
        tempo (float): BPM of the track
        track_id (str): track id

    Cached Properties:
        beats (BeatData): beat positions
        leadsheet_chords (ChordData): chords as written in the leadsheet
        inferred_chords (ChordData): chords inferred from played transcription
        key_mode (KeyData): key and mode
        pitch_contours (dict): Pitch contours per string
            - 'E': F0Data(...)
            - 'A': F0Data(...)
            - 'D': F0Data(...)
            - 'G': F0Data(...)
            - 'B': F0Data(...)
            - 'e': F0Data(...)
        notes (dict): Notes per string
            - 'E': NoteData(...)
            - 'A': NoteData(...)
            - 'D': NoteData(...)
            - 'G': NoteData(...)
            - 'B': NoteData(...)
            - 'e': NoteData(...)

    Raises:
        ValueError: if track_id is not present in the dataset index

    """

    def __init__(self, track_id, data_home):
        if track_id not in DATA.index["tracks"]:
            raise ValueError("{} is not a valid track ID in GuitarSet".format(track_id))

        self.track_id = track_id

        self._data_home = data_home
        self._track_paths = DATA.index["tracks"][track_id]

        # Each index entry is a sequence whose first element is the path
        # relative to data_home.
        self.audio_hex_cln_path = os.path.join(
            self._data_home, self._track_paths["audio_hex_cln"][0]
        )
        self.audio_hex_path = os.path.join(
            self._data_home, self._track_paths["audio_hex"][0]
        )
        self.audio_mic_path = os.path.join(
            self._data_home, self._track_paths["audio_mic"][0]
        )
        self.audio_mix_path = os.path.join(
            self._data_home, self._track_paths["audio_mix"][0]
        )
        self.jams_path = os.path.join(self._data_home, self._track_paths["jams"][0])

        # The track id encodes the metadata: [PID, S-T-K, mode, rec_mode]
        title_list = track_id.split("_")
        style, tempo, _ = title_list[1].split("-")  # [style, tempo, key]
        self.player_id = title_list[0]
        self.mode = title_list[2]
        self.tempo = float(tempo)
        # The style token carries one trailing character that is not part of
        # the abbreviation (presumably the progression number - confirm), so
        # it is dropped before the lookup.
        self.style = _STYLE_DICT[style[:-1]]

    @core.cached_property
    def beats(self):
        return load_beats(self.jams_path)

    @core.cached_property
    def leadsheet_chords(self):
        if self.mode == "solo":
            logging.info(
                "Chord annotations for solo excerpts are the same with the comp excerpt."
            )
        return load_chords(self.jams_path, leadsheet_version=True)

    @core.cached_property
    def inferred_chords(self):
        if self.mode == "solo":
            logging.info(
                "Chord annotations for solo excerpts are the same as the comp excerpt."
            )
        return load_chords(self.jams_path, leadsheet_version=False)

    @core.cached_property
    def key_mode(self):
        return load_key_mode(self.jams_path)

    @core.cached_property
    def pitch_contours(self):
        # One entry per string, keyed by string label; the position in
        # _GUITAR_STRINGS is the string number passed to the loader.
        return {
            string_name: load_pitch_contour(self.jams_path, string_num)
            for string_num, string_name in enumerate(_GUITAR_STRINGS)
        }

    @core.cached_property
    def notes(self):
        # One entry per string, keyed by string label; the position in
        # _GUITAR_STRINGS is the string number passed to the loader.
        return {
            string_name: load_notes(self.jams_path, string_num)
            for string_num, string_name in enumerate(_GUITAR_STRINGS)
        }

    @property
    def audio_mic(self):
        """The track's audio

        Returns:
            * np.ndarray - audio signal
            * float - sample rate

        """
        audio, sr = load_audio(self.audio_mic_path)
        return audio, sr

    @property
    def audio_mix(self):
        """Mixture audio (mono)

        Returns:
            * np.ndarray - audio signal
            * float - sample rate

        """
        audio, sr = load_audio(self.audio_mix_path)
        return audio, sr

    @property
    def audio_hex(self):
        """Hexaphonic audio (6-channels) with one channel per string

        Returns:
            * np.ndarray - audio signal
            * float - sample rate

        """
        audio, sr = load_multitrack_audio(self.audio_hex_path)
        return audio, sr

    @property
    def audio_hex_cln(self):
        """Hexaphonic audio (6-channels) with one channel per string
           after bleed removal

        Returns:
            * np.ndarray - audio signal
            * float - sample rate

        """
        audio, sr = load_multitrack_audio(self.audio_hex_cln_path)
        return audio, sr

    def to_jams(self):
        """Get the track's data in jams format

        Returns:
            jams.JAMS: the track's data in jams format

        """
        return jams.load(self.jams_path)
def load_audio(audio_path):
    """Load a Guitarset audio file.

    Args:
        audio_path (str): path to audio file

    Returns:
        * np.ndarray - the mono audio signal
        * float - The sample rate of the audio file

    Raises:
        IOError: if audio_path does not exist

    """
    if not os.path.exists(audio_path):
        raise IOError("audio_path {} does not exist".format(audio_path))
    # sr=None keeps the file's own sample rate; mono=True requests a
    # single-channel signal.
    return librosa.load(audio_path, sr=None, mono=True)
def load_multitrack_audio(audio_path):
    """Load a Guitarset multitrack audio file.

    Args:
        audio_path (str): path to audio file

    Returns:
        * np.ndarray - the multichannel audio signal (channels are not downmixed)
        * float - The sample rate of the audio file

    Raises:
        IOError: if audio_path does not exist

    """
    if not os.path.exists(audio_path):
        raise IOError("audio_path {} does not exist".format(audio_path))
    # sr=None keeps the file's own sample rate; mono=False preserves the
    # separate channels instead of downmixing them.
    return librosa.load(audio_path, sr=None, mono=False)
def load_beats(jams_path):
    """Load a Guitarset beats annotation.

    Args:
        jams_path (str): Path of the jams annotation file

    Returns:
        BeatData: Beat data

    Raises:
        IOError: if jams_path does not exist

    """
    if not os.path.exists(jams_path):
        raise IOError("jams_path {} does not exist".format(jams_path))
    jam = jams.load(jams_path)
    # Use the first annotation in the "beat_position" namespace.
    anno = jam.search(namespace="beat_position")[0]
    times, values = anno.to_event_values()
    # Each value is a mapping; only its "position" entry is kept, as an int.
    positions = [int(v["position"]) for v in values]
    return annotations.BeatData(times, np.array(positions))
def load_chords(jams_path, leadsheet_version=True):
    """Load a guitarset chord annotation.

    Args:
        jams_path (str): Path of the jams annotation file
        leadsheet_version (bool):
            Whether or not to load the leadsheet version of the chord annotation
            If False, load the inferred version.

    Returns:
        ChordData: Chord data

    Raises:
        IOError: if jams_path does not exist

    """
    if not os.path.exists(jams_path):
        raise IOError("jams_path {} does not exist".format(jams_path))
    jam = jams.load(jams_path)
    # Of the "chord" annotations in the file, the first is treated as the
    # leadsheet version and the second as the inferred version.
    if leadsheet_version:
        anno = jam.search(namespace="chord")[0]
    else:
        anno = jam.search(namespace="chord")[1]
    intervals, values = anno.to_interval_values()
    return annotations.ChordData(intervals, values)
def load_key_mode(jams_path):
    """Load a Guitarset key-mode annotation.

    Args:
        jams_path (str): Path of the jams annotation file

    Returns:
        KeyData: Key data

    Raises:
        IOError: if jams_path does not exist

    """
    if not os.path.exists(jams_path):
        raise IOError("jams_path {} does not exist".format(jams_path))
    jam = jams.load(jams_path)
    # Use the first annotation in the "key_mode" namespace.
    anno = jam.search(namespace="key_mode")[0]
    intervals, values = anno.to_interval_values()
    return annotations.KeyData(intervals, values)
def load_pitch_contour(jams_path, string_num):
    """Load a guitarset pitch contour annotation for a given string

    Args:
        jams_path (str): Path of the jams annotation file
        string_num (int), in range(6): Which string to load.
            0 is the Low E string, 5 is the high e string.

    Returns:
        F0Data: Pitch contour data for the given string, or None when the
        annotation for that string contains no observations

    Raises:
        IOError: if jams_path does not exist

    """
    if not os.path.exists(jams_path):
        raise IOError("jams_path {} does not exist".format(jams_path))
    jam = jams.load(jams_path)
    anno_arr = jam.search(namespace="pitch_contour")
    # Narrow the per-string annotations down to the requested string; the
    # string number is matched as text against the annotation's data_source.
    anno = anno_arr.search(data_source=str(string_num))[0]
    times, values = anno.to_event_values()
    if len(times) == 0:
        return None
    # Each value is a mapping; only its "frequency" entry is kept.
    frequencies = [v["frequency"] for v in values]
    return annotations.F0Data(times, np.array(frequencies))
def load_notes(jams_path, string_num):
    """Load a guitarset note annotation for a given string

    Args:
        jams_path (str): Path of the jams annotation file
        string_num (int), in range(6): Which string to load.
            0 is the Low E string, 5 is the high e string.

    Returns:
        NoteData: Note data for the given string, or None when the
        annotation for that string contains no notes

    Raises:
        IOError: if jams_path does not exist

    """
    if not os.path.exists(jams_path):
        raise IOError("jams_path {} does not exist".format(jams_path))
    jam = jams.load(jams_path)
    anno_arr = jam.search(namespace="note_midi")
    # Narrow the per-string annotations down to the requested string; the
    # string number is matched as text against the annotation's data_source.
    anno = anno_arr.search(data_source=str(string_num))[0]
    intervals, values = anno.to_interval_values()
    if len(values) == 0:
        return None
    return annotations.NoteData(intervals, np.array(values))
@core.docstring_inherit(core.Dataset)
class Dataset(core.Dataset):
    """
    The guitarset dataset
    """

    def __init__(self, data_home=None):
        # Wire the module-level index, Track class and metadata constants
        # into the generic core.Dataset machinery.
        super().__init__(
            data_home,
            index=DATA.index,
            name="guitarset",
            track_object=Track,
            bibtex=BIBTEX,
            remotes=REMOTES,
            license_info=LICENSE_INFO,
        )

    # The methods below forward to the module-level loader of the same name;
    # core.copy_docs is given that loader so its documentation can be reused.

    @core.copy_docs(load_audio)
    def load_audio(self, *args, **kwargs):
        return load_audio(*args, **kwargs)

    @core.copy_docs(load_multitrack_audio)
    def load_multitrack_audio(self, *args, **kwargs):
        return load_multitrack_audio(*args, **kwargs)

    @core.copy_docs(load_beats)
    def load_beats(self, *args, **kwargs):
        return load_beats(*args, **kwargs)

    @core.copy_docs(load_chords)
    def load_chords(self, *args, **kwargs):
        return load_chords(*args, **kwargs)

    @core.copy_docs(load_key_mode)
    def load_key_mode(self, *args, **kwargs):
        return load_key_mode(*args, **kwargs)

    @core.copy_docs(load_pitch_contour)
    def load_pitch_contour(self, *args, **kwargs):
        return load_pitch_contour(*args, **kwargs)

    @core.copy_docs(load_notes)
    def load_notes(self, *args, **kwargs):
        return load_notes(*args, **kwargs)