# Source code for mirdata.datasets.queen

"""Queen Dataset Loader

.. admonition:: Dataset Info
    :class: dropdown

    Queen Dataset includes chord, key, and segmentation
    annotations for 51 Queen songs. Details can be found in http://matthiasmauch.net/_pdf/mauch_omp_2009.pdf and
    http://isophonics.net/content/reference-annotations-queen.

    The CDs used in this dataset are:
    Queen: Greatest Hits I, Parlophone, 0777 7 8950424
    Queen: Greatest Hits II, Parlophone, CDP 7979712
    Queen: Greatest Hits III, Parlophone, 7243 52389421

    In the process of labelling the chords, C4DM researchers used the following literature to verify their judgements:

    Queen, Greatest Hits I, International Music Publications Ltd, London, ISBN 0-571-52828-7

    Queen, Greatest Hits II, Queen Music Ltd./EMI Music Publishing (Barnes Music Engraving), ISBN 0-86175-465-4

    Acknowledgements
    We'd like to thank our student annotators:

    Eric Gyingy
    Diako Rasoul
    Felix Stiller
    Helena du Toit
    Vinh Ton
    Chuks Chiejine
"""

import csv
import os
from typing import Tuple, TextIO, Optional, BinaryIO

from deprecated.sphinx import deprecated
import librosa
import numpy as np

from mirdata import download_utils, annotations, io, core, jams_utils

# BibTeX citation for the dataset; handed to core.Dataset as `bibtex`.
BIBTEX = """@inproceedings{mauch2009beatles,
    title={OMRAS2 metadata project 2009},
    author={Mauch, Matthias and Cannam, Chris and Davies, Matthew and Dixon, Simon and Harte,
    Christopher and Kolozali, Sefki and Tidhar, Dan and Sandler, Mark},
    booktitle={12th International Society for Music Information Retrieval Conference},
    year={2009},
    series = {ISMIR}
}"""
# No license could be located for this dataset, so this notice is passed to
# core.Dataset as `license_info` instead of actual license terms.
LICENSE_INFO = (
    "Unfortunately we couldn't find the license information for Queen dataset."
)

# Index versions known to this loader. "default" and "test" are aliases that
# both name version "1.0"; "1.0" itself maps to the index file
# queen_index_1.0.json.
# NOTE(review): how the aliases are resolved is decided by core.Dataset, which
# is not visible here.
INDEXES = {
    "default": "1.0",
    "test": "1.0",
    "1.0": core.Index(filename="queen_index_1.0.json"),
}

# Remote resources for this dataset. Only the annotation archive is listed;
# the audio is not downloadable (see DOWNLOAD_INFO).
# NOTE(review): the checksum is 32 hex digits, presumably an MD5 digest --
# confirm against download_utils.
REMOTES = {
    "annotations": download_utils.RemoteFileMetadata(
        filename="Queen Annotations.tar.gz",
        url="http://isophonics.net/files/annotations/Queen%20Annotations.tar.gz",
        checksum="fe11217d32bc222ae418425441974046",
        destination_dir="annotations",
    )
}


# Manual-setup instructions for the audio, which cannot be downloaded.
# The trailing "{}" is a format placeholder.
# NOTE(review): presumably filled in with the dataset's data_home by
# core.Dataset -- confirm.
DOWNLOAD_INFO = """
        Unfortunately the audio files of Queen dataset are not available
        for download. If you have Queen dataset, place the contents into
        a folder called Queen with the following structure:
            > Queen/
                > annotations/
                > audio/
        and copy Queen folder to {}
"""


class Track(core.Track):
    """Queen track class

    Args:
        track_id (str): track id of the track

    Attributes:
        audio_path (str): track audio path
        chords_path (str): chord annotation path
        keys_path (str): key annotation path
        sections_path (str): sections annotation path
        title (str): title of the track, taken from the sections annotation
            file name without its extension; None if the track has no
            sections annotation path
        track_id (str): track id

    Cached Properties:
        chords (ChordData): human-labeled chord annotations
        key (KeyData): local key annotations
        sections (SectionData): section annotations
    """

    def __init__(self, track_id, data_home, dataset_name, index, metadata):
        super().__init__(track_id, data_home, dataset_name, index, metadata)

        self.chords_path = self.get_path("chords")
        self.keys_path = self.get_path("keys")
        self.sections_path = self.get_path("sections")
        self.audio_path = self.get_path("audio")

        # The title is the sections file name minus its extension. splitext
        # strips only the final suffix, so a title that itself contains a
        # period is kept whole instead of being cut at the first ".".
        # NOTE(review): assumes get_path returns None for a missing index
        # entry; in that case there is no file name to derive a title from.
        if self.sections_path is None:
            self.title = None
        else:
            self.title = os.path.splitext(os.path.basename(self.sections_path))[0]

    @core.cached_property
    def chords(self) -> Optional[annotations.ChordData]:
        """Chord annotations loaded from ``chords_path``"""
        return load_chords(self.chords_path)

    @core.cached_property
    def key(self) -> Optional[annotations.KeyData]:
        """Key annotations loaded from ``keys_path``"""
        return load_key(self.keys_path)

    @core.cached_property
    def sections(self) -> Optional[annotations.SectionData]:
        """Section annotations loaded from ``sections_path``"""
        return load_sections(self.sections_path)

    @property
    def audio(self) -> Optional[Tuple[np.ndarray, float]]:
        """The track's audio

        Returns:
            * np.ndarray - audio signal
            * float - sample rate

        """
        return load_audio(self.audio_path)

    def to_jams(self):
        """Get the track's data in jams format

        The sections, chords and key annotations are each passed as a single
        unnamed entry, together with the audio path and the artist/title
        metadata.

        Returns:
            jams.JAMS: the track's data in jams format

        """
        return jams_utils.jams_converter(
            audio_path=self.audio_path,
            section_data=[(self.sections, None)],
            chord_data=[(self.chords, None)],
            key_data=[(self.key, None)],
            metadata={"artist": "Queen", "title": self.title},
        )


@io.coerce_to_bytes_io
def load_audio(fhandle: BinaryIO) -> Tuple[np.ndarray, float]:
    """Load a Queen audio file.

    The audio is decoded with librosa, requesting a 44100 Hz sample rate and
    a mono down-mix.

    Args:
        fhandle (str or file-like): path or file-like object pointing to an audio file

    Returns:
        * np.ndarray - the mono audio signal
        * float - The sample rate of the audio file

    """
    signal, sample_rate = librosa.load(fhandle, sr=44100, mono=True)
    return signal, sample_rate


@io.coerce_to_string_io
def load_chords(fhandle: TextIO) -> annotations.ChordData:
    """Load Queen format chord data from a file

    Each non-blank row is tab-separated and read as
    ``start_time, end_time, chord_label``. Blank rows are ignored.

    Args:
        fhandle (str or file-like): path or file-like object pointing to a chord file

    Returns:
        (ChordData): loaded chord data

    """
    start_times, end_times, chords = [], [], []
    for row in csv.reader(fhandle, delimiter="\t"):
        # csv.reader yields an empty list for a blank line; indexing it would
        # raise IndexError, so rows without any content are skipped.
        if not any(field.strip() for field in row):
            continue
        start_times.append(float(row[0]))
        end_times.append(float(row[1]))
        chords.append(row[2])

    # Stack into one (start, end) interval per row.
    intervals = np.array([start_times, end_times]).T
    return annotations.ChordData(intervals, "s", chords, "harte")


@io.coerce_to_string_io
def load_key(fhandle: TextIO) -> annotations.KeyData:
    """Load Queen format key data from a file

    Rows are tab-separated. Only rows whose third column is exactly ``"Key"``
    are kept; for those, the first two columns are the start and end times
    and the fourth column is the key label. Blank rows and rows with fewer
    than three columns are ignored.

    Args:
        fhandle (str or file-like): path or file-like object pointing to a key file

    Returns:
        (KeyData): loaded key data

    """
    start_times, end_times, keys = [], [], []
    for row in csv.reader(fhandle, delimiter="\t"):
        # Blank or truncated rows have no third column to inspect; testing
        # row[2] directly would raise IndexError on them.
        if len(row) < 3 or row[2] != "Key":
            continue
        start_times.append(float(row[0]))
        end_times.append(float(row[1]))
        keys.append(row[3])

    # Stack into one (start, end) interval per kept row.
    intervals = np.array([start_times, end_times]).T
    return annotations.KeyData(intervals, "s", keys, "key_mode")


@io.coerce_to_string_io
def load_sections(fhandle: TextIO) -> annotations.SectionData:
    """Load Queen format section data from a file

    Each non-blank row is tab-separated; the first two columns are the start
    and end times and the fourth column is the section label. Blank rows are
    ignored.

    Args:
        fhandle (str or file-like): path or file-like object pointing to a section file

    Returns:
        (SectionData): loaded section data

    """
    start_times, end_times, sections = [], [], []
    for row in csv.reader(fhandle, delimiter="\t"):
        # csv.reader yields an empty list for a blank line; indexing it would
        # raise IndexError, so rows without any content are skipped.
        if not any(field.strip() for field in row):
            continue
        start_times.append(float(row[0]))
        end_times.append(float(row[1]))
        sections.append(row[3])

    # Stack into one (start, end) interval per row.
    intervals = np.array([start_times, end_times]).T
    return annotations.SectionData(intervals, "s", sections, "open")


@core.docstring_inherit(core.Dataset)
class Dataset(core.Dataset):
    """
    Queen dataset
    """

    def __init__(self, data_home=None, version="default"):
        # Queen-specific settings forwarded to the generic dataset class.
        queen_settings = dict(
            name="queen",
            track_class=Track,
            bibtex=BIBTEX,
            indexes=INDEXES,
            remotes=REMOTES,
            download_info=DOWNLOAD_INFO,
            license_info=LICENSE_INFO,
        )
        super().__init__(data_home, version, **queen_settings)

    @deprecated(reason="Use mirdata.datasets.queen.load_audio", version="0.3.4")
    def load_audio(self, *args, **kwargs):
        """Forward all arguments to the module-level ``load_audio``"""
        return load_audio(*args, **kwargs)

    @deprecated(reason="Use mirdata.datasets.queen.load_key", version="0.3.4")
    def load_key(self, *args, **kwargs):
        """Forward all arguments to the module-level ``load_key``"""
        return load_key(*args, **kwargs)

    @deprecated(reason="Use mirdata.datasets.queen.load_chords", version="0.3.4")
    def load_chords(self, *args, **kwargs):
        """Forward all arguments to the module-level ``load_chords``"""
        return load_chords(*args, **kwargs)

    @deprecated(reason="Use mirdata.datasets.queen.load_sections", version="0.3.4")
    def load_sections(self, *args, **kwargs):
        """Forward all arguments to the module-level ``load_sections``"""
        return load_sections(*args, **kwargs)