# Source code for mirdata.datasets.salami

"""SALAMI Dataset Loader

.. admonition:: Dataset Info
    :class: dropdown

    The SALAMI dataset contains Structural Annotations of a Large Amount of Music
    Information: the public portion contains over 2200 annotations of over 1300
    unique tracks.

    NB: mirdata relies on the **corrected** version of the 2.0 annotations:
    Details can be found at https://github.com/bmcfee/salami-data-public/tree/hierarchy-corrections and
    https://github.com/DDMAL/salami-data-public/pull/15.

    For more details, please visit: https://github.com/DDMAL/salami-data-public

"""
import csv
import os
from typing import Optional, TextIO, Tuple

from deprecated.sphinx import deprecated
import librosa
import numpy as np
from smart_open import open

from mirdata import annotations, core, download_utils, io, jams_utils


# BibTeX citation for the SALAMI paper; passed to core.Dataset as its
# `bibtex` argument by Dataset.__init__ below.
BIBTEX = """@inproceedings{smith2011salami,
    title={Design and creation of a large-scale database of structural annotations.},
    author={Smith, Jordan Bennett Louis and Burgoyne, John Ashley and
          Fujinaga, Ichiro and De Roure, David and Downie, J Stephen},
    booktitle={12th International Society for Music Information Retrieval Conference},
    year={2011},
    series = {ISMIR},
}"""

# Index registry passed to core.Dataset as its `indexes` argument.
# "default" and "test" both name the "2.0-corrected" key, which is the only
# entry holding an actual core.Index object.
# NOTE(review): presumably core resolves the string entries as aliases of the
# named version — confirm in mirdata.core.
INDEXES = {
    "default": "2.0-corrected",
    "test": "2.0-corrected",
    "2.0-corrected": core.Index(filename="salami_index_2.0-corrected.json"),
}

# Remote resources passed to core.Dataset as its `remotes` argument.
# Only the annotation archive is listed; no audio remote is defined here
# (DOWNLOAD_INFO explains that the audio must be supplied manually).
# NOTE(review): the checksum looks like an MD5 hex digest — confirm against
# download_utils.RemoteFileMetadata.
REMOTES = {
    "annotations": download_utils.RemoteFileMetadata(
        filename="salami-data-public-hierarchy-corrections.zip",
        url="https://github.com/bmcfee/salami-data-public/archive/hierarchy-corrections.zip",
        checksum="194add2601c09a7279a7433288de81fd",
    )
}
# Manual-setup instructions passed to core.Dataset as its `download_info`
# argument. The text ends with a "{}" format placeholder.
# NOTE(review): presumably core fills the placeholder with the local data
# directory before showing the message — confirm in mirdata.core.
DOWNLOAD_INFO = """
    Unfortunately the audio files of the Salami dataset are not available
    for download. If you have the Salami dataset, place the contents into a
    folder called Salami with the following structure:
        > Salami/
            > salami-data-public-hierarchy-corrections/
            > audio/
    and copy the Salami folder to {}
"""

# License notice (Creative Commons 0) passed to core.Dataset as its
# `license_info` argument by Dataset.__init__ below.
LICENSE_INFO = """
This data is released under a Creative Commons 0 license, effectively dedicating it to
the public domain. More information about this dedication and your rights, please see the
details here: http://creativecommons.org/publicdomain/zero/1.0/ and
http://creativecommons.org/publicdomain/zero/1.0/legalcode.
"""


class Track(core.Track):
    """salami Track class

    Args:
        track_id (str): track id of the track

    Attributes:
        annotator_1_id (str): number that identifies annotator 1
        annotator_1_time (str): time that annotator 1 took to complete the annotation
        annotator_2_id (str): number that identifies annotator 2
        annotator_2_time (str): time that annotator 2 took to complete the annotation
        artist (str): song artist
        audio_path (str): path to the audio file
        broad_genre (str): broad genre of the song
        duration (float): duration of song in seconds
        genre (str): genre of the song
        sections_annotator1_lowercase_path (str): path to annotations in hierarchy level 1 from annotator 1
        sections_annotator1_uppercase_path (str): path to annotations in hierarchy level 0 from annotator 1
        sections_annotator2_lowercase_path (str): path to annotations in hierarchy level 1 from annotator 2
        sections_annotator2_uppercase_path (str): path to annotations in hierarchy level 0 from annotator 2
        source (str): dataset or source of song
        title (str): title of the song

    Cached Properties:
        sections_annotator_1_uppercase (SectionData): annotations in hierarchy level 0 from annotator 1
        sections_annotator_1_lowercase (SectionData): annotations in hierarchy level 1 from annotator 1
        sections_annotator_2_uppercase (SectionData): annotations in hierarchy level 0 from annotator 2
        sections_annotator_2_lowercase (SectionData): annotations in hierarchy level 1 from annotator 2
    """

    def __init__(self, track_id, data_home, dataset_name, index, metadata):
        # All arguments are forwarded unchanged to core.Track.
        super().__init__(track_id, data_home, dataset_name, index, metadata)

        # Resolve the four annotation files (two annotators x two hierarchy
        # levels) through the index keys of this track.
        self.sections_annotator1_uppercase_path = self.get_path("annotator_1_uppercase")
        self.sections_annotator1_lowercase_path = self.get_path("annotator_1_lowercase")
        self.sections_annotator2_uppercase_path = self.get_path("annotator_2_uppercase")
        self.sections_annotator2_lowercase_path = self.get_path("annotator_2_lowercase")

        self.audio_path = self.get_path("audio")

    # The properties below are read-only views onto the per-track metadata
    # entry; each one looks up a single key with ``.get``.

    @property
    def source(self):
        """Value of the "source" metadata field."""
        return self._track_metadata.get("source")

    @property
    def annotator_1_id(self):
        """Value of the "annotator_1_id" metadata field."""
        return self._track_metadata.get("annotator_1_id")

    @property
    def annotator_2_id(self):
        """Value of the "annotator_2_id" metadata field."""
        return self._track_metadata.get("annotator_2_id")

    @property
    def duration(self):
        """Value of the "duration" metadata field."""
        return self._track_metadata.get("duration")

    @property
    def title(self):
        """Value of the "title" metadata field."""
        return self._track_metadata.get("title")

    @property
    def artist(self):
        """Value of the "artist" metadata field."""
        return self._track_metadata.get("artist")

    @property
    def annotator_1_time(self):
        """Value of the "annotator_1_time" metadata field."""
        return self._track_metadata.get("annotator_1_time")

    @property
    def annotator_2_time(self):
        """Value of the "annotator_2_time" metadata field."""
        return self._track_metadata.get("annotator_2_time")

    @property
    def broad_genre(self):
        """Value of the "class" metadata field (exposed as the broad genre)."""
        # The metadata key is "class", not "broad_genre".
        return self._track_metadata.get("class")

    @property
    def genre(self):
        """Value of the "genre" metadata field."""
        return self._track_metadata.get("genre")

    @core.cached_property
    def sections_annotator_1_uppercase(self) -> Optional[annotations.SectionData]:
        return load_sections(self.sections_annotator1_uppercase_path)

    @core.cached_property
    def sections_annotator_1_lowercase(self) -> Optional[annotations.SectionData]:
        return load_sections(self.sections_annotator1_lowercase_path)

    @core.cached_property
    def sections_annotator_2_uppercase(self) -> Optional[annotations.SectionData]:
        return load_sections(self.sections_annotator2_uppercase_path)

    @core.cached_property
    def sections_annotator_2_lowercase(self) -> Optional[annotations.SectionData]:
        return load_sections(self.sections_annotator2_lowercase_path)

    @property
    def audio(self) -> Tuple[np.ndarray, float]:
        """The track's audio

        Returns:
            * np.ndarray - audio signal
            * float - sample rate

        """
        # Not cached: every access goes through load_audio again.
        return load_audio(self.audio_path)

    def to_jams(self):
        """Get the track's data in jams format

        Returns:
            jams.JAMS: the track's data in jams format

        """
        # Each annotator contributes one multi-level section annotation:
        # uppercase labels are tagged as level 0, lowercase labels as level 1.
        return jams_utils.jams_converter(
            audio_path=self.audio_path,
            multi_section_data=[
                (
                    [
                        (self.sections_annotator_1_uppercase, 0),
                        (self.sections_annotator_1_lowercase, 1),
                    ],
                    "annotator_1",
                ),
                (
                    [
                        (self.sections_annotator_2_uppercase, 0),
                        (self.sections_annotator_2_lowercase, 1),
                    ],
                    "annotator_2",
                ),
            ],
            metadata=self._track_metadata,
        )


# no decorator here because of https://github.com/librosa/librosa/issues/1267
def load_audio(fpath: str) -> Tuple[np.ndarray, float]:
    """Load a Salami audio file.

    Args:
        fpath (str): path to audio file

    Returns:
        * np.ndarray - the mono audio signal
        * float - The sample rate of the audio file

    """
    # sr=None asks librosa not to resample; mono=True requests a mono signal.
    return librosa.load(fpath, sr=None, mono=True)


@io.coerce_to_string_io
def load_sections(fhandle: TextIO) -> annotations.SectionData:
    """Load salami sections data from a file

    The file is read as tab-separated rows whose first column is a timestamp
    and whose second column is a label. Consecutive timestamps delimit the
    sections: each row's label is attached to the interval that starts at that
    row's timestamp, so the last row only closes the final interval and its
    label is not used. Blank rows are ignored.

    Args:
        fhandle (str or file-like): File-like object or path to section annotation file

    Returns:
        SectionData: section data

    """
    timestamps = []
    labels = []
    for row in csv.reader(fhandle, delimiter="\t"):
        if not row:
            # csv.reader yields [] for blank lines; skip them rather than
            # failing with IndexError on row[0].
            continue
        timestamps.append(float(row[0]))
        labels.append(row[1])
    boundaries = np.array(timestamps)
    label_array = np.array(labels)

    # remove sections with length == 0: wherever two consecutive timestamps
    # are equal, drop the first of the pair (and its label).
    zero_length = np.where(np.diff(boundaries) == 0)[0]
    boundaries = np.delete(boundaries, zero_length)
    label_array = np.delete(label_array, zero_length)

    return annotations.SectionData(
        # one (start, end) row per pair of consecutive boundaries
        np.array([boundaries[:-1], boundaries[1:]]).T,
        "s",
        list(label_array[:-1]),
        "open",
    )


@core.docstring_inherit(core.Dataset)
class Dataset(core.Dataset):
    """
    The salami dataset
    """

    def __init__(self, data_home=None, version="default"):
        super().__init__(
            data_home,
            version,
            name="salami",
            track_class=Track,
            bibtex=BIBTEX,
            indexes=INDEXES,
            remotes=REMOTES,
            download_info=DOWNLOAD_INFO,
            license_info=LICENSE_INFO,
        )

    @core.cached_property
    def _metadata(self):
        """Parse ``metadata.csv`` into a dict keyed by track id.

        Returns:
            dict: maps each track id (first CSV column) to a dict with the keys
            ``source``, ``annotator_1_id``, ``annotator_2_id``, ``duration``,
            ``title``, ``artist``, ``annotator_1_time``, ``annotator_2_time``,
            ``class`` and ``genre``. ``duration`` is a float, or None when the
            CSV cell is empty; every other value is the raw CSV string.

        Raises:
            FileNotFoundError: if the metadata file is not present under
                ``data_home``.

        """
        metadata_path = os.path.join(
            self.data_home,
            "salami-data-public-hierarchy-corrections",
            "metadata",
            "metadata.csv",
        )

        try:
            with open(metadata_path, "r") as fhandle:
                reader = csv.reader(fhandle, delimiter=",")
                # Drop blank rows and the header row (first cell "SONG_ID").
                raw_data = [
                    row for row in reader if row and row[0] != "SONG_ID"
                ]
        except FileNotFoundError as err:
            # Same exception type as before, now explicitly chained to the
            # underlying error so the missing path stays visible.
            raise FileNotFoundError(
                "Metadata not found. Did you run .download()?"
            ) from err

        metadata_index = {}
        for row in raw_data:
            track_id = row[0]
            # An empty duration cell is reported as None instead of failing
            # in float("").
            duration = float(row[5]) if row[5] != "" else None
            metadata_index[track_id] = {
                "source": row[1],
                "annotator_1_id": row[2],
                "annotator_2_id": row[3],
                "duration": duration,
                "title": row[7],
                "artist": row[8],
                "annotator_1_time": row[10],
                "annotator_2_time": row[11],
                "class": row[14],
                "genre": row[15],
            }

        return metadata_index

    @deprecated(reason="Use mirdata.datasets.salami.load_audio", version="0.3.4")
    def load_audio(self, *args, **kwargs):
        # Thin backward-compatibility wrapper around the module-level function.
        return load_audio(*args, **kwargs)

    @deprecated(reason="Use mirdata.datasets.salami.load_sections", version="0.3.4")
    def load_sections(self, *args, **kwargs):
        # Thin backward-compatibility wrapper around the module-level function.
        return load_sections(*args, **kwargs)