"""PHENICX-Anechoic Dataset Loader

.. admonition:: Dataset Info
    :class: dropdown

    This dataset includes audio and annotations useful for tasks such as score-informed source separation, score following, multi-pitch estimation, transcription, and instrument detection in the context of symphonic music. It was introduced in:
    M. Miron, J. Carabias-Orti, J. J. Bosch, E. Gómez and J. Janer, "Score-informed source separation for multi-channel orchestral recordings", Journal of Electrical and Computer Engineering (2016)

    We do not provide the original audio files, which can be found at the web page hosted by Aalto University. However, with their permission we distribute the denoised versions of some of the anechoic orchestral recordings. The original dataset was introduced in:
    Pätynen, J., Pulkki, V., and Lokki, T., "Anechoic recording system for symphony orchestra," Acta Acustica united with Acustica, vol. 94, nr. 6, pp. 856-865, November/December 2008.

    Additionally, we provide the associated musical note onset and offset annotations, and the Roomsim configuration files used to generate the multi-microphone recordings.

    The original anechoic dataset in Pätynen et al. consists of four passages of symphonic music from the Classical and Romantic periods. That work presented a set of anechoic recordings for each of the instruments, which were then synchronized with each other so that they could later be combined into a mix of the full orchestra. In order to keep the evaluation setup consistent across the four pieces, we selected the following instruments: violin, viola, cello, double bass, oboe, flute, clarinet, horn, trumpet and bassoon. The characteristics of the four pieces are listed below:

    Mozart
    - duration: 3min 47s
    - period: classical
    - no. sources: 8
    - total no. instruments: 10
    - max. instruments/source: 2

    Beethoven
    - duration: 3min 11s
    - period: classical
    - no. sources: 10
    - total no. instruments: 20
    - max. instruments/source: 4

    Mahler
    - duration: 2min 12s
    - period: romantic
    - no. sources: 10
    - total no. instruments: 30
    - max. instruments/source: 4

    Bruckner
    - duration: 1min 27s
    - period: romantic
    - no. sources: 10
    - total no. instruments: 39
    - max. instruments/source: 12

    For more details, please visit: https://www.upf.edu/web/mtg/phenicx-anechoic
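
    A minimal usage sketch (assuming the standard top-level mirdata API; the
    dataset must be downloaded before audio can be loaded):

    .. code-block:: python

        import mirdata

        dataset = mirdata.initialize("phenicx_anechoic")
        dataset.download()  # fetches the denoised recordings and annotations
        track = dataset.choice_track()  # one instrument of one piece
        audio, sr = track.audio  # mono mix of all voices of that instrument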

"""

from typing import BinaryIO, Optional, TextIO, Tuple, cast

from deprecated.sphinx import deprecated
import librosa
import numpy as np

from mirdata import annotations, core, download_utils, io, jams_utils


BIBTEX = """
@article{miron2016score,
  title={Score-informed source separation for multichannel orchestral recordings},
  author={Miron, Marius and Carabias-Orti, Julio J and Bosch, Juan J and G{\'o}mez, Emilia and Janer, Jordi},
  journal={Journal of Electrical and Computer Engineering},
  volume={2016},
  year={2016},
  publisher={Hindawi}
}
@article{patynen2008anechoic,
  title={Anechoic recording system for symphony orchestra},
  author={P{\"a}tynen, Jukka and Pulkki, Ville and Lokki, Tapio},
  journal={Acta Acustica united with Acustica},
  volume={94},
  number={6},
  pages={856--865},
  year={2008},
  publisher={S. Hirzel Verlag}
}
"""

INDEXES = {
    "default": "1",
    "test": "1",
    "1": core.Index(filename="phenicx_anechoic_index_1.json"),
}

REMOTES = {
    "all": download_utils.RemoteFileMetadata(
        filename="PHENICX-Anechoic.zip",
        url="https://zenodo.org/record/840025/files/PHENICX-Anechoic.zip?download=1",
        checksum="7fec47568263476ecac0103aef608629",
        unpack_directories=["PHENICX-Anechoic"],
    )
}

LICENSE_INFO = """
Creative Commons Attribution Non Commercial Share Alike 4.0 International
"""

DATASET_SECTIONS = {
    "doublebass": "strings",
    "cello": "strings",
    "clarinet": "woodwinds",
    "viola": "strings",
    "violin": "strings",
    "oboe": "woodwinds",
    "flute": "woodwinds",
    "trumpet": "brass",
    "bassoon": "woodwinds",
    "horn": "brass",
}


class Track(core.Track):
    """Phenicx-Anechoic Track class

    Args:
        track_id (str): track id of the track

    Attributes:
        audio_paths (list): paths to the audio files
        notes_path (str): path to the time-aligned score file
        notes_original_path (str): path to the original score file
        instrument (str): the name of the instrument
        piece (str): the name of the piece
        n_voices (int): the number of voices in this instrument
        track_id (str): track id

    Cached Properties:
        notes (NoteData): notes annotations that have been time-aligned to the audio
        notes_original (NoteData): original score representation, not time-aligned

    """

    def __init__(self, track_id, data_home, dataset_name, index, metadata):
        super().__init__(track_id, data_home, dataset_name, index, metadata)

        self.instrument = self.track_id.split("-")[1]
        self.piece = self.track_id.split("-")[0]

        self.audio_paths = [
            self.get_path(key) for key in self._track_paths if "audio_" in key
        ]
        self.n_voices = len(self.audio_paths)

        self.notes_path = self.get_path("notes")
        self.notes_original_path = self.get_path("notes_original")

    @property
    def audio(self) -> Optional[Tuple[np.ndarray, float]]:
        """the track's audio

        Returns:
            * np.ndarray - the mono audio signal
            * float - The sample rate of the audio file

        """
        # average the mono signals of all voices of this instrument
        audio_mix, sr = cast(Tuple[np.ndarray, float], load_audio(self.audio_paths[0]))
        for i in range(1, self.n_voices):
            audio, _ = cast(Tuple[np.ndarray, float], load_audio(self.audio_paths[i]))
            audio_mix += audio
        audio_mix /= self.n_voices
        return audio_mix, sr

    @core.cached_property
    def notes(self) -> Optional[annotations.NoteData]:
        """the track's notes corresponding to the score aligned to the audio

        Returns:
            NoteData: Note data for the track

        """
        return load_score(self.notes_path)

    @core.cached_property
    def notes_original(self) -> Optional[annotations.NoteData]:
        """the track's notes corresponding to the original score

        Returns:
            NoteData: Note data for the track

        """
        return load_score(self.notes_original_path)

    def get_audio_voice(self, id_voice: int) -> Optional[Tuple[np.ndarray, float]]:
        """the audio of a single voice of the track's instrument

        Args:
            id_voice (int): The integer identifier for the voice,
                e.g. 2 for bassoon-2

        Returns:
            * np.ndarray - the mono audio signal
            * float - The sample rate of the audio file

        """
        if id_voice >= self.n_voices:
            raise ValueError("id_voice={} is out of range".format(id_voice))
        return load_audio(self.audio_paths[id_voice])

    def to_jams(self):
        """Get the track's data in jams format

        Returns:
            jams.JAMS: the track's data in jams format

        """
        return jams_utils.jams_converter(
            audio_path=self.audio_paths[0],
            note_data=[(self.notes, "aligned notes")],
        )
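

# Usage sketch (illustrative; track ids follow a "<piece>-<instrument>" pattern,
# and the exact ids depend on the downloaded index):
#
#     import mirdata
#     track = mirdata.initialize("phenicx_anechoic").track("beethoven-violin")
#     y, sr = track.audio                 # average of all voices of the instrument
#     y0, sr0 = track.get_audio_voice(0)  # a single voice, e.g. violin-1
#     notes = track.notes                 # score-aligned note annotations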


class MultiTrack(core.MultiTrack):
    """Phenicx-Anechoic MultiTrack class

    Args:
        mtrack_id (str): multitrack id
        data_home (str): Local path where the dataset is stored.
            If `None`, looks for the data in the default directory, `~/mir_datasets/Phenicx-Anechoic`

    Attributes:
        track_audio_property (str): the attribute of Track which is used for mixing
        mtrack_id (str): multitrack id
        piece (str): the classical music piece associated with this multitrack
        tracks (dict): dict of track ids and the corresponding Tracks
        instruments (dict): dict of instruments and the corresponding track id
        sections (dict): dict of sections and the corresponding list of track ids for each section

    """

    def __init__(
        self, mtrack_id, data_home, dataset_name, index, track_class, metadata
    ):
        super().__init__(mtrack_id, data_home, dataset_name, index, Track, metadata)

        # parse the track ids into a dict mapping each instrument to its track id
        self.instruments = {
            source.replace(self.mtrack_id + "-", ""): source
            for source in self.track_ids
        }
        # group the track ids by orchestral section
        self.sections = {"brass": [], "strings": [], "woodwinds": []}
        for instrument, track_id in self.instruments.items():
            self.sections[DATASET_SECTIONS[instrument]].append(track_id)
        self.piece = self.mtrack_id

    @property
    def track_audio_property(self):
        # the attribute of Track which returns the relevant audio for mixing
        return "audio"

    def get_audio_for_instrument(self, instrument):
        """Get the audio for a particular instrument

        Args:
            instrument (str): the instrument to get audio for

        Returns:
            np.ndarray: instrument audio with shape (n_samples, n_channels)

        """
        if instrument not in self.instruments.keys():
            raise ValueError(
                "instrument={} is not in this multitrack. Must be one of {}".format(
                    instrument, self.instruments.keys()
                )
            )
        return getattr(
            self.tracks[self.instruments[instrument]], self.track_audio_property
        )[0]

    def get_audio_for_section(self, section):
        """Get the audio for a particular section

        Args:
            section (str): the section to get audio for

        Returns:
            np.ndarray: section audio with shape (n_samples, n_channels)

        """
        if section not in self.sections.keys():
            raise ValueError(
                "section={} is not valid for this multitrack, must be one of {}".format(
                    section, self.sections.keys()
                )
            )
        return self.get_target(self.sections[section])

    def get_notes_target(self, track_keys, notes_property="notes"):
        """Get the notes for all the tracks

        Args:
            track_keys (list): list of track keys to get the NoteData for
            notes_property (str): the attribute associated with NoteData, notes or notes_original

        Returns:
            NoteData: Note data for the tracks

        """
        notes_target = None
        for k in track_keys:
            score = getattr(self.tracks[k], notes_property)
            if notes_target is None:
                notes_target = score
            else:
                notes_target += score
        return notes_target

    def get_notes_for_instrument(self, instrument, notes_property="notes"):
        """Get the notes for a particular instrument

        Args:
            instrument (str): the instrument to get the notes for
            notes_property (str): the attribute associated with NoteData, notes or notes_original

        Returns:
            NoteData: Note data for the instrument

        """
        return getattr(self.tracks[self.instruments[instrument]], notes_property)

    def get_notes_for_section(self, section, notes_property="notes"):
        """Get the notes for a particular section

        Args:
            section (str): the section to get the notes for
            notes_property (str): the attribute associated with NoteData, notes or notes_original

        Returns:
            NoteData: Note data for the section

        """
        return self.get_notes_target(
            self.sections[section], notes_property=notes_property
        )
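

# Usage sketch (illustrative; multitrack ids are piece names, e.g. "beethoven"):
#
#     import mirdata
#     mtrack = mirdata.initialize("phenicx_anechoic").multitrack("beethoven")
#     strings = mtrack.get_audio_for_section("strings")  # linear mix of string tracks
#     oboe = mtrack.get_audio_for_instrument("oboe")
#     notes = mtrack.get_notes_for_section("woodwinds")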


@io.coerce_to_bytes_io
def load_audio(fhandle: BinaryIO) -> Tuple[np.ndarray, float]:
    """Load a Phenicx-Anechoic audio file.

    Args:
        fhandle (str or file-like): File-like object or path to audio file

    Returns:
        * np.ndarray - the audio signal
        * float - The sample rate of the audio file

    """
    return librosa.load(fhandle, sr=None, mono=True)


@io.coerce_to_string_io
def load_score(fhandle: TextIO) -> annotations.NoteData:
    """Load a Phenicx-Anechoic score file.

    Args:
        fhandle (str or file-like): File-like object or path to score file

    Returns:
        NoteData: Note data for the given track

    """
    # read start and end times from the first two columns
    intervals = np.loadtxt(fhandle, delimiter=",", usecols=[0, 1], dtype=np.float_)

    # read note names as strings and convert them to Hz
    fhandle.seek(0)
    content = fhandle.readlines()
    values = np.array(
        [librosa.note_to_hz(line.split(",")[2].strip("\n")) for line in content]
    )

    return annotations.NoteData(intervals, "s", values, "hz")
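

# The score files parsed by load_score are plain-text CSV, one note per line in
# the form "start_time,end_time,note_name" (times in seconds; note names are
# converted to Hz via librosa.note_to_hz). Illustrative lines:
#
#     0.593,1.102,D4
#     1.102,1.788,F#4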


@core.docstring_inherit(core.Dataset)
class Dataset(core.Dataset):
    """
    The Phenicx-Anechoic dataset
    """

    def __init__(self, data_home=None, version="default"):
        super().__init__(
            data_home,
            version,
            name="phenicx_anechoic",
            track_class=Track,
            multitrack_class=MultiTrack,
            bibtex=BIBTEX,
            indexes=INDEXES,
            remotes=REMOTES,
            license_info=LICENSE_INFO,
        )

    @deprecated(
        reason="Use mirdata.datasets.phenicx_anechoic.load_audio",
        version="0.3.4",
    )
    def load_audio(self, *args, **kwargs):
        return load_audio(*args, **kwargs)

    @deprecated(
        reason="Use mirdata.datasets.phenicx_anechoic.load_score",
        version="0.3.4",
    )
    def load_score(self, *args, **kwargs):
        return load_score(*args, **kwargs)
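

if __name__ == "__main__":
    # Minimal smoke-test sketch (assumes the dataset archive has already been
    # downloaded, e.g. via Dataset().download(), and that mirdata's standard
    # mtrack_ids/multitrack accessors are available; not part of the loader API).
    dataset = Dataset()
    mtrack_id = dataset.mtrack_ids[0]
    mtrack = dataset.multitrack(mtrack_id)
    strings_audio = mtrack.get_audio_for_section("strings")
    print(mtrack_id, "strings mix shape:", strings_audio.shape)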