Source code for musdb.audio_classes

import os
import numpy as np
import stempeg


[docs] class Track(object): """ Generic audio Track that can be wav or stem file Attributes ---------- name : str Track name path : str Absolute path of mixture audio file stem_id : int stem/substream ID is_wav : boolean If stem is read from wav or mp4 stem subset : {'train', 'test'} Track belongs to which subset. targets : OrderedDict OrderedDict of mixted Targets for this ``track``. sources : Dict Dict of ``Source`` objects for this ``track``. chunk_start : float sets offset when loading the audio, defaults to 0 (beginning). chunk_duration : float sets duration for the audio, defaults to ``None`` (end). """ def __init__( self, path="None", is_wav=False, stem_id=None, subset=None, chunk_start=0, chunk_duration=None, sample_rate=None ): self.path = path self.subset = subset self.stem_id = stem_id self.is_wav = is_wav self.chunk_start = chunk_start self.chunk_duration = chunk_duration self.sample_rate = sample_rate # load and store metadata if os.path.exists(self.path): self.info = stempeg.Info(self.path) self.samples = int(self.info.samples(self.stem_id)) self.duration = self.info.duration(self.stem_id) self.rate = self.info.rate(self.stem_id) else: # set to `None` if no path was set (fake file) self.info = None self.samples = None self.duration = None self.rate = None self._audio = None def __len__(self): return self.samples @property def audio(self): # return cached audio if explicitly set by setter if self._audio is not None: return self._audio # read from disk to save RAM otherwise else: return self.load_audio( self.path, self.stem_id, self.chunk_start, self.chunk_duration, self.sample_rate ) @audio.setter def audio(self, array): self._audio = array
[docs] def load_audio( self, path, stem_id, chunk_start=0, chunk_duration=None, sample_rate=None ): """array_like: [shape=(num_samples, num_channels)] """ if os.path.exists(self.path): if self.is_wav: stem_id = 0 audio, rate = stempeg.read_stems( filename=path, stem_id=stem_id, start=chunk_start, duration=chunk_duration, info=self.info, sample_rate=sample_rate, ffmpeg_format="s16le" ) self._rate = rate return audio else: self._rate = None self._audio = None raise ValueError("Oops! %s cannot be loaded" % path)
def __repr__(self): return "%s" % (self.path)
[docs] class MultiTrack(Track): def __init__( self, path=None, name=None, artist=None, title=None, sources=None, targets=None, sample_rate=None, *args, **kwargs ): super(MultiTrack, self).__init__(path=path, *args, **kwargs) self.name = name self.path = path try: split_name = name.split(' - ') self.artist = split_name[0] self.title = split_name[1] except IndexError: self.artist = artist self.title = title self.sources = sources self.targets = targets self.sample_rate = sample_rate self._stems = None @property def stems(self): """array_like: [shape=(stems, num_samples, num_channels)] """ # return cached audio it explicitly set bet setter if self._stems is not None: return self._stems # read from disk to save RAM otherwise else: if not self.is_wav and os.path.exists(self.path): S, rate = stempeg.read_stems( filename=self.path, start=self.chunk_start, duration=self.chunk_duration, info=self.info, sample_rate=self.sample_rate, ffmpeg_format="s16le" ) else: rate = self.rate S = [] S.append(self.audio) # append sources in order of stem_ids for k, v in sorted(self.sources.items(), key=lambda x: x[1].stem_id): S.append(v.audio) S = np.array(S) self._rate = rate return S def __repr__(self): return "%s" % (self.name)
[docs] class Source(Track): """ An audio Target which is a linear mixture of several sources Attributes ---------- name : str Name of this source stem_id : int stem/substream ID is set here. is_wav : boolean If stem is read from wav or mp4 stem path : str Absolute path to audio file gain : float Mixing weight for this source """ def __init__( self, multitrack, # belongs to a multitrack name=None, # has its own name path=None, # might have its own path stem_id=None, # might have its own stem_id gain=1.0, *args, **kwargs ): self.multitrack = multitrack self.name = name self.path = path self.stem_id = stem_id self.gain = gain self._audio = None def __repr__(self): return self.path @property def audio(self): # return cached audio if explicitly set by setter if self._audio is not None: return self._audio # read from disk to save RAM otherwise else: return self.multitrack.load_audio( self.path, self.stem_id, self.multitrack.chunk_start, self.multitrack.chunk_duration, self.multitrack.sample_rate ) @audio.setter def audio(self, array): self._audio = array @property def rate(self): return self.multitrack.rate
# Target Track from musdb DB mixed from several musdb Tracks
[docs] class Target(Track): """ An audio Target which is a linear mixture of several sources Attributes ---------- multitrack : Track Track object sources : list[Source] list of ``Source`` objects for this ``Target`` """ def __init__( self, multitrack, sources, name=None, # has its own name ): self.multitrack = multitrack self.sources = sources self.name = name @property def audio(self): """array_like: [shape=(num_samples, num_channels)] mixes audio for targets on the fly """ mix_list = [] for source in self.sources: audio = source.audio if audio is not None: mix_list.append( source.gain * audio ) return np.sum(np.array(mix_list), axis=0) @property def rate(self): return self.multitrack.rate def __repr__(self): parts = [] for source in self.sources: parts.append(source.name) return '+'.join(parts)