This is the demo script which illustrates the main functionalities of the 'NMF toolbox'. For a detailed description we refer to [1,2] (see References below).
1. It loads an example audio file containing drums and melodic instruments.
2. It computes the STFT of the audio data.
3. It applies KAM and NMF as described in [2], with random initialization of the NMF components. The final classification into harmonic and percussive is done according to the percussiveness threshold p_thresh = 0.25 as given in [2].
4. It visualizes the decomposition results.
5. It resynthesizes the separated audio streams and saves them as wav files to the hard drive.
[1] Christian Dittmar, Meinard Müller
Reverse Engineering the Amen Break - Score-informed Separation and
Restoration applied to Drum Recordings
IEEE/ACM Transactions on Audio, Speech, and Language Processing,
24(9): 1531-1543, 2016.
[2] Christian Dittmar, Patricio López-Serrano, Meinard Müller
Unifying Local and Global Methods for Harmonic-Percussive Source Separation
In Proceedings of the IEEE International Conference on Acoustics,
Speech, and Signal Processing (ICASSP), 2018.
[3] Patricio López-Serrano, Christian Dittmar, Yiğitcan Özer, and Meinard Müller
NMF Toolbox: Music Processing Applications of Nonnegative Matrix Factorization
In Proceedings of the International Conference on Digital Audio Effects (DAFx), 2019.
This file is part of 'NMF toolbox'. 'NMF toolbox' is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. 'NMF toolbox' is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
import os
import numpy as np
import scipy.io.wavfile as wav
import IPython.display as ipd
from NMFtoolbox.utils import make_monaural, pcmInt16ToFloat32Numpy
from NMFtoolbox.forwardSTFT import forwardSTFT
from NMFtoolbox.inverseSTFT import inverseSTFT
from NMFtoolbox.logFreqLogMag import logFreqLogMag
from NMFtoolbox.HPSS_KAM import HPSS_KAM_Fitzgerald
from NMFtoolbox.initTemplates import initTemplates
from NMFtoolbox.initActivations import initActivations
from NMFtoolbox.drumSpecificSoftConstraintsNMF import drumSpecificSoftConstraintsNMF
from NMFtoolbox.NMFD import NMFD
from NMFtoolbox.percussivenessEstimation import percussivenessEstimation
from NMFtoolbox.convModel import convModel
from NMFtoolbox.alphaWienerFilter import alphaWienerFilter
from NMFtoolbox.visualizeComponentsNMF import visualizeComponentsNMF
from NMFtoolbox.visualizeComponentsKAM import visualizeComponentsKAM
# directory the example recording is read from, and directory that receives
# the figures and wav files written by this script
inpPath = '../data/'
outPath = 'output/'

# create the output directory if it doesn't exist; exist_ok=True makes the
# call idempotent and avoids the race between a separate isdir() check and
# the actual creation
os.makedirs(outPath, exist_ok=True)
# example recording to be decomposed
filename = 'runningExample_IGotYouMixture.wav'

# read the sampling rate and the raw samples from disk
fs, x = wav.read(os.path.join(inpPath, filename))

# make monaural if necessary, then convert the wav data from int16 to float32
x = pcmInt16ToFloat32Numpy(make_monaural(x))
# spectral parameters: block and hop size first, because the analysis window
# is derived from the block size
paramSTFT = {'blockSize': 2048, 'hopSize': 512}
paramSTFT['winFunc'] = np.hanning(paramSTFT['blockSize'])
paramSTFT.update(reconstMirror=True, appendFrame=True, numSamples=len(x))

# STFT computation
# NOTE(review): A and P are presumably magnitude and phase of X -- confirm
# against forwardSTFT
X, A, P = forwardSTFT(x, paramSTFT)

# spectrogram dimensions
numBins, numFrames = X.shape

# time resolution (hop size over sampling rate) and frequency resolution
# (sampling rate over block size)
deltaT = paramSTFT['hopSize'] / fs
deltaF = fs / paramSTFT['blockSize']

# logarithmically-spaced frequency axis version, for visualization purposes
logFreqLogMagA, logFreqAxis = logFreqLogMag(A, deltaF)
numLogBins = len(logFreqAxis)
# number of KAM iterations
numIterKAM = 30

# run the KAM-based separation on A
# NOTE(review): the literal 13 is passed positionally; presumably a kernel
# size -- confirm against the signature of HPSS_KAM_Fitzgerald
kamA, Kern, KernOrd = HPSS_KAM_Fitzgerald(A, numIterKAM, 13)

# plot the KAM components using the time/frequency resolutions of the STFT
paramVis = {'deltaT': deltaT, 'deltaF': deltaF, 'fontSize': 14}
fh1 = visualizeComponentsKAM(kamA, paramVis)

# store the figure in the output directory
fh1.savefig(os.path.join(outPath, 'demoDrumExtractionKAM_NMF_percThreshold_KAM.png'))
# collects the resynthesized signal of each KAM component
audios = []

# resynthesize both KAM results
for k in range(2):
    # combine the k-th KAM estimate with P as the complex exponent
    # NOTE(review): P is presumably the phase of the mixture STFT -- confirm
    Y = kamA[k] * np.exp(1j * P)

    # back to the time domain; the second return value is not needed here
    y, _ = inverseSTFT(Y, paramSTFT)
    audios.append(y)

    # write the component to the output directory as a wav file
    out_filepath = os.path.join(
        outPath,
        'demoDrumExtractionKAM_NMF_percThreshold_KAM_component_{}_extracted_from_{}'.format(k, filename),
    )
    wav.write(filename=out_filepath, rate=fs, data=y)

# audio widget for the input mixture
# NOTE(review): as a bare expression its value is only rendered when this is
# the last statement of a notebook cell; in a plain script it is discarded
ipd.Audio(x, rate=fs)