google drive[link]에서 해당 음성들을 들을 수 있습니다
현재 keynote 형식으로만 지원되고 있습니다

1. PPT

해당 음성은 저작권과 관련하여 소리를 들을 수 없습니다 해당 음성은 저작권과 관련하여 소리를 들을 수 없습니다 해당 음성은 google drive에서 확인하세요
사용된 데이터셋: https://github.com/CheyneyComputerScience/CREMA-D
해당 음성은 google drive에서 확인하세요
사용된 데이터셋: https://github.com/CheyneyComputerScience/CREMA-D

2. Code

import os
import numpy as np

from scipy import signal
from scipy.signal import butter

import scipy.io
from scipy.io import wavfile

import IPython.display as ipd
import matplotlib.pyplot as plt

import librosa
def bandpass_filter(data, lowcut, highcut, fs, order=5):
    
    nyq = 0.5 * fs
    low = lowcut / nyq
    high = highcut / nyq
    b, a = butter(order, [low, high], btype='band')
    
    y = signal.lfilter(b, a, data)
    
    return y

def lowpass_filter(data, highcut, fs, order=5):
    
    nyq = 0.5 * fs
    high = highcut / nyq
    b, a = butter(order, high, btype='low', analog=False)
    
    y = signal.lfilter(b, a, data)
    
    return y

def highpass_filter(data, lowcut, fs, order=5):
    
    nyq = 0.5 * fs
    low = lowcut / nyq
    b, a = butter(order, low, btype='high', analog=False)
    
    y = signal.lfilter(b, a, data)
    
    return y

def butter_bandpass(lowcut, highcut, fs, order=5):
    nyq = 0.5 * fs
    low = lowcut / nyq
    high = highcut / nyq
    b, a = butter(order, [low, high], btype='band')
    return b, a


def butter_bandpass_filter(data, lowcut, highcut, fs, order=5):
    b, a = butter_bandpass(lowcut, highcut, fs, order=order)
    y = lfilter(b, a, data)
    return y
lowcut = 400
highcut= 2000
fs = 16000
for order in [3, 5, 7]:
    b, a = butter_bandpass(lowcut, highcut, fs, order=order)
    w, h = signal.freqz(b, a, worN=2000)
    plt.plot((fs * 0.5 / np.pi) * w, abs(h), label="order = %d" % order)

plt.plot([0, 0.5 * fs], [np.sqrt(0.5), np.sqrt(0.5)],
         '--', label='sqrt(0.5)')
plt.xlabel('Frequency (Hz)')
plt.ylabel('Gain')
plt.grid(True)
plt.legend(loc='best')

png

path = 'engInNY.wav'
sr, y = wavfile.read(path)

time = np.linspace(0, len(y)/sr, y.shape[0])
plt.plot(time, y[:, 0])
plt.plot(time, y[:, 1])
plt.show()

png

ipd.Audio(y[:, 0], rate=sr)
mix = y[:, 0]*0.5 + y[:, 1]*0.5
mix = mix[int(sr*8):]
order=5
time = np.linspace(0, len(mix)/sr, mix.shape[0])
ipd.Audio(mix, rate=sr)
N = len(mix)

k = np.arange(N)
T = N / sr
freq = k / T
freq = freq[range(int(N/2))]

# FFT 적용
yfft = np.fft.fft(mix)
yf = yfft / N
yf = yf[range(int(N/2))]

plt.rcParams["figure.figsize"] = (15,4)

# FFT 출력
plt.plot(freq, abs(yf), 'b')
plt.xlabel('Frequency')

plt.ylabel('Amplitude')
plt.xlim(0, sr/2)

plt.plot([70, 70], [0, 500])
plt.plot([150,150], [0, 500])
plt.plot([300,300], [0, 500])
plt.plot([400,400], [0, 500])
plt.plot([800,800], [0, 500])
plt.plot([1500,1500], [0, 500])
plt.plot([3000,3000], [0, 500])
plt.plot([6000,6000], [0, 500])
plt.plot([12000,12000], [0, 500])
plt.show()

plt.plot(freq, abs(yf), 'b')
plt.xlabel('Frequency')

plt.ylabel('Amplitude')
plt.xlim(0, sr/20)

plt.plot([70, 70], [0, 500])
plt.plot([150,150], [0, 500])
plt.plot([300,300], [0, 500])
plt.plot([400,400], [0, 500])
plt.plot([800,800], [0, 500])
plt.plot([1500,1500], [0, 500])
plt.plot([3000,3000], [0, 500])
plt.plot([6000,6000], [0, 500])
plt.plot([12000,12000], [0, 500])
plt.show()

png png

#  - 70Hz
bp = lowpass_filter(mix, 70, sr, order)
plt.plot(time, bp)
plt.show()
ipd.Audio(bp, rate=sr)

png

# 70 - 150Hz
bp = bandpass_filter(mix, 70, 150, sr, order=3)
plt.plot(time, bp)
plt.show()
ipd.Audio(bp, rate=sr)

png

# 150 - 300Hz 
bp = bandpass_filter(mix, 150, 300, sr, order=3)
time = np.linspace(0, len(bp)/sr, bp.shape[0])
plt.plot(time, bp)
plt.show()
ipd.Audio(bp, rate=sr)

png

# 300 - 400Hz
bp = bandpass_filter(mix, 300, 400, sr, order=3)
plt.plot(time, bp)
plt.show()
ipd.Audio(bp, rate=sr)

png

# 400 - 800Hz
bp = bandpass_filter(mix, 400, 800, sr, order)
plt.plot(time, bp)
plt.show()
ipd.Audio(bp, rate=sr)

png

# 800 - 1500Hz 
bp = bandpass_filter(mix, 800, 1500, sr, order)
plt.plot(time, bp)
plt.show()
ipd.Audio(bp, rate=sr)

png

# 1500 - 3000Hz 
bp = bandpass_filter(mix, 1500, 3000, sr, order)
plt.plot(time, bp)
plt.show()
ipd.Audio(bp, rate=sr)

png

# 3000 - 6000Hz
print(sr)
bp = bandpass_filter(mix, 3000, 6000, sr, order)
plt.plot(time, bp)
plt.show()
ipd.Audio(bp, rate=sr)

png

# 6000 - 12000Hz 
bp = bandpass_filter(mix, 6000, 12000, sr, order)
plt.plot(time, bp)
plt.show()
ipd.Audio(bp, rate=sr)

png

# 12000Hz -
bp = highpass_filter(mix, 12000, sr, order)
plt.plot(time, bp)
plt.show()
ipd.Audio(bp, rate=sr)

png

filter = [0,70,150,300,400,800,1500,3000,6000,12000]
bp = np.zeros(mix.shape)
for i, f in enumerate(filter):
    if i >=1 and i<4:
        order = 3
    else:
        order =5
    if f == 0:
        bp += lowpass_filter(mix, filter[1], sr, order)
    elif f == 12000:
        bp += highpass_filter(mix, f, sr, order)
    else:
        bp += bandpass_filter(mix, f, filter[i+1], sr, order)
plt.plot(time, bp)
plt.show()
ipd.Audio(bp, rate=sr)

png

M = librosa.feature.melspectrogram(mix, sr, n_fft=int(sr*0.064), hop_length=int(sr*0.032), n_mels=32)

y_inverse = librosa.feature.inverse.mel_to_audio(M)

time = np.linspace(0, len(y_inverse)/sr, y_inverse.shape[0])
plt.plot(time, y_inverse[:,0])
plt.show()
ipd.Audio(y_inverse[:, 0], rate=sr)