# generate_noisy.py
# coding = utf-8
import numpy as np
from matplotlib import pyplot as plt
import librosa
import scipy
def SNR(x1, x2):
    """Return the ratio of the norms of ``x1`` and ``x2`` in decibels.

    The value is ``20 * log10(||x1|| / ||x2||)``, where ``||.||`` is
    ``numpy.linalg.norm`` called with its default arguments.
    """
    magnitude_ratio = np.linalg.norm(x1) / np.linalg.norm(x2)
    return 20 * np.log10(magnitude_ratio)
def signal_by_db(x1, x2, snr, handle_method):
    """Mix ``x1`` with a rescaled copy of ``x2`` at a requested SNR.

    ``x2`` is multiplied by a gain chosen so that
    ``20 * log10(norm(x1) / norm(scaled_x2)) == snr`` and is then added to
    ``x1``. The function is written for 1-D sample arrays.

    Args:
        x1: Reference signal (array-like), e.g. clean speech.
        x2: Signal to scale and add (array-like), e.g. noise.
        snr: Target signal-to-noise ratio in dB.
        handle_method: How to reconcile inputs of different length.
            ``'append'`` repeats the shorter input cyclically until it is
            as long as the longer one. ``'cut'`` (and any other value)
            trims both inputs to the shorter length.

    Returns:
        A float64 array containing ``x1 + scaled_x2``.

    Raises:
        ValueError: If ``x2`` is non-empty but all zeros, so no gain can
            bring it to the requested SNR.
    """
    # float64 instead of int32: integer PCM values are represented exactly,
    # while floating-point audio (e.g. samples in [-1, 1]) is no longer
    # truncated to zeros.
    x1 = np.asarray(x1, dtype=np.float64)
    x2 = np.asarray(x2, dtype=np.float64)
    len1 = x1.shape[0]
    len2 = x2.shape[0]

    if len1 != len2:
        if handle_method == 'append':
            # np.resize fills the new length with repeated copies of the
            # input, so the shorter signal always reaches the full target
            # length, however large the length ratio is.
            target = max(len1, len2)
            x1 = np.resize(x1, target)
            x2 = np.resize(x2, target)
        else:
            target = min(len1, len2)
            x1 = x1[:target]
            x2 = x2[:target]

    noise_norm = np.linalg.norm(x2)
    if x2.size and noise_norm == 0:
        # Dividing by a zero norm would silently fill the mix with NaN.
        raise ValueError('x2 has zero energy and cannot be scaled to the requested SNR')

    # An amplitude ratio in dB is 20*log10(r), hence the 0.05 (= 1/20) exponent.
    x2 = x2 / noise_norm * np.linalg.norm(x1) / (10.0 ** (0.05 * snr))
    return x1 + x2
if __name__ == '__main__':
    # Demo: plot mel spectrograms (in dB) of a clean utterance and of the
    # same utterance mixed with noise at 15 dB and 0 dB SNR.
    import warnings

    # Import the submodule explicitly; a bare ``import scipy`` is not
    # guaranteed to make ``scipy.io.wavfile`` available.
    from scipy.io import wavfile

    num_FFT = 512
    hop_size = 128

    sr, speech_data = wavfile.read(u"/datasets/timit/test/sa1.wav")
    # Keep the noise rate in its own name so it does not overwrite the
    # speech rate that is used for the mel filter bank below.
    noise_sr, noise_data = wavfile.read('/datasets/noise/NOISEX/white.wav')
    if noise_sr != sr:
        warnings.warn(
            'noise sample rate (%d Hz) differs from speech sample rate (%d Hz); '
            'mixing without resampling' % (noise_sr, sr)
        )

    # (subplot position, title, signal) for each panel, top to bottom.
    panels = [
        (311, 'Clean Spectrogram', speech_data),
        (312, 'Noisy Spectrogram', signal_by_db(speech_data, noise_data, 15, 'cut')),
        (313, 'Noisy Spectrogram', signal_by_db(speech_data, noise_data, 0, 'cut')),
    ]

    # NOTE: the log-power STFT that used to be computed before each panel
    # was never used (and always came from the clean signal), so it is
    # omitted here.
    plt.figure(figsize=(10, 10))
    for position, title, signal in panels:
        # librosa expects floating-point samples; wavfile.read typically
        # returns integer PCM, so convert before feature extraction.
        samples = np.asarray(signal, dtype=np.float64)
        mel_power = librosa.feature.melspectrogram(
            y=samples, sr=sr, n_fft=num_FFT, hop_length=hop_size
        )
        plt.subplot(position)
        plt.imshow(librosa.power_to_db(mel_power, ref=np.max), cmap="hot")
        plt.title(title)
        plt.colorbar(format='%+2.0f dB')
    plt.show()