From 0f6e1b4c3053288880c20f59af17a66becd71727 Mon Sep 17 00:00:00 2001 From: YeongTae Date: Thu, 16 Jan 2020 12:27:52 +0900 Subject: [PATCH] =?UTF-8?q?Dataset=20preprocessing=20=E3=84=B4=20debugging?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- preprocess_dataset.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/preprocess_dataset.py b/preprocess_dataset.py index 5a4a0db6f..cc9f82bfe 100644 --- a/preprocess_dataset.py +++ b/preprocess_dataset.py @@ -6,7 +6,7 @@ import numpy as np import argparse -sr = 22050 +sr = 16000 max_wav_value=32768.0 trim_fft_size = 1024 trim_hop_size = 256 @@ -15,7 +15,7 @@ trim_top_db = 23 skip_len = 14848 -def preprocess_audio(file_list, silence_audio_size, pre_emphasis=True): +def preprocess_audio(file_list, silence_audio_size, pre_emphasis=False): for F in file_list: f = open(F, encoding='utf-8') R = f.readlines() @@ -27,10 +27,10 @@ def preprocess_audio(file_list, silence_audio_size, pre_emphasis=True): data, sampling_rate = librosa.core.load(wav_file, sr) data = data / np.abs(data).max() *0.999 data_= librosa.effects.trim(data, top_db= trim_top_db, frame_length=trim_fft_size, hop_length=trim_hop_size)[0] - data_ = data_*max_wav_value if (pre_emphasis): data_ = np.append(data_[0], data_[1:] - 0.97 * data_[:-1]) data_ = data_ / np.abs(data_).max() * 0.999 + data_ = data_ * max_wav_value data_ = np.append(data_, [0.]*silence_audio_size) data_ = data_.astype(dtype=np.int16) write(wav_file, sr, data_) @@ -58,23 +58,26 @@ def remove_short_audios(file_name): if __name__ == "__main__": """ usage - python preprocess_audio.py -f=filelists/ljs_audio_text_test_filelist.txt,filelists/ljs_audio_text_train_filelist.txt,filelists/ljs_audio_text_val_filelist.txt -s=5 -p -r + python preprocess_dataset.py -f=metadata.csv -s=5 -t -p -r + python preprocess_dataset.py -f=metadata.csv """ parser = argparse.ArgumentParser() parser.add_argument('-f', '--file_list', type=str, - help='file list to preprocess') + help='Metadata file list to preprocess') parser.add_argument('-s', '--silence_padding', type=int, default=0, help='Adding silence padding at the end of each audio, silence audio size is hop_length * silence padding') parser.add_argument('-p', '--pre_emphasis', action='store_true', - help="do or don't do pre_emphasis") + help="Doing pre_emphasis") + parser.add_argument('-t', '--trimming', action='store_true', + help="Doing trimming audios") parser.add_argument('-r', '--remove_short_audios',action='store_true', - help="do or don't remove short audios") + help="Removing short audios in metadata file") args = parser.parse_args() file_list = args.file_list.split(',') silence_audio_size = trim_hop_size * args.silence_padding remove_short_audios = args.remove_short_audios - preprocess_audio(file_list, silence_audio_size) + preprocess_audio(file_list, silence_audio_size, args.pre_emphasis) if(remove_short_audios): for f in file_list: