Skip to content

Commit

Permalink
Dataset preprocessing
Browse files (browse the repository at this point in the history)
ㄴ debugging
  • Loading branch information
Yeongtae committed Jan 16, 2020
1 parent 9a8ad53 commit 0f6e1b4
Showing 1 changed file with 11 additions and 8 deletions.
19 changes: 11 additions & 8 deletions preprocess_dataset.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -6,7 +6,7 @@
import numpy as np
import argparse

sr = 22050
sr = 16000
max_wav_value=32768.0
trim_fft_size = 1024
trim_hop_size = 256
Expand All @@ -15,7 +15,7 @@
trim_top_db = 23
skip_len = 14848

def preprocess_audio(file_list, silence_audio_size, pre_emphasis=True):
def preprocess_audio(file_list, silence_audio_size, pre_emphasis=False):
for F in file_list:
f = open(F, encoding='utf-8')
R = f.readlines()
Expand All @@ -27,10 +27,10 @@ def preprocess_audio(file_list, silence_audio_size, pre_emphasis=True):
data, sampling_rate = librosa.core.load(wav_file, sr)
data = data / np.abs(data).max() *0.999
data_= librosa.effects.trim(data, top_db= trim_top_db, frame_length=trim_fft_size, hop_length=trim_hop_size)[0]
data_ = data_*max_wav_value
if (pre_emphasis):
data_ = np.append(data_[0], data_[1:] - 0.97 * data_[:-1])
data_ = data_ / np.abs(data_).max() * 0.999
data_ = data_ * max_wav_value
data_ = np.append(data_, [0.]*silence_audio_size)
data_ = data_.astype(dtype=np.int16)
write(wav_file, sr, data_)
Expand Down Expand Up @@ -58,23 +58,26 @@ def remove_short_audios(file_name):
if __name__ == "__main__":
"""
usage
python preprocess_audio.py -f=filelists/ljs_audio_text_test_filelist.txt,filelists/ljs_audio_text_train_filelist.txt,filelists/ljs_audio_text_val_filelist.txt -s=5 -p -r
python preprocess_dataset.py -f=metadata.csv -s=5 -t -p -r
python preprocess_dataset.py -f=metadata.csv
"""
parser = argparse.ArgumentParser()
parser.add_argument('-f', '--file_list', type=str,
help='file list to preprocess')
help='Metadata file list to preprocess')
parser.add_argument('-s', '--silence_padding', type=int, default=0,
help='Adding silence padding at the end of each audio, silence audio size is hop_length * silence padding')
parser.add_argument('-p', '--pre_emphasis', action='store_true',
help="do or don't do pre_emphasis")
help="Doing pre_emphasis")
parser.add_argument('-t', '--trimming', action='store_true',
help="Doing trimming audios")
parser.add_argument('-r', '--remove_short_audios',action='store_true',
help="do or don't remove short audios")
help="Removing short audios in metadata file")
args = parser.parse_args()
file_list = args.file_list.split(',')
silence_audio_size = trim_hop_size * args.silence_padding
remove_short_audios = args.remove_short_audios

preprocess_audio(file_list, silence_audio_size)
preprocess_audio(file_list, silence_audio_size, args.pre_emphasis)

if(remove_short_audios):
for f in file_list:
Expand Down

0 comments on commit 0f6e1b4

Please sign in to comment.