Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Trainning in French #48

Open
ZheQU-somfy opened this issue Apr 15, 2020 · 0 comments
Open

Trainning in French #48

ZheQU-somfy opened this issue Apr 15, 2020 · 0 comments

Comments

@ZheQU-somfy
Copy link

ZheQU-somfy commented Apr 15, 2020

Hi,
I want to train the model in French, i use the data set from website 'common_voice'.
I wrote commoncoive_fr.py like this:
from concurrent.futures import ProcessPoolExecutor
from functools import partial
import numpy as np
import os
from util import audio

def build_from_path(in_dir, out_dir, num_workers=1, tqdm=lambda x: x):
executor = ProcessPoolExecutor(max_workers=num_workers)
futures = []
index = 1
with open(os.path.join(in_dir, 'train.tsv'), encoding='utf-8') as f:
for line in f:
parts = line.strip().split('\t')
wav_path = os.path.join(in_dir,parts[1])
text = parts[2]
futures.append(executor.submit(partial(_process_utterance, out_dir, index, wav_path, text)))
index += 1
return [future.result() for future in tqdm(futures)]

def _process_utterance(out_dir, index, wav_path, text):
'''Preprocesses a single utterance audio/text pair.

This writes the mel and linear scale spectrograms to disk and returns a tuple to write
to the train.txt file.

Args:
out_dir: The directory to write the spectrograms into
index: The numeric index to use in the spectrogram filenames.
wav_path: Path to the audio file containing the speech input
text: The text spoken in the input audio file

Returns:
A (spectrogram_filename, mel_filename, n_frames, text) tuple to write to train.txt
'''

Load the audio to a numpy array:

wav = audio.load_wav(wav_path)

Compute the linear-scale spectrogram from the wav:

spectrogram = audio.spectrogram(wav).astype(np.float32)
n_frames = spectrogram.shape[1]

Compute a mel-scale spectrogram from the wav:

mel_spectrogram = audio.melspectrogram(wav).astype(np.float32)

Write the spectrograms to disk:

spectrogram_filename = 'commonvoice_fr-spec.npy' % index
mel_filename = 'commonvoice_fr-mel.npy' % index
np.save(os.path.join(out_dir, spectrogram_filename), spectrogram.T, allow_pickle=False)
np.save(os.path.join(out_dir, mel_filename), mel_spectrogram.T, allow_pickle=False)

Return a tuple describing this training example:

return (spectrogram_filename, mel_filename, n_frames, text)

And I modified the preprocess.py like this:
import argparse
import os
from multiprocessing import cpu_count
from tqdm import tqdm
from datasets import amy, blizzard, ljspeech, kusal, mailabs,commonvoice_fr
from datasets import mrs
from hparams import hparams, hparams_debug_string
import sys

def preprocess_blizzard(args):
in_dir = os.path.join(args.base_dir, 'Blizzard2012')
out_dir = os.path.join(args.base_dir, args.output)
os.makedirs(out_dir, exist_ok=True)
metadata = blizzard.build_from_path(
in_dir, out_dir, args.num_workers, tqdm=tqdm)
write_metadata(metadata, out_dir)

def preprocess_ljspeech(args):
in_dir = os.path.join(args.base_dir, 'LJSpeech-1.1')
out_dir = os.path.join(args.base_dir, args.output)
os.makedirs(out_dir, exist_ok=True)
metadata = ljspeech.build_from_path(
in_dir, out_dir, args.num_workers, tqdm=tqdm)
write_metadata(metadata, out_dir)

def preprocess_mrs(args):
in_dir = args.mrs_dir
out_dir = os.path.join(args.base_dir, args.output)
username = args.mrs_username
os.makedirs(out_dir, exist_ok=True)
metadata = mrs.build_from_path(
in_dir, out_dir, username, args.num_workers, tqdm=tqdm)
write_metadata(metadata, out_dir)

def preprocess_amy(args):
in_dir = os.path.join(args.base_dir, 'amy')
out_dir = os.path.join(args.base_dir, args.output)
os.makedirs(out_dir, exist_ok=True)
metadata = amy.build_from_path(in_dir, out_dir, args.num_workers, tqdm=tqdm)
write_metadata(metadata, out_dir)

def preprocess_kusal(args):
in_dir = os.path.join(args.base_dir, 'kusal')
out_dir = os.path.join(args.base_dir, args.output)
os.makedirs(out_dir, exist_ok=True)
metadata = kusal.build_from_path(
in_dir, out_dir, args.num_workers, tqdm=tqdm)
write_metadata(metadata, out_dir)

def preprocess_mailabs(args):
in_dir = os.path.join(args.mailabs_books_dir)
out_dir = os.path.join(args.base_dir, args.output)
os.makedirs(out_dir, exist_ok=True)
books = args.books
metadata = mailabs.build_from_path(
in_dir, out_dir, books, args.num_workers, tqdm)
write_metadata(metadata, out_dir)

def preprocess_commonvoice(args):
in_dir = os.path.join(args.base_dir,'clips')
out_dir = os.path.join(args.base_dir,args.output)
os.makedirs(out_dir,exist_ok=True)
metdata = commonvoice_fr.build_from_path(in_dir,out_dir,
args.num_workers,tqdm=tqdm)
write_metadata(metadata,out_dir)

def write_metadata(metadata, out_dir):
with open(os.path.join(out_dir, 'train.txt'), 'w', encoding='utf-8') as f:
for m in metadata:
f.write('|'.join([str(x) for x in m]) + '\n')
frames = sum([m[2] for m in metadata])
hours = frames * hparams.frame_shift_ms / (3600 * 1000)
print('Wrote %d utterances, %d frames (%.2f hours)' %
(len(metadata), frames, hours))
print('Max input length: %d' % max(len(m[3]) for m in metadata))
print('Max output length: %d' % max(m[2] for m in metadata))
with open("metadata.txt", 'w') as f:
f.write(
'''
Wrote {} utterances, {} frames, {} hours\n
Max input lengh: {} \n
Max output length: {} \n
'''.format(
len(metadata), frames, hours,
max(len(m[3]) for m in metadata), max(m[2] for m in metadata)
)
)

def main():
parser = argparse.ArgumentParser()
parser.add_argument('--base_dir', default=os.path.expanduser('~/tacotron'))
parser.add_argument('--mrs_dir', required=False)
parser.add_argument('--mrs_username', required=False)
parser.add_argument('--output', default='training')
parser.add_argument(
'--dataset', required=True, choices=['amy', 'blizzard', 'ljspeech',
'kusal', 'mailabs','mrs','commonvoice']
)
parser.add_argument('--mailabs_books_dir',
help='absolute directory to the books for the mlailabs')
parser.add_argument(
'--books',
help='comma-seperated and no space name of books i.e hunter_space,pink_fairy_book,etc.',
)
parser.add_argument('--num_workers', type=int, default=cpu_count())
args = parser.parse_args()

if args.dataset == 'mailabs' and args.books is None:
parser.error("--books required if mailabs is chosen for dataset.")

if args.dataset == 'mailabs' and args.mailabs_books_dir is None:
parser.error(
"--mailabs_books_dir required if mailabs is chosen for dataset.")

print(hparams_debug_string())

if args.dataset == 'amy':
preprocess_amy(args)
elif args.dataset == 'blizzard':
preprocess_blizzard(args)
elif args.dataset == 'ljspeech':
preprocess_ljspeech(args)
elif args.dataset == 'kusal':
preprocess_kusal(args)
elif args.dataset == 'mailabs':
preprocess_mailabs(args)
elif args.dataset == 'mrs':
preprocess_mrs(args)
elif args.dataset == 'commonvoice':
preprocess_commonvoice(args)

if name == "main":
main()

But when I preprocces the data by using the commande:
python3 preprocess.py --dataset commonvoice
I got this erros:
Traceback (most recent call last):
File "/usr/lib/python3.5/concurrent/futures/process.py", line 175, in _process_worker
r = call_item.fn(*call_item.args, **call_item.kwargs)
File "/root/mimic2/datasets/commonvoice_fr.py", line 64, in _process_utterance
spectrogram_filename = 'commonvoice_fr-spec.npy' % index
TypeError: not all arguments converted during string formatting
"""

Could you please help me to solve this problem?
Thanks

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant