From 4f15f77ef1713a1f0d6b185bb6109a953c9a7cd4 Mon Sep 17 00:00:00 2001
From: Sami <58188482+RAYTRAC3R@users.noreply.github.com>
Date: Fri, 12 Mar 2021 15:02:46 -0600
Subject: [PATCH 1/5] Add voice hybridization
I'm not too good at coding, but I managed to add a multispeaker option that SHOULD mix voices together when multiple speakers are selected while it's activated.
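A rough sketch of what the new "hybrid_voices" branch does: the selected speakers are tiled over the text batch with a ceiling division, and the existing truncation step then trims the list back to the number of texts. The speaker names and counts below are made up for illustration only:

    speaker_names = ["Speaker A", "Speaker B"]   # hypothetical selection
    simultaneous_texts = 5

    # -(-a // b) is ceiling division, so the selection is repeated enough
    # times to cover every text in the batch...
    batch_speaker_names = speaker_names * -(-simultaneous_texts // len(speaker_names))
    # ...and the later truncation step trims it back down to the batch size.
    batch_speaker_names = batch_speaker_names[:simultaneous_texts]
    print(batch_speaker_names)
    # ['Speaker A', 'Speaker B', 'Speaker A', 'Speaker B', 'Speaker A']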
---
CookieTTS/_5_infer/VDVAETTS_server/text2speech.py | 11 ++++++++---
1 file changed, 8 insertions(+), 3 deletions(-)
diff --git a/CookieTTS/_5_infer/VDVAETTS_server/text2speech.py b/CookieTTS/_5_infer/VDVAETTS_server/text2speech.py
index b8ed22f..e28cfd7 100644
--- a/CookieTTS/_5_infer/VDVAETTS_server/text2speech.py
+++ b/CookieTTS/_5_infer/VDVAETTS_server/text2speech.py
@@ -436,20 +436,25 @@ def shuffle_and_return():
speaker_names.append(speaker_names.pop(0))
return first_speaker
batch_speaker_names = [shuffle_and_return() for i in range(simultaneous_texts)]
+ elif multispeaker_mode == "hybrid_voices":
+ batch_speaker_names = speaker_names * -(-simultaneous_texts//len(speaker_names))
else:
raise NotImplementedError
if 0:# (optional) use different speaker list for text inside quotes
speaker_ids = [random.choice(speakers).split("|")[2] if ('"' in text) else random.choice(narrators).split("|")[2] for text in text_batch] # pick speaker if quotemark in text, else narrator
- text_batch = [text.replace('"',"") for text in text_batch] # remove quotes from text
+ text_batch = [text.replace('"',"") for text in text_batch] # remove quotes from text
if len(batch_speaker_names) > len(text_batch):
batch_speaker_names = batch_speaker_names[:len(text_batch)]
- simultaneous_texts = len(text_batch)
+ simultaneous_texts = len(text_batch)
# get speaker_ids (VDVAETTS)
VDVAETTS_speaker_ids = [self.ttm_sp_name_lookup[speaker] for speaker in batch_speaker_names]
VDVAETTS_speaker_ids = torch.LongTensor(VDVAETTS_speaker_ids).cuda().repeat_interleave(batch_size_per_text)
+ #VDVAETTS_speaker_mix = [44]
+ #print(VDVAETTS_speaker_mix)
+ #VDVAETTS_speaker_mix = torch.LongTensor(VDVAETTS_speaker_mix).cuda().repeat_interleave(batch_size_per_text)
# get style input
try:
@@ -503,7 +508,7 @@ def shuffle_and_return():
while np.amin(best_score) < target_score:
# run VDVAETTS
if status_updates: print("..", end='')
- outputs = self.VDVAETTS.inference(sequence, text_lengths.repeat_interleave(batch_size_per_text, dim=0), VDVAETTS_speaker_ids, style_input, char_sigma=char_sigma, frame_sigma=frame_sigma)
+ outputs = self.VDVAETTS.inference(sequence, text_lengths.repeat_interleave(batch_size_per_text, dim=0), VDVAETTS_speaker_ids, style_input, multispeaker_mode, char_sigma=char_sigma, frame_sigma=frame_sigma)
batch_pred_mel = outputs['hifigan_inputs'] if self.MTW_conf.uses_latent_input else outputs['pred_mel']
# metric for html side
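Note on how the speaker ids line up with the batch: VDVAETTS_speaker_ids holds one id per text and is expanded with repeat_interleave so every candidate sample of a text shares that text's speaker. A tiny standalone sketch (the id values and batch_size_per_text are illustrative, and .cuda() is dropped so it runs anywhere):

    import torch

    VDVAETTS_speaker_ids = torch.LongTensor([3, 7])  # one id per text
    batch_size_per_text = 2                           # candidate samples per text

    # Each text's id is repeated once per candidate sample, so the flattened
    # batch layout is [text0, text0, text1, text1].
    expanded = VDVAETTS_speaker_ids.repeat_interleave(batch_size_per_text)
    print(expanded.tolist())  # [3, 3, 7, 7]

The embedding mix in patch 3 then simply averages the first half of this batch dimension with the second half.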
From 101bdb13fcb6f3f15a181ecfe9dfbc57d41afd9d Mon Sep 17 00:00:00 2001
From: Sami <58188482+RAYTRAC3R@users.noreply.github.com>
Date: Fri, 12 Mar 2021 15:04:03 -0600
Subject: [PATCH 2/5] Update main.html
---
CookieTTS/_5_infer/VDVAETTS_server/templates/main.html | 1 +
1 file changed, 1 insertion(+)
diff --git a/CookieTTS/_5_infer/VDVAETTS_server/templates/main.html b/CookieTTS/_5_infer/VDVAETTS_server/templates/main.html
index a767b71..f7f6c32 100644
--- a/CookieTTS/_5_infer/VDVAETTS_server/templates/main.html
+++ b/CookieTTS/_5_infer/VDVAETTS_server/templates/main.html
@@ -127,6 +127,7 @@
Text To Speech
+
From e4cb487a5476b09f5c7abfa6fa199a0a9f2c7236 Mon Sep 17 00:00:00 2001
From: Sami <58188482+RAYTRAC3R@users.noreply.github.com>
Date: Fri, 12 Mar 2021 15:04:55 -0600
Subject: [PATCH 3/5] Update model.py
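This is where the mixing actually happens: when multispeaker_mode is "hybrid_voices" and the batch holds more than one entry, the speaker embeddings are split into two halves along the batch dimension, averaged element-wise, and the average is repeated back to the original batch size. A standalone sketch of that arithmetic (random embeddings and illustrative sizes; like the two-way unpack in the diff, it assumes the batch size is even):

    import torch

    speaker_embed = torch.randn(4, 8)             # [B, embed], illustrative sizes
    assert speaker_embed.shape[0] % 2 == 0        # two equal halves are assumed

    splits = speaker_embed.shape[0] // 2
    mix_1, mix_2 = torch.split(speaker_embed, splits)  # two halves of the batch
    mixed = (mix_1 + mix_2) / 2                        # element-wise average
    speaker_embed = mixed.repeat(2, 1)                 # back to [B, embed]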
---
CookieTTS/_2_ttm/VDVAETTS/model.py | 17 ++++++++++++++---
1 file changed, 14 insertions(+), 3 deletions(-)
diff --git a/CookieTTS/_2_ttm/VDVAETTS/model.py b/CookieTTS/_2_ttm/VDVAETTS/model.py
index dd60d35..f0681de 100644
--- a/CookieTTS/_2_ttm/VDVAETTS/model.py
+++ b/CookieTTS/_2_ttm/VDVAETTS/model.py
@@ -1443,10 +1443,10 @@ def update_device(self, **inputs):
outputs[key] = input
return outputs
- def inference(self, text_seq, text_lengths, speaker_id, torchmoji_hdn,
+ def inference(self, text_seq, text_lengths, speaker_id, torchmoji_hdn, multispeaker_mode,
char_sigma=1.0, frame_sigma=1.0,
bn_logdur=None, char_dur=None, gt_mel=None, alignment=None,
- mel_lengths=None,):# [B, enc_T], [B], [B], [B], [B, tm_dim]
+ mel_lengths=None):# [B, enc_T], [B], [B], [B], [B, tm_dim]
outputs = {}
memory = []
@@ -1458,7 +1458,18 @@ def inference(self, text_seq, text_lengths, speaker_id, torchmoji_hdn,
# (Speaker) speaker_id -> speaker_embed
if hasattr(self, "speaker_embedding"):
speaker_embed = self.speaker_embedding(speaker_id)# [B, embed]
- outputs["speaker_embed"] = speaker_embed# [B, embed]
+ if multispeaker_mode == "hybrid_voices" and speaker_embed.shape[0] > 1:
+ splits = int(speaker_embed.shape[0] / 2)
+ mix_1, mix_2 = torch.split(speaker_embed, splits)
+ speaker_embed = torch.add(mix_1, mix_2)
+ speaker_embed = torch.div(speaker_embed, 2)
+ speaker_embed = speaker_embed.repeat(2, 1)
+ #outputs["speaker_embed"] = speaker_embed# [B, embed]
+ #speaker_embed_mix = self.speaker_embedding(speaker_mix)# [B, embed]
+ #outputs["speaker_embed_mix"] = speaker_embed_mix# [B, embed]
+ #print(speaker_embed_mix)
+ #speaker_embed = torch.div(torch.add(speaker_embed, speaker_embed_mix), 2)
+ outputs["speaker_embed"] = speaker_embed
# (TorchMoji)
if hasattr(self, 'tm_bn'):
From 509856b066f34bc8f43d4eb3e0309ada4580ba13 Mon Sep 17 00:00:00 2001
From: Sami <58188482+RAYTRAC3R@users.noreply.github.com>
Date: Sat, 13 Mar 2021 12:39:00 -0600
Subject: [PATCH 4/5] Update train.py
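The training loop now runs the forward/backward pass only when the mel spectrogram and the frame-level log-f0 agree on the number of frames, so mismatched batches are skipped instead of crashing the run. The guard reduces to something like this (toy tensors with illustrative shapes; the print wording is illustrative too):

    import torch

    y = {
        'gt_mel':         torch.randn(2, 80, 100),  # [B, n_mel, mel_T]
        'gt_frame_logf0': torch.randn(2, 100),      # [B, mel_T]
    }

    if y['gt_mel'].shape[2] == y['gt_frame_logf0'].shape[1]:
        pass  # lengths agree: run the usual forward/backward pass here
    else:
        # lengths disagree: skip this batch rather than failing inside the model
        print("skipping batch with mismatched mel/f0 lengths")

(Patch 5 later reverts this check.)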
---
CookieTTS/_2_ttm/VDVAETTS/train.py | 310 +++++++++++++++--------------
1 file changed, 156 insertions(+), 154 deletions(-)
diff --git a/CookieTTS/_2_ttm/VDVAETTS/train.py b/CookieTTS/_2_ttm/VDVAETTS/train.py
index 42aece9..ae8abda 100644
--- a/CookieTTS/_2_ttm/VDVAETTS/train.py
+++ b/CookieTTS/_2_ttm/VDVAETTS/train.py
@@ -758,163 +758,165 @@ def train(args, rank, group_name, hparams):
y = model.parse_batch(batch)# move batch to GPU (async)
y['gt_mel'].requires_grad_()
y['use_pred_z'] = False
- y_pred = force(model, valid_kwargs=model_args, **y)
-
- loss_scalars = {
- "decoder_MAE_weight": decoder_MAE_weight,
- "decoder_MSE_weight": decoder_MSE_weight,
- "decoder_KLD_weight": decoder_KLD_weight,
- "varpred_MAE_weight": varpred_MAE_weight,
- "varpred_MSE_weight": varpred_MSE_weight,
- "varpred_KLD_weight": varpred_KLD_weight,
- "postnet_f0_MAE_weight": postnet_f0_MAE_weight,
- "postnet_f0_MSE_weight": postnet_f0_MSE_weight,
- "postnet_voiced_MAE_weight": postnet_voiced_MAE_weight,
- "postnet_voiced_BCE_weight": postnet_voiced_BCE_weight,
- "postnet_KLD_weight": postnet_KLD_weight,
- "postnet_MAE_weight": postnet_MAE_weight,
- "postnet_MSE_weight": postnet_MSE_weight,
- "mdn_loss_weight": mdn_loss_weight,
- "dur_loss_weight": dur_loss_weight,
- "sylps_MAE_weight": sylps_MAE_weight,
- "sylps_MSE_weight": sylps_MSE_weight,
- "diag_att_weight": diag_att_weight,
- "HiFiGAN_g_all_class_weight": HiFiGAN_g_all_class_weight,
- "HiFiGAN_g_all_featuremap_weight": HiFiGAN_g_all_featuremap_weight,
- "HiFiGAN_g_all_mel_mae_weight": HiFiGAN_g_all_mel_mae_weight,
- "HiFiGAN_d_all_class_weight": HiFiGAN_d_all_class_weight,
- }
- loss_dict, file_losses_batch = criterion(iteration, model, y_pred, y, loss_scalars,
- hifiGAN if hparams.HiFiGAN_enable else None,)
-
- file_losses = update_smoothed_dict(file_losses, file_losses_batch, file_losses_smoothness)
-
- if hparams.distributed_run:
- reduced_loss_dict = {k: reduce_tensor(v.data, args.n_gpus).item() if v is not None else 0. for k, v in loss_dict.items()}
- else:
- reduced_loss_dict = {k: v.item() if v is not None else 0. for k, v in loss_dict.items()}
-
- reduced_loss = reduced_loss_dict['loss']
-
- loss = loss_dict['loss']
- if hparams.fp16_run:
- with amp.scale_loss(loss, optimizer) as scaled_loss:
- scaled_loss.backward()
- else:
- loss.backward()
-
- if rank==0 and show_gradients:# debug/extreme verbose
- try:
- _=avg_grads
- except:
- avg_grads = {}
- for param_name, params in model.named_parameters():
- if params.requires_grad and params.grad is not None:
- norm_grad = 1.0
- grad = params.grad.abs().sum().item()
- if param_name not in avg_grads:
- avg_grads[param_name] = grad
- elif grad*5. < avg_grads[param_name]:
- avg_grads[param_name] = (avg_grads[param_name]*0.9)+(grad*0.1)
- norm_grad = grad/avg_grads[param_name]
- if grad > 30.0 or norm_grad > 2.0:
- print(f'{norm_grad:03.1f} | {grad:020.6f} | {params.grad.abs().mean().item():06.6f}| {params.grad.abs().max().item():010.6f} | {param_name}')
-
- if hparams.HiFiGAN_enable and y_pred['hifigan_enabled']:# HiFiGAN isn't supposed to use gradient clipping so the optimizer.
- hifiGAN.g_optimizer_step_and_clear()# should be ran before gradient clipping occurs.
-
- if grad_clip_thresh:# apply gradient clipping to params
+ print(y['gt_mel'].shape[2], y['gt_frame_logf0'].shape[1])
+ if y['gt_mel'].shape[2] == y['gt_frame_logf0'].shape[1]:
+ y_pred = force(model, valid_kwargs=model_args, **y)
+
+ loss_scalars = {
+ "decoder_MAE_weight": decoder_MAE_weight,
+ "decoder_MSE_weight": decoder_MSE_weight,
+ "decoder_KLD_weight": decoder_KLD_weight,
+ "varpred_MAE_weight": varpred_MAE_weight,
+ "varpred_MSE_weight": varpred_MSE_weight,
+ "varpred_KLD_weight": varpred_KLD_weight,
+ "postnet_f0_MAE_weight": postnet_f0_MAE_weight,
+ "postnet_f0_MSE_weight": postnet_f0_MSE_weight,
+ "postnet_voiced_MAE_weight": postnet_voiced_MAE_weight,
+ "postnet_voiced_BCE_weight": postnet_voiced_BCE_weight,
+ "postnet_KLD_weight": postnet_KLD_weight,
+ "postnet_MAE_weight": postnet_MAE_weight,
+ "postnet_MSE_weight": postnet_MSE_weight,
+ "mdn_loss_weight": mdn_loss_weight,
+ "dur_loss_weight": dur_loss_weight,
+ "sylps_MAE_weight": sylps_MAE_weight,
+ "sylps_MSE_weight": sylps_MSE_weight,
+ "diag_att_weight": diag_att_weight,
+ "HiFiGAN_g_all_class_weight": HiFiGAN_g_all_class_weight,
+ "HiFiGAN_g_all_featuremap_weight": HiFiGAN_g_all_featuremap_weight,
+ "HiFiGAN_g_all_mel_mae_weight": HiFiGAN_g_all_mel_mae_weight,
+ "HiFiGAN_d_all_class_weight": HiFiGAN_d_all_class_weight,
+ }
+ loss_dict, file_losses_batch = criterion(iteration, model, y_pred, y, loss_scalars,
+ hifiGAN if hparams.HiFiGAN_enable else None,)
+
+ file_losses = update_smoothed_dict(file_losses, file_losses_batch, file_losses_smoothness)
+
+ if hparams.distributed_run:
+ reduced_loss_dict = {k: reduce_tensor(v.data, args.n_gpus).item() if v is not None else 0. for k, v in loss_dict.items()}
+ else:
+ reduced_loss_dict = {k: v.item() if v is not None else 0. for k, v in loss_dict.items()}
+
+ reduced_loss = reduced_loss_dict['loss']
+
+ loss = loss_dict['loss']
if hparams.fp16_run:
- grad_norm = torch.nn.utils.clip_grad_norm_(
- amp.master_params(optimizer), grad_clip_thresh)
- is_overflow = math.isinf(grad_norm) or math.isnan(grad_norm)
+ with amp.scale_loss(loss, optimizer) as scaled_loss:
+ scaled_loss.backward()
else:
- grad_norm = torch.nn.utils.clip_grad_norm_(
- model.parameters(), grad_clip_thresh)
- else:
- grad_norm = 0.0
-
- if math.isfinite(grad_norm):
- optimizer.step()
-
- # calcuate the effective learning rate after gradient clipping is applied, and use the effective learning rate on the GAN modules.
- effective_lr = 0.0 if is_overflow else (learning_rate*min((grad_clip_thresh/grad_norm+1e-6), 1.0) if grad_clip_thresh else learning_rate)
-
- # (Optional) Discriminator Forward+Backward Pass
- if hparams.HiFiGAN_enable and y_pred['hifigan_enabled']:
- hifiGAN.train(model.training)
- with torch.random.fork_rng(devices=[0,]):
- hifiGAN(y_pred, y, reduced_loss_dict, loss_dict, loss_scalars)
-
- # get current Loss Scale of first optimizer
- loss_scale = amp._amp_state.loss_scalers[0]._loss_scale if hparams.fp16_run else 32768.
-
- # restart if training/model has collapsed
- if (iteration > 1e3 and (reduced_loss > LossExplosionThreshold)) or (math.isnan(reduced_loss)):
- raise LossExplosion(f"\nLOSS EXPLOSION EXCEPTION ON RANK {rank}: Loss reached {reduced_loss} during iteration {iteration}.\n\n\n")
- if (loss_scale < 1/4):
- raise LossExplosion(f"\nLOSS EXCEPTION ON RANK {rank}: Loss Scaler reached {loss_scale} during iteration {iteration}.\n\n\n")
-
- if expavg_loss_dict is None:
- expavg_loss_dict = reduced_loss_dict
- else:
- expavg_loss_dict.update({k:v for k, v in reduced_loss_dict.items() if k not in expavg_loss_dict.keys()})# if new loss term appears in reduced_loss_dict, add it to the expavg_loss_dict.
- expavg_loss_dict = {k: (reduced_loss_dict[k]*(1-loss_dict_smoothness))+(expavg_loss_dict[k]*loss_dict_smoothness) for k in expavg_loss_dict.keys() if k in reduced_loss_dict}
- expavg_loss_dict_iters += 1
-
- if expavg_loss_dict_iters > 100:# calc smoothed loss dict
- if best_loss_dict is None:
- best_loss_dict = expavg_loss_dict
+ loss.backward()
+
+ if rank==0 and show_gradients:# debug/extreme verbose
+ try:
+ _=avg_grads
+ except:
+ avg_grads = {}
+ for param_name, params in model.named_parameters():
+ if params.requires_grad and params.grad is not None:
+ norm_grad = 1.0
+ grad = params.grad.abs().sum().item()
+ if param_name not in avg_grads:
+ avg_grads[param_name] = grad
+ elif grad*5. < avg_grads[param_name]:
+ avg_grads[param_name] = (avg_grads[param_name]*0.9)+(grad*0.1)
+ norm_grad = grad/avg_grads[param_name]
+ if grad > 30.0 or norm_grad > 2.0:
+ print(f'{norm_grad:03.1f} | {grad:020.6f} | {params.grad.abs().mean().item():06.6f}| {params.grad.abs().max().item():010.6f} | {param_name}')
+
+ if hparams.HiFiGAN_enable and y_pred['hifigan_enabled']:# HiFiGAN isn't supposed to use gradient clipping so the optimizer.
+ hifiGAN.g_optimizer_step_and_clear()# should be ran before gradient clipping occurs.
+
+ if grad_clip_thresh:# apply gradient clipping to params
+ if hparams.fp16_run:
+ grad_norm = torch.nn.utils.clip_grad_norm_(
+ amp.master_params(optimizer), grad_clip_thresh)
+ is_overflow = math.isinf(grad_norm) or math.isnan(grad_norm)
+ else:
+ grad_norm = torch.nn.utils.clip_grad_norm_(
+ model.parameters(), grad_clip_thresh)
else:
- best_loss_dict = {k: min(best_loss_dict[k], expavg_loss_dict[k]) for k in best_loss_dict.keys() if k in expavg_loss_dict}
-
- if rank == 0:# print + log metrics
- duration = time.time() - start_time
- if not is_overflow:
- average_loss = rolling_loss.process(reduced_loss)
- dbGANAccStr = expavg_loss_dict.get('dbGAN_accuracy', None) or reduced_loss_dict.get('dbGAN_accuracy', 0.5)
- InfGANAccStr = expavg_loss_dict.get('InfGAN_accuracy', None) or reduced_loss_dict.get('InfGAN_accuracy', 0.5)
- WScoreStr = expavg_loss_dict.get('weighted_score' , None) or reduced_loss_dict.get('weighted_score' , 0.0)
- logger.log_training(model, reduced_loss_dict, expavg_loss_dict, best_loss_dict, grad_norm, learning_rate, duration, iteration)
- tqdm.write(
- f"{iteration} [TrainLoss:{reduced_loss:.3f} Avg:{average_loss:.3f}] "
- f"[{grad_norm:03.1f}GradNorm] [{duration:.2f}s/it] "
- f"[{(duration/(hparams.batch_size*args.n_gpus)):.3f}s/file] "
- f"[{learning_rate:.1e}LR] [{loss_scale:.0f}LS] "
- f"[{WScoreStr:.1%}AttSc] [{dbGANAccStr:.1%}dbGANAcc] [{InfGANAccStr:.1%}InfGANAcc]")
- if is_overflow:
- tqdm.write("Gradient Overflow, Skipping Step\n")
- start_time = time.time()
-
- if iteration%checkpoint_interval==0 or os.path.exists(save_file_check_path):# save model checkpoint every X iters
- if rank == 0:
- checkpoint_path = os.path.join(args.output_directory, f"checkpoint_{iteration}")
- save_checkpoint(model, optimizer, hifiGAN, learning_rate, iteration, hparams, best_validation_loss, average_loss, best_val_loss_dict, best_loss_dict, speaker_lookup, speakerlist, checkpoint_path)
-
- if iteration%dump_filelosses_interval==0:# syncronise file_losses between graphics cards
- print("Updating File_losses dict!")
- file_losses = write_dict_to_file(file_losses, os.path.join(args.output_directory, 'file_losses.csv'),
- os.path.join(args.output_directory, 'speaker_losses.csv'), speakerlist, args.n_gpus, rank)
-
- if (iteration % int(validation_interval) == 0) or (os.path.exists(save_file_check_path)):# validate models and save 'best_val_model' checkpoints
- if rank == 0 and os.path.exists(save_file_check_path):
- os.remove(save_file_check_path)
- # perform validation and save "best_val_model" depending on validation loss
- val_loss, best_val_loss_dict, file_losses = validate(hparams, args, file_losses, model, criterion, hifiGAN, valset, loss_scalars, best_val_loss_dict, iteration, collate_fn, logger)# validate/teacher_force
- file_losses = write_dict_to_file(file_losses, os.path.join(args.output_directory, 'file_losses.csv'),
- os.path.join(args.output_directory, 'speaker_losses.csv'), speakerlist, args.n_gpus, rank)
- if (val_loss < best_validation_loss):
- best_validation_loss = val_loss
- if rank == 0 and hparams.save_best_val_model:
- checkpoint_path = os.path.join(args.output_directory, "best_val_model")
- save_checkpoint(
- model, optimizer, hifiGAN, learning_rate, iteration, hparams, best_validation_loss,
- average_loss, best_val_loss_dict, best_loss_dict, speaker_lookup, speakerlist, checkpoint_path)
- just_did_val = True
-
- del y_pred, y, batch, loss_dict, reduced_loss_dict
- iteration += 1
- # end of iteration loop
+ grad_norm = 0.0
+
+ if math.isfinite(grad_norm):
+ optimizer.step()
+
+ # calcuate the effective learning rate after gradient clipping is applied, and use the effective learning rate on the GAN modules.
+ effective_lr = 0.0 if is_overflow else (learning_rate*min((grad_clip_thresh/grad_norm+1e-6), 1.0) if grad_clip_thresh else learning_rate)
+
+ # (Optional) Discriminator Forward+Backward Pass
+ if hparams.HiFiGAN_enable and y_pred['hifigan_enabled']:
+ hifiGAN.train(model.training)
+ with torch.random.fork_rng(devices=[0,]):
+ hifiGAN(y_pred, y, reduced_loss_dict, loss_dict, loss_scalars)
+
+ # get current Loss Scale of first optimizer
+ loss_scale = amp._amp_state.loss_scalers[0]._loss_scale if hparams.fp16_run else 32768.
+
+ # restart if training/model has collapsed
+ if (iteration > 1e3 and (reduced_loss > LossExplosionThreshold)) or (math.isnan(reduced_loss)):
+ raise LossExplosion(f"\nLOSS EXPLOSION EXCEPTION ON RANK {rank}: Loss reached {reduced_loss} during iteration {iteration}.\n\n\n")
+ if (loss_scale < 1/4):
+ raise LossExplosion(f"\nLOSS EXCEPTION ON RANK {rank}: Loss Scaler reached {loss_scale} during iteration {iteration}.\n\n\n")
+
+ if expavg_loss_dict is None:
+ expavg_loss_dict = reduced_loss_dict
+ else:
+ expavg_loss_dict.update({k:v for k, v in reduced_loss_dict.items() if k not in expavg_loss_dict.keys()})# if new loss term appears in reduced_loss_dict, add it to the expavg_loss_dict.
+ expavg_loss_dict = {k: (reduced_loss_dict[k]*(1-loss_dict_smoothness))+(expavg_loss_dict[k]*loss_dict_smoothness) for k in expavg_loss_dict.keys() if k in reduced_loss_dict}
+ expavg_loss_dict_iters += 1
+
+ if expavg_loss_dict_iters > 100:# calc smoothed loss dict
+ if best_loss_dict is None:
+ best_loss_dict = expavg_loss_dict
+ else:
+ best_loss_dict = {k: min(best_loss_dict[k], expavg_loss_dict[k]) for k in best_loss_dict.keys() if k in expavg_loss_dict}
+
+ if rank == 0:# print + log metrics
+ duration = time.time() - start_time
+ if not is_overflow:
+ average_loss = rolling_loss.process(reduced_loss)
+ dbGANAccStr = expavg_loss_dict.get('dbGAN_accuracy', None) or reduced_loss_dict.get('dbGAN_accuracy', 0.5)
+ InfGANAccStr = expavg_loss_dict.get('InfGAN_accuracy', None) or reduced_loss_dict.get('InfGAN_accuracy', 0.5)
+ WScoreStr = expavg_loss_dict.get('weighted_score' , None) or reduced_loss_dict.get('weighted_score' , 0.0)
+ logger.log_training(model, reduced_loss_dict, expavg_loss_dict, best_loss_dict, grad_norm, learning_rate, duration, iteration)
+ tqdm.write(
+ f"{iteration} [TrainLoss:{reduced_loss:.3f} Avg:{average_loss:.3f}] "
+ f"[{grad_norm:03.1f}GradNorm] [{duration:.2f}s/it] "
+ f"[{(duration/(hparams.batch_size*args.n_gpus)):.3f}s/file] "
+ f"[{learning_rate:.1e}LR] [{loss_scale:.0f}LS] "
+ f"[{WScoreStr:.1%}AttSc] [{dbGANAccStr:.1%}dbGANAcc] [{InfGANAccStr:.1%}InfGANAcc]")
+ if is_overflow:
+ tqdm.write("Gradient Overflow, Skipping Step\n")
+ start_time = time.time()
+
+ if iteration%checkpoint_interval==0 or os.path.exists(save_file_check_path):# save model checkpoint every X iters
+ if rank == 0:
+ checkpoint_path = os.path.join(args.output_directory, f"checkpoint_{iteration}")
+ save_checkpoint(model, optimizer, hifiGAN, learning_rate, iteration, hparams, best_validation_loss, average_loss, best_val_loss_dict, best_loss_dict, speaker_lookup, speakerlist, checkpoint_path)
+
+ if iteration%dump_filelosses_interval==0:# syncronise file_losses between graphics cards
+ print("Updating File_losses dict!")
+ file_losses = write_dict_to_file(file_losses, os.path.join(args.output_directory, 'file_losses.csv'),
+ os.path.join(args.output_directory, 'speaker_losses.csv'), speakerlist, args.n_gpus, rank)
+
+ if (iteration % int(validation_interval) == 0) or (os.path.exists(save_file_check_path)):# validate models and save 'best_val_model' checkpoints
+ if rank == 0 and os.path.exists(save_file_check_path):
+ os.remove(save_file_check_path)
+ # perform validation and save "best_val_model" depending on validation loss
+ val_loss, best_val_loss_dict, file_losses = validate(hparams, args, file_losses, model, criterion, hifiGAN, valset, loss_scalars, best_val_loss_dict, iteration, collate_fn, logger)# validate/teacher_force
+ file_losses = write_dict_to_file(file_losses, os.path.join(args.output_directory, 'file_losses.csv'),
+ os.path.join(args.output_directory, 'speaker_losses.csv'), speakerlist, args.n_gpus, rank)
+ if (val_loss < best_validation_loss):
+ best_validation_loss = val_loss
+ if rank == 0 and hparams.save_best_val_model:
+ checkpoint_path = os.path.join(args.output_directory, "best_val_model")
+ save_checkpoint(
+ model, optimizer, hifiGAN, learning_rate, iteration, hparams, best_validation_loss,
+ average_loss, best_val_loss_dict, best_loss_dict, speaker_lookup, speakerlist, checkpoint_path)
+ just_did_val = True
+
+ del y_pred, y, batch, loss_dict, reduced_loss_dict
+ iteration += 1
+ # end of iteration loop
# update filelist of training dataloader
print("Updating File_losses dict!")
From f6397729fc1c857b21df1470f7a8ddfc065823fd Mon Sep 17 00:00:00 2001
From: Sami <58188482+RAYTRAC3R@users.noreply.github.com>
Date: Sat, 13 Mar 2021 14:37:44 -0600
Subject: [PATCH 5/5] Update train.py
---
CookieTTS/_2_ttm/VDVAETTS/train.py | 311 ++++++++++++++---------------
1 file changed, 154 insertions(+), 157 deletions(-)
diff --git a/CookieTTS/_2_ttm/VDVAETTS/train.py b/CookieTTS/_2_ttm/VDVAETTS/train.py
index ae8abda..49981bd 100644
--- a/CookieTTS/_2_ttm/VDVAETTS/train.py
+++ b/CookieTTS/_2_ttm/VDVAETTS/train.py
@@ -758,165 +758,163 @@ def train(args, rank, group_name, hparams):
y = model.parse_batch(batch)# move batch to GPU (async)
y['gt_mel'].requires_grad_()
y['use_pred_z'] = False
- print(y['gt_mel'].shape[2], y['gt_frame_logf0'].shape[1])
- if y['gt_mel'].shape[2] == y['gt_frame_logf0'].shape[1]:
- y_pred = force(model, valid_kwargs=model_args, **y)
-
- loss_scalars = {
- "decoder_MAE_weight": decoder_MAE_weight,
- "decoder_MSE_weight": decoder_MSE_weight,
- "decoder_KLD_weight": decoder_KLD_weight,
- "varpred_MAE_weight": varpred_MAE_weight,
- "varpred_MSE_weight": varpred_MSE_weight,
- "varpred_KLD_weight": varpred_KLD_weight,
- "postnet_f0_MAE_weight": postnet_f0_MAE_weight,
- "postnet_f0_MSE_weight": postnet_f0_MSE_weight,
- "postnet_voiced_MAE_weight": postnet_voiced_MAE_weight,
- "postnet_voiced_BCE_weight": postnet_voiced_BCE_weight,
- "postnet_KLD_weight": postnet_KLD_weight,
- "postnet_MAE_weight": postnet_MAE_weight,
- "postnet_MSE_weight": postnet_MSE_weight,
- "mdn_loss_weight": mdn_loss_weight,
- "dur_loss_weight": dur_loss_weight,
- "sylps_MAE_weight": sylps_MAE_weight,
- "sylps_MSE_weight": sylps_MSE_weight,
- "diag_att_weight": diag_att_weight,
- "HiFiGAN_g_all_class_weight": HiFiGAN_g_all_class_weight,
- "HiFiGAN_g_all_featuremap_weight": HiFiGAN_g_all_featuremap_weight,
- "HiFiGAN_g_all_mel_mae_weight": HiFiGAN_g_all_mel_mae_weight,
- "HiFiGAN_d_all_class_weight": HiFiGAN_d_all_class_weight,
- }
- loss_dict, file_losses_batch = criterion(iteration, model, y_pred, y, loss_scalars,
- hifiGAN if hparams.HiFiGAN_enable else None,)
-
- file_losses = update_smoothed_dict(file_losses, file_losses_batch, file_losses_smoothness)
-
- if hparams.distributed_run:
- reduced_loss_dict = {k: reduce_tensor(v.data, args.n_gpus).item() if v is not None else 0. for k, v in loss_dict.items()}
- else:
- reduced_loss_dict = {k: v.item() if v is not None else 0. for k, v in loss_dict.items()}
-
- reduced_loss = reduced_loss_dict['loss']
-
- loss = loss_dict['loss']
+ y_pred = force(model, valid_kwargs=model_args, **y)
+
+ loss_scalars = {
+ "decoder_MAE_weight": decoder_MAE_weight,
+ "decoder_MSE_weight": decoder_MSE_weight,
+ "decoder_KLD_weight": decoder_KLD_weight,
+ "varpred_MAE_weight": varpred_MAE_weight,
+ "varpred_MSE_weight": varpred_MSE_weight,
+ "varpred_KLD_weight": varpred_KLD_weight,
+ "postnet_f0_MAE_weight": postnet_f0_MAE_weight,
+ "postnet_f0_MSE_weight": postnet_f0_MSE_weight,
+ "postnet_voiced_MAE_weight": postnet_voiced_MAE_weight,
+ "postnet_voiced_BCE_weight": postnet_voiced_BCE_weight,
+ "postnet_KLD_weight": postnet_KLD_weight,
+ "postnet_MAE_weight": postnet_MAE_weight,
+ "postnet_MSE_weight": postnet_MSE_weight,
+ "mdn_loss_weight": mdn_loss_weight,
+ "dur_loss_weight": dur_loss_weight,
+ "sylps_MAE_weight": sylps_MAE_weight,
+ "sylps_MSE_weight": sylps_MSE_weight,
+ "diag_att_weight": diag_att_weight,
+ "HiFiGAN_g_all_class_weight": HiFiGAN_g_all_class_weight,
+ "HiFiGAN_g_all_featuremap_weight": HiFiGAN_g_all_featuremap_weight,
+ "HiFiGAN_g_all_mel_mae_weight": HiFiGAN_g_all_mel_mae_weight,
+ "HiFiGAN_d_all_class_weight": HiFiGAN_d_all_class_weight,
+ }
+ loss_dict, file_losses_batch = criterion(iteration, model, y_pred, y, loss_scalars,
+ hifiGAN if hparams.HiFiGAN_enable else None,)
+
+ file_losses = update_smoothed_dict(file_losses, file_losses_batch, file_losses_smoothness)
+
+ if hparams.distributed_run:
+ reduced_loss_dict = {k: reduce_tensor(v.data, args.n_gpus).item() if v is not None else 0. for k, v in loss_dict.items()}
+ else:
+ reduced_loss_dict = {k: v.item() if v is not None else 0. for k, v in loss_dict.items()}
+
+ reduced_loss = reduced_loss_dict['loss']
+
+ loss = loss_dict['loss']
+ if hparams.fp16_run:
+ with amp.scale_loss(loss, optimizer) as scaled_loss:
+ scaled_loss.backward()
+ else:
+ loss.backward()
+
+ if rank==0 and show_gradients:# debug/extreme verbose
+ try:
+ _=avg_grads
+ except:
+ avg_grads = {}
+ for param_name, params in model.named_parameters():
+ if params.requires_grad and params.grad is not None:
+ norm_grad = 1.0
+ grad = params.grad.abs().sum().item()
+ if param_name not in avg_grads:
+ avg_grads[param_name] = grad
+ elif grad*5. < avg_grads[param_name]:
+ avg_grads[param_name] = (avg_grads[param_name]*0.9)+(grad*0.1)
+ norm_grad = grad/avg_grads[param_name]
+ if grad > 30.0 or norm_grad > 2.0:
+ print(f'{norm_grad:03.1f} | {grad:020.6f} | {params.grad.abs().mean().item():06.6f}| {params.grad.abs().max().item():010.6f} | {param_name}')
+
+ if hparams.HiFiGAN_enable and y_pred['hifigan_enabled']:# HiFiGAN isn't supposed to use gradient clipping so the optimizer.
+ hifiGAN.g_optimizer_step_and_clear()# should be ran before gradient clipping occurs.
+
+ if grad_clip_thresh:# apply gradient clipping to params
if hparams.fp16_run:
- with amp.scale_loss(loss, optimizer) as scaled_loss:
- scaled_loss.backward()
+ grad_norm = torch.nn.utils.clip_grad_norm_(
+ amp.master_params(optimizer), grad_clip_thresh)
+ is_overflow = math.isinf(grad_norm) or math.isnan(grad_norm)
else:
- loss.backward()
-
- if rank==0 and show_gradients:# debug/extreme verbose
- try:
- _=avg_grads
- except:
- avg_grads = {}
- for param_name, params in model.named_parameters():
- if params.requires_grad and params.grad is not None:
- norm_grad = 1.0
- grad = params.grad.abs().sum().item()
- if param_name not in avg_grads:
- avg_grads[param_name] = grad
- elif grad*5. < avg_grads[param_name]:
- avg_grads[param_name] = (avg_grads[param_name]*0.9)+(grad*0.1)
- norm_grad = grad/avg_grads[param_name]
- if grad > 30.0 or norm_grad > 2.0:
- print(f'{norm_grad:03.1f} | {grad:020.6f} | {params.grad.abs().mean().item():06.6f}| {params.grad.abs().max().item():010.6f} | {param_name}')
-
- if hparams.HiFiGAN_enable and y_pred['hifigan_enabled']:# HiFiGAN isn't supposed to use gradient clipping so the optimizer.
- hifiGAN.g_optimizer_step_and_clear()# should be ran before gradient clipping occurs.
-
- if grad_clip_thresh:# apply gradient clipping to params
- if hparams.fp16_run:
- grad_norm = torch.nn.utils.clip_grad_norm_(
- amp.master_params(optimizer), grad_clip_thresh)
- is_overflow = math.isinf(grad_norm) or math.isnan(grad_norm)
- else:
- grad_norm = torch.nn.utils.clip_grad_norm_(
- model.parameters(), grad_clip_thresh)
- else:
- grad_norm = 0.0
-
- if math.isfinite(grad_norm):
- optimizer.step()
-
- # calcuate the effective learning rate after gradient clipping is applied, and use the effective learning rate on the GAN modules.
- effective_lr = 0.0 if is_overflow else (learning_rate*min((grad_clip_thresh/grad_norm+1e-6), 1.0) if grad_clip_thresh else learning_rate)
-
- # (Optional) Discriminator Forward+Backward Pass
- if hparams.HiFiGAN_enable and y_pred['hifigan_enabled']:
- hifiGAN.train(model.training)
- with torch.random.fork_rng(devices=[0,]):
- hifiGAN(y_pred, y, reduced_loss_dict, loss_dict, loss_scalars)
-
- # get current Loss Scale of first optimizer
- loss_scale = amp._amp_state.loss_scalers[0]._loss_scale if hparams.fp16_run else 32768.
-
- # restart if training/model has collapsed
- if (iteration > 1e3 and (reduced_loss > LossExplosionThreshold)) or (math.isnan(reduced_loss)):
- raise LossExplosion(f"\nLOSS EXPLOSION EXCEPTION ON RANK {rank}: Loss reached {reduced_loss} during iteration {iteration}.\n\n\n")
- if (loss_scale < 1/4):
- raise LossExplosion(f"\nLOSS EXCEPTION ON RANK {rank}: Loss Scaler reached {loss_scale} during iteration {iteration}.\n\n\n")
-
- if expavg_loss_dict is None:
- expavg_loss_dict = reduced_loss_dict
+ grad_norm = torch.nn.utils.clip_grad_norm_(
+ model.parameters(), grad_clip_thresh)
+ else:
+ grad_norm = 0.0
+
+ if math.isfinite(grad_norm):
+ optimizer.step()
+
+ # calcuate the effective learning rate after gradient clipping is applied, and use the effective learning rate on the GAN modules.
+ effective_lr = 0.0 if is_overflow else (learning_rate*min((grad_clip_thresh/grad_norm+1e-6), 1.0) if grad_clip_thresh else learning_rate)
+
+ # (Optional) Discriminator Forward+Backward Pass
+ if hparams.HiFiGAN_enable and y_pred['hifigan_enabled']:
+ hifiGAN.train(model.training)
+ with torch.random.fork_rng(devices=[0,]):
+ hifiGAN(y_pred, y, reduced_loss_dict, loss_dict, loss_scalars)
+
+ # get current Loss Scale of first optimizer
+ loss_scale = amp._amp_state.loss_scalers[0]._loss_scale if hparams.fp16_run else 32768.
+
+ # restart if training/model has collapsed
+ if (iteration > 1e3 and (reduced_loss > LossExplosionThreshold)) or (math.isnan(reduced_loss)):
+ raise LossExplosion(f"\nLOSS EXPLOSION EXCEPTION ON RANK {rank}: Loss reached {reduced_loss} during iteration {iteration}.\n\n\n")
+ if (loss_scale < 1/4):
+ raise LossExplosion(f"\nLOSS EXCEPTION ON RANK {rank}: Loss Scaler reached {loss_scale} during iteration {iteration}.\n\n\n")
+
+ if expavg_loss_dict is None:
+ expavg_loss_dict = reduced_loss_dict
+ else:
+ expavg_loss_dict.update({k:v for k, v in reduced_loss_dict.items() if k not in expavg_loss_dict.keys()})# if new loss term appears in reduced_loss_dict, add it to the expavg_loss_dict.
+ expavg_loss_dict = {k: (reduced_loss_dict[k]*(1-loss_dict_smoothness))+(expavg_loss_dict[k]*loss_dict_smoothness) for k in expavg_loss_dict.keys() if k in reduced_loss_dict}
+ expavg_loss_dict_iters += 1
+
+ if expavg_loss_dict_iters > 100:# calc smoothed loss dict
+ if best_loss_dict is None:
+ best_loss_dict = expavg_loss_dict
else:
- expavg_loss_dict.update({k:v for k, v in reduced_loss_dict.items() if k not in expavg_loss_dict.keys()})# if new loss term appears in reduced_loss_dict, add it to the expavg_loss_dict.
- expavg_loss_dict = {k: (reduced_loss_dict[k]*(1-loss_dict_smoothness))+(expavg_loss_dict[k]*loss_dict_smoothness) for k in expavg_loss_dict.keys() if k in reduced_loss_dict}
- expavg_loss_dict_iters += 1
-
- if expavg_loss_dict_iters > 100:# calc smoothed loss dict
- if best_loss_dict is None:
- best_loss_dict = expavg_loss_dict
- else:
- best_loss_dict = {k: min(best_loss_dict[k], expavg_loss_dict[k]) for k in best_loss_dict.keys() if k in expavg_loss_dict}
-
- if rank == 0:# print + log metrics
- duration = time.time() - start_time
- if not is_overflow:
- average_loss = rolling_loss.process(reduced_loss)
- dbGANAccStr = expavg_loss_dict.get('dbGAN_accuracy', None) or reduced_loss_dict.get('dbGAN_accuracy', 0.5)
- InfGANAccStr = expavg_loss_dict.get('InfGAN_accuracy', None) or reduced_loss_dict.get('InfGAN_accuracy', 0.5)
- WScoreStr = expavg_loss_dict.get('weighted_score' , None) or reduced_loss_dict.get('weighted_score' , 0.0)
- logger.log_training(model, reduced_loss_dict, expavg_loss_dict, best_loss_dict, grad_norm, learning_rate, duration, iteration)
- tqdm.write(
- f"{iteration} [TrainLoss:{reduced_loss:.3f} Avg:{average_loss:.3f}] "
- f"[{grad_norm:03.1f}GradNorm] [{duration:.2f}s/it] "
- f"[{(duration/(hparams.batch_size*args.n_gpus)):.3f}s/file] "
- f"[{learning_rate:.1e}LR] [{loss_scale:.0f}LS] "
- f"[{WScoreStr:.1%}AttSc] [{dbGANAccStr:.1%}dbGANAcc] [{InfGANAccStr:.1%}InfGANAcc]")
- if is_overflow:
- tqdm.write("Gradient Overflow, Skipping Step\n")
- start_time = time.time()
-
- if iteration%checkpoint_interval==0 or os.path.exists(save_file_check_path):# save model checkpoint every X iters
- if rank == 0:
- checkpoint_path = os.path.join(args.output_directory, f"checkpoint_{iteration}")
- save_checkpoint(model, optimizer, hifiGAN, learning_rate, iteration, hparams, best_validation_loss, average_loss, best_val_loss_dict, best_loss_dict, speaker_lookup, speakerlist, checkpoint_path)
-
- if iteration%dump_filelosses_interval==0:# syncronise file_losses between graphics cards
- print("Updating File_losses dict!")
- file_losses = write_dict_to_file(file_losses, os.path.join(args.output_directory, 'file_losses.csv'),
- os.path.join(args.output_directory, 'speaker_losses.csv'), speakerlist, args.n_gpus, rank)
-
- if (iteration % int(validation_interval) == 0) or (os.path.exists(save_file_check_path)):# validate models and save 'best_val_model' checkpoints
- if rank == 0 and os.path.exists(save_file_check_path):
- os.remove(save_file_check_path)
- # perform validation and save "best_val_model" depending on validation loss
- val_loss, best_val_loss_dict, file_losses = validate(hparams, args, file_losses, model, criterion, hifiGAN, valset, loss_scalars, best_val_loss_dict, iteration, collate_fn, logger)# validate/teacher_force
- file_losses = write_dict_to_file(file_losses, os.path.join(args.output_directory, 'file_losses.csv'),
- os.path.join(args.output_directory, 'speaker_losses.csv'), speakerlist, args.n_gpus, rank)
- if (val_loss < best_validation_loss):
- best_validation_loss = val_loss
- if rank == 0 and hparams.save_best_val_model:
- checkpoint_path = os.path.join(args.output_directory, "best_val_model")
- save_checkpoint(
- model, optimizer, hifiGAN, learning_rate, iteration, hparams, best_validation_loss,
- average_loss, best_val_loss_dict, best_loss_dict, speaker_lookup, speakerlist, checkpoint_path)
- just_did_val = True
-
- del y_pred, y, batch, loss_dict, reduced_loss_dict
- iteration += 1
- # end of iteration loop
+ best_loss_dict = {k: min(best_loss_dict[k], expavg_loss_dict[k]) for k in best_loss_dict.keys() if k in expavg_loss_dict}
+
+ if rank == 0:# print + log metrics
+ duration = time.time() - start_time
+ if not is_overflow:
+ average_loss = rolling_loss.process(reduced_loss)
+ dbGANAccStr = expavg_loss_dict.get('dbGAN_accuracy', None) or reduced_loss_dict.get('dbGAN_accuracy', 0.5)
+ InfGANAccStr = expavg_loss_dict.get('InfGAN_accuracy', None) or reduced_loss_dict.get('InfGAN_accuracy', 0.5)
+ WScoreStr = expavg_loss_dict.get('weighted_score' , None) or reduced_loss_dict.get('weighted_score' , 0.0)
+ logger.log_training(model, reduced_loss_dict, expavg_loss_dict, best_loss_dict, grad_norm, learning_rate, duration, iteration)
+ tqdm.write(
+ f"{iteration} [TrainLoss:{reduced_loss:.3f} Avg:{average_loss:.3f}] "
+ f"[{grad_norm:03.1f}GradNorm] [{duration:.2f}s/it] "
+ f"[{(duration/(hparams.batch_size*args.n_gpus)):.3f}s/file] "
+ f"[{learning_rate:.1e}LR] [{loss_scale:.0f}LS] "
+ f"[{WScoreStr:.1%}AttSc] [{dbGANAccStr:.1%}dbGANAcc] [{InfGANAccStr:.1%}InfGANAcc]")
+ if is_overflow:
+ tqdm.write("Gradient Overflow, Skipping Step\n")
+ start_time = time.time()
+
+ if iteration%checkpoint_interval==0 or os.path.exists(save_file_check_path):# save model checkpoint every X iters
+ if rank == 0:
+ checkpoint_path = os.path.join(args.output_directory, f"checkpoint_{iteration}")
+ save_checkpoint(model, optimizer, hifiGAN, learning_rate, iteration, hparams, best_validation_loss, average_loss, best_val_loss_dict, best_loss_dict, speaker_lookup, speakerlist, checkpoint_path)
+
+ if iteration%dump_filelosses_interval==0:# syncronise file_losses between graphics cards
+ print("Updating File_losses dict!")
+ file_losses = write_dict_to_file(file_losses, os.path.join(args.output_directory, 'file_losses.csv'),
+ os.path.join(args.output_directory, 'speaker_losses.csv'), speakerlist, args.n_gpus, rank)
+
+ if (iteration % int(validation_interval) == 0) or (os.path.exists(save_file_check_path)):# validate models and save 'best_val_model' checkpoints
+ if rank == 0 and os.path.exists(save_file_check_path):
+ os.remove(save_file_check_path)
+ # perform validation and save "best_val_model" depending on validation loss
+ val_loss, best_val_loss_dict, file_losses = validate(hparams, args, file_losses, model, criterion, hifiGAN, valset, loss_scalars, best_val_loss_dict, iteration, collate_fn, logger)# validate/teacher_force
+ file_losses = write_dict_to_file(file_losses, os.path.join(args.output_directory, 'file_losses.csv'),
+ os.path.join(args.output_directory, 'speaker_losses.csv'), speakerlist, args.n_gpus, rank)
+ if (val_loss < best_validation_loss):
+ best_validation_loss = val_loss
+ if rank == 0 and hparams.save_best_val_model:
+ checkpoint_path = os.path.join(args.output_directory, "best_val_model")
+ save_checkpoint(
+ model, optimizer, hifiGAN, learning_rate, iteration, hparams, best_validation_loss,
+ average_loss, best_val_loss_dict, best_loss_dict, speaker_lookup, speakerlist, checkpoint_path)
+ just_did_val = True
+
+ del y_pred, y, batch, loss_dict, reduced_loss_dict
+ iteration += 1
+ # end of iteration loop
# update filelist of training dataloader
print("Updating File_losses dict!")
@@ -1007,4 +1005,3 @@ def train(args, rank, group_name, hparams):
pass
train(args, args.rank, args.group_name, hparams)
-