diff --git a/ultravox/tools/ds_tool/ds_tool.py b/ultravox/tools/ds_tool/ds_tool.py index f62f278d..e99744fe 100644 --- a/ultravox/tools/ds_tool/ds_tool.py +++ b/ultravox/tools/ds_tool/ds_tool.py @@ -242,7 +242,7 @@ def _map_batch_combine(self, batch): audios = batch[self.audio_column_name] sentences = batch[self.asr_column_name] translations = batch[self.translation_column_name] - ids = batch["id"] + ids = batch[self.id_column_name] combined_audio = { "sampling_rate": audios[0]["sampling_rate"], @@ -256,7 +256,7 @@ def _map_batch_combine(self, batch): self.audio_column_name: [combined_audio], self.asr_column_name: [combined_sentences], self.translation_column_name: [combined_translations], - "id": combined_ids, + self.id_column_name: [combined_ids], } return new_batch @@ -396,7 +396,7 @@ def process_and_upload_split_rescursive( self.chunks_not_uploaded.append((start_index, end_index)) return None failed_chunk_ranges.append((chunk_start, chunk_end)) - successful_chunks = self.args.num_chunks - len(failed_chunk_ranges) + successful_chunks = total_chunks - len(failed_chunk_ranges) print( f"Finished processing and uploading {successful_chunks}/{total_chunks} chunks for range [{start_index}, {end_index})" )