#!/usr/bin/env python3
# -*- coding: UTF-8 -*-
"""
Description:
py3translationServer.py exposes fairseq and CTranslate2 models over HTTP using the Tornado web server.
- Tornado is a Python web framework and asynchronous networking library with an emphasis on non-blocking network I/O.
- fairseq is a library for machine learning and data modeling.
- CTranslate2 is a C++ and Python library for efficient inference with transformer models, including those used by fairseq.
- More information:
- https://www.tornadoweb.org
- https://github.com/facebookresearch/fairseq
- https://opennmt.net/CTranslate2
Install with:
- pip install tornado ctranslate2
- fairseq must be built from source since the 0.2.0 version available on PyPI is too old.
py3translationServer.py:
- Supports both CPU and GPU inferencing. 'GPU' is aliased to CUDA, but DirectML is also supported on Windows.
- Supports large batch requests.
- Supports both single process and multiprocess modes.
- In single process mode, the model is preloaded for low latency inferencing.
- In multiprocess mode, the model has significantly longer initial startup time (5 seconds+) but returns all memory allocated once the transaction completes. This is ideal for batch translations and long term operation.
Copyright: github/gdiaz384
License: AGPLv3, https://www.gnu.org/licenses/agpl-3.0.html
"""
#import multiprocessing
#if ( __name__ == '__main__' ):
# multiprocessing.freeze_support() # Does not work.
__version__ = '0.4 beta - 2024Mar18' # __version__ by convention. A bare 'version' variable would be wrong since it conflicts with the '--version' CLI option, which must not be changed because that would change the UI of the CLI. #Update: Changed it.
# Set global defaults:
defaultFileEncoding='utf-8'
defaultConsoleEncoding='utf-8'
#https://docs.python.org/3.8/library/codecs.html#error-handlers
defaultInputFileErrorHandling='strict'
# Set main program defaults:
# Valid values: cpu, gpu, cuda, directml. gpu is aliased to cuda.
# ROCm support is not currently implemented. Entering 'rocm' will run fairseq in CPU mode and will cause CTranslate2 to error out.
defaultDevice='cpu'
# Use two letter language codes: www.loc.gov/standards/iso639-2/php/code_list.php
# Currently unused. Source and target languages must be specified at runtime.
defaultSourceLanguage='ja'
defaultTargetLanguage='en'
# This is relative to inputModelOrFolder which must be specified at the command prompt.
# Example sentence pieces: https://huggingface.co/JustFrederik
defaultSentencePieceModelFolder0='spm'
defaultSentencePieceModelFolder1='spmModel'
defaultSentencePieceModelFolder2='spmModels'
defaultSentencePieceModelPrefix='spm.'
defaultSentencePieceModelPostfix='.nopretok.model'
# If no sourceSentencePieceModel is specified, then use the defaultSentencePieceModelFolder0 together with defaultSourceLanguage to compute a value for sourceSentencePieceModel and check if it exists as a file. If it exists, use it. Example:
#'spm.ja.nopretok.model'
# If no targetSentencePieceModel is specified, then use the defaultSentencePieceModelFolder0 together with defaultTargetLanguage to compute a value for targetSentencePieceModel and check if it exists as a file. If it exists, use it. Example:
#'spm.en.nopretok.model'
defaultCTranslate2ModelName='model.bin'
# Host address and port. 0.0.0.0 means 'bind to all local addresses'.
#defaultAddress='0.0.0.0'
defaultAddress='localhost' # localhost is an alias for 127.0.0.1
defaultPort=14366
# The amount of time, in seconds, that must pass before the next request will trigger writing the cache to disk. Set to a low value, like 1, to write out the file on nearly every request.
# In some situations, writing the file may take several seconds. A safe minimum amount should be ~10 assuming a healthy disk and low to moderate active I/O.
defaultSaveCacheInterval=60
# The minimum time, in seconds, to wait before allowing the cache to be cleared again, meaning the cache cannot be cleared within this window of writing it out.
# Not implemented yet.
defaultMinimumClearCacheInterval=60
# Valid values are True or False. Default=True. Set to False to overwrite cache.csv in-place without creating a copy. Not implemented yet.
defaultCreateBackupOfCacheFile=True
# This is relative to path of main script or the local environment. TODO: The path handling logic should be updated to not break if an absolute path is entered here.
defaultCacheLocation='resources/cache'
# defaultCacheLocation is normally used to store cache. Setting the following to True changes the storage location of the cache to:
# Windows: os.getenv('LOCALAPPDATA') / py3translationServer/cache
# Linux: ~/.config/py3translationServer/cache
# Not implemented yet.
defaultStoreCacheInLocalEnvironment=False
# Valid values are spawn, fork, and forkserver. Changing this will lead to untested behavior.
# https://docs.python.org/3.12/library/multiprocessing.html#contexts-and-start-methods
defaultProcessesSpawnTechnique='spawn'
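# Illustrative use of this value (a sketch; the real call site is not shown in this excerpt):
#   multiprocessing.set_start_method(defaultProcessesSpawnTechnique)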
# fairseq does not play well with multithreading or multiprocessing, so create a toggle to help with troubleshooting.
defaultfairseqMultithreadingEnabled=True
# These are internal variable names for fairseq and CTranslate2, so they use a slightly different variable naming scheme.
# Fairseq documentation and source code:
# https://fairseq.readthedocs.io/en/latest/models.html#fairseq.models.transformer.TransformerModel
# https://github.com/facebookresearch/fairseq/blob/main/fairseq/models/transformer/transformer_base.py
# https://github.com/facebookresearch/fairseq/blob/main/fairseq/models/transformer/transformer_legacy.py
# https://fairseq.readthedocs.io/en/latest/_modules/fairseq/models/fairseq_model.html#BaseFairseqModel.from_pretrained
# https://fairseq.readthedocs.io/en/latest/_modules/fairseq/tasks/translation.html?highlight=source_lang
# https://fairseq.readthedocs.io/en/latest/command_line_tools.html#fairseq-interactive
# CTranslate2 documentation and source code:
# https://opennmt.net/CTranslate2/python/ctranslate2.Translator.html
# Valid bpe values are: byte_bpe, bytes, characters, fastbpe, gpt2, bert, hf_byte_bpe, sentencepiece, subword_nmt
# Depends upon model/model format used.
# Note that OpenNMT refers to this as the tokenizer type, but fairseq uses a different tokenizer concept for its UI: moses, nltk, space. This default_bpe uses the value options defined by fairseq.
default_bpe='sentencepiece'
# CTranslate2 documentation:
# https://opennmt.net/CTranslate2/python/ctranslate2.Translator.html
# https://opennmt.net/CTranslate2/python/ctranslate2.Translator.html#ctranslate2.Translator.translate_batch
# Maximum number of parallel translations. Higher values affect video memory usage. Seems to have little or no effect on CPU loads and processing time.
default_inter_threads=16
# Number of OpenMP CPU threads per translator (0 to use a default value). If the psutil library is available, then this will be updated dynamically.
default_intra_threads=0
# https://fairseq.readthedocs.io/en/latest/_modules/fairseq/tasks/fairseq_task.html?highlight=beam_size
# beam_size is the number of candidate sequences (beams) explored by the model. The best one will be chosen as the return value. Directly affects quality. This is the main speed vs. quality setting.
# CTranslate2 default=2. Changed to 5 as per the default setting in the fairseq source code. Set beam size (1 for greedy search). Best performance is with 1.
default_beam_size=5
# Number of results to return.
default_num_hypotheses=1
default_no_repeat_ngram_size=3
# Setting this to True corrupts the output, so leave as False until correct vmap can be built. Update: Added this to CLI instead.
#default_use_vmap=False
#Might be an interesting read: https://docs.python.org/3/library/configparser.html
import argparse # Used to add command line options.
import sys # End program on fail condition. Technically, anything other than sys.exit(0) exits as an error, even when just trying to close normally, but w/e.
import os # Test if file/folder exists.
#import io, iostream, gen # Optional. Read from and write to objects in memory as if they were files. Used for sending cache.csv directly from memory and perhaps will be used later for cache.csv.zip. Not fully implemented yet. Import conditionally later if needed.
#import socket # Optional. Used to get IP's and print them to clarify to the user where Tornado is listening. Import as needed.
import pathlib # Part of standard library since 3.4. Imported for Path class which does sane path handling.
import json # Accept JSON as input. Return JSON after processing.
import time # Optional library. Used to calculate performance metrics. Import conditionally later. #Update: perfMetrics, cache write out time, and clear cache time require this, so just always include it instead. Part of standard library anyway.
#import csv # Used to read/write cache files. Import conditionally later based upon if cache is enabled or not.
#import date or datetime # Humm. Could be used to append the current date to the cache backup file as cache.hash.csv.backup.Today.csv
import signal #Sometimes required library. This is needed to send signal.SIGTERM to terminate processes when fairseq + CPU hangs. import conditionally as needed. Also used for UI.
#import inspect #Used to print out the name of the current function during execution which is useful when debugging. Import conditionally later.
import hashlib # Used to identify the correct cache.csv on disk and also as a pseudo-RNG function for temporary writes.
#import fairseq # Core engine. Must be installed with 'pip install fairseq' or built from source. Import conditionally later.
#import ctranslate2 # Core engine. Must be installed with 'pip install ctranslate2'. Import conditionally later.
#import sentencepiece # Core dependency. Must be installed with 'pip install sentencepiece' Used for both fairseq and ctranslate2. However, fairseq will import it internally, like with PyTorch, so do not worry about it explicitly unless ctranslate2 is specified.
import asyncio # Used for asynchronous I/O. Part of standard library since 3.4. Is also a tornado dependency.
import multiprocessing # Part of standard library. Used for Process, Queue objects. Used in core logic and also in cache logic. #Should probably import conditionally. #Update: this is still needed, even with concurrent.futures, to deterministically set the start method for child processes (spawn, fork, forkserver), but is still technically optional if cache is not enabled and if preloadModel==True. Annoying to import conditionally.
import concurrent.futures # Used to create a process that can work with asynchronous I/O. Basically asyncio + multiprocessing.
import tornado # Web server. tornado.escape.json_decode creates Python dictionary from input json. Must be installed with 'pip install tornado'.
import tornado.web # This duplicate explicit import improves compatibility with Python versions < 3.8 and pyinstaller.
try:
    import psutil # This library is required for fairseq + CPU + multiprocessing, but technically optional otherwise. This library is also used to optimize CTranslate2 to use the number of physical cores if running on CPU. #Update: It should be possible to remove this requirement by altering the way the new process returns its data to always return the process ID. However, the signal library would still be required, and sending signal.SIGTERM to the process might be more complicated, OS specific, or unsafe. #Update: This is also used to identify child processes when launching the UI in order to close them during shutdown, so it is back in required territory.
psutilAvailable=True
except ImportError:
psutilAvailable=False
# Set some more defaults that need to be set after the import statements.
currentScriptNameWithoutPath=str( os.path.basename(str(__file__)) )
usageHelp=' Usage: ' + currentScriptNameWithoutPath + ' -h'
defaultSysCacheLocationWin=os.getenv('LOCALAPPDATA')
defaultSysCacheLocationLinux='~/.local/'+currentScriptNameWithoutPath
# Update ctrl + c handler on Windows. Linux should work mostly as expected without modification.
# From Shital Shah at https://stackoverflow.com/questions/1364173/stopping-python-using-ctrlc
# Had to change b=None to no default value, but ctrl+c seems to work more reliably now. Maybe. Still does not work sometimes.
# The only workaround might be to always launch the .py from its own .cmd and then tell cmd to close.
# The b in handler also does not always work but setting a default is also error prone.
# install with: pip install pywin32
#def handler(a,b):
# sys.exit(0)
if sys.platform == 'win32':
try:
# Load different handler function for different Python versions to sometimes improve compatibility for older versions.
# This maybe sometimes breaks compatibility for newer Python versions, maybe.
if int(sys.version_info[1]) >= 8:
def handler(a,b):
sys.exit(0)
else:
def handler(a,b=None):
sys.exit(0)
import win32api
win32api.SetConsoleCtrlHandler(handler, True)
except ImportError:
pass
# Add command line options.
commandLineParser=argparse.ArgumentParser(description='Description: '+ currentScriptNameWithoutPath + ' exposes fairseq and CTranslate2 models over HTTP using the Tornado web server. ' + usageHelp)
commandLineParser.add_argument('mode', help='Must be fairseq or ctranslate2.', default=None, type=str)
commandLineParser.add_argument('modelPath', help='For fairseq, the model.pretrain, including path. For CTranslate2, the folder containing model.bin.', default=None, type=str)
commandLineParser.add_argument('-dev', '--device', help='Process using cpu, gpu, cuda, or directml. gpu is aliased to cuda. rocm is not supported yet. Default='+defaultDevice, default=defaultDevice, type=str)
commandLineParser.add_argument('-sl', '--sourceLanguage', help='Two letter source language code. See: www.loc.gov/standards/iso639-2/php/code_list.php Default=None', default=None, type=str)
commandLineParser.add_argument('-tl', '--targetLanguage', help='Two letter target language code. See: www.loc.gov/standards/iso639-2/php/code_list.php Default=None', default=None, type=str)
commandLineParser.add_argument('-sspm', '--sourceSentencePieceModel', help='The source sentencepiece model name and path. Default is based on source language.', default=None, type=str)
commandLineParser.add_argument('-tspm', '--targetSentencePieceModel', help='The target sentencepiece model and path. Default is based on target language.', default=None, type=str)
commandLineParser.add_argument('-pm', '--preloadModel', help='Preload the model for lower latency inferencing at the cost of keeping it in memory permanently. Default=Disabled.', action='store_true')
commandLineParser.add_argument('-t', '--cpuThreads', help='Specify the number of CPU threads. Only affects CTranslate2. If the psutil library is available, the default is the number of physical cores. Otherwise without psutil, CTranslate2 will use its internal values. Using psutil requires installing it via: pip install psutil', default=None, type=int)
commandLineParser.add_argument('-vm', '--useVMap', help='For CTranslate2, enable the use of a vocabulary map. Must be named vmap.txt. Default=False.', action='store_true')
commandLineParser.add_argument('-dpm', '--disablePerfMetrics', help='Disable tracking and reporting of performance metrics. Default=Enabled.', action='store_false')
commandLineParser.add_argument('-c', '--cache', help='Toggle cache setting from default. Enabling cache saves the results of the model for future requests. Default=cache is enabled.', action='store_false')
commandLineParser.add_argument('-ui', '--uiPath', help='Specify the path to the streamlit UI. Using streamlit requires installing it via: pip install streamlit', default=None, type=str)
commandLineParser.add_argument('-a', '--address', help='Specify the address to listen on. To bind to all addresses, use 0.0.0.0 Default is to bind to: '+ str(defaultAddress), default=defaultAddress, type=str)
commandLineParser.add_argument('-p', '--port', help='Specify the port the local server will use. Default=' + str(defaultPort), default=defaultPort, type=int)
commandLineParser.add_argument('-cfe', '--cacheFileEncoding', help='Specify the encoding used for cache.csv. Default='+defaultFileEncoding,default=defaultFileEncoding, type=str)
commandLineParser.add_argument('-ce', '--consoleEncoding', help='Specify the encoding used for certain types of stdout. Default='+defaultConsoleEncoding,default=defaultConsoleEncoding, type=str)
commandLineParser.add_argument('-ifeh', '--inputFileErrorHandling', help='If the input from files cannot be read perfectly using the specified encoding, what should happen? See: https://docs.python.org/3.8/library/codecs.html#error-handlers Default is to crash the program.', default=defaultInputFileErrorHandling, type=str)
commandLineParser.add_argument('-v', '--version', help='Print version information and exit.', action='store_true')
commandLineParser.add_argument('-vb', '--verbose', help='Print more information.', action='store_true')
commandLineParser.add_argument('-d', '--debug', help='Print too much information.', action='store_true')
# Parse command line settings.
commandLineArguments=commandLineParser.parse_args()
mode=commandLineArguments.mode
inputModelFileOrFolder=commandLineArguments.modelPath
device=commandLineArguments.device
sourceLanguage=commandLineArguments.sourceLanguage
targetLanguage=commandLineArguments.targetLanguage
sourceSentencePieceModel=commandLineArguments.sourceSentencePieceModel
targetSentencePieceModel=commandLineArguments.targetSentencePieceModel
preloadModel=commandLineArguments.preloadModel
intra_threads=commandLineArguments.cpuThreads
use_vmap=commandLineArguments.useVMap
perfMetrics=commandLineArguments.disablePerfMetrics
cacheEnabled=commandLineArguments.cache
uiPath=commandLineArguments.uiPath
address=commandLineArguments.address
port=commandLineArguments.port
cacheFileEncoding=commandLineArguments.cacheFileEncoding
consoleEncoding=commandLineArguments.consoleEncoding
inputErrorHandling=commandLineArguments.inputFileErrorHandling
version=commandLineArguments.version
verbose=commandLineArguments.verbose
debug=commandLineArguments.debug
# Validate input.
if (perfMetrics == True) or (verbose==True) or (debug == True):
#import time # Optional library. Used to calculate performance metrics. #Update, processing time should be optionally reported even if verbose==True, so load it if either of those conditions are true. Debug being true implies that verbose is as well. # Update2. Will need to always import time at some point for cache functionality for delaying writing out cache file for at least 30s, ideally 60s.
startedLoadingTime = time.perf_counter()
if version == True:
sys.exit( (currentScriptNameWithoutPath + ' ' + __version__).encode(consoleEncoding) )
if debug == True:
verbose = True
import inspect #Used to print out the name of the current function during execution which is useful when debugging.
# Define helper functions to help validate input.
def verifyThisFileExists(myFile, nameOfFileToOutputInCaseOfError=None):
    if myFile == None:
        sys.exit( ('Error: Please specify a valid file for: ' + str(nameOfFileToOutputInCaseOfError) + usageHelp).encode(consoleEncoding) )
    if os.path.isfile(myFile) != True:
        sys.exit( ('Error: Unable to find file \'' + str(myFile) + '\' specified for: ' + str(nameOfFileToOutputInCaseOfError) + usageHelp).encode(consoleEncoding) )
def verifyThisFolderExists(myFolder, nameOfFileToOutputInCaseOfError=None):
    if myFolder == None:
        sys.exit( ('Error: Please specify a valid folder for: ' + str(nameOfFileToOutputInCaseOfError) + usageHelp).encode(consoleEncoding) )
    if os.path.isdir(myFolder) != True:
        sys.exit( ('Error: Unable to find folder \'' + str(myFolder) + '\' specified for: ' + str(nameOfFileToOutputInCaseOfError) + usageHelp).encode(consoleEncoding) )
def checkIfThisFileExists(myFile):
if (myFile == None) or (os.path.isfile(myFile) != True):
return False
return True
def checkIfThisFolderExists(myFolder):
if (myFolder == None) or (os.path.isdir(myFolder) != True):
return False
return True
#Update path of current script.
currentScriptPathObject = pathlib.Path( __file__ ).absolute()
currentScriptPathOnly = str(currentScriptPathObject.parent) #Does not include last / and this will return one subfolder up if it is called on a folder.
#currentScriptNameWithoutPath= #This was defined earlier already
inputModelFileNameAndPath=None
inputModelPathOnly=None
inputModelNameWithoutPath=None
# mode and inputModel will always be used at the CLI as required inputs, so just need to validate they are correct.
# mode must be fairseq or CTranslate2
if mode.lower() == 'fairseq':
try:
import fairseq
except ImportError:
sys.exit( 'Error: fairseq was selected for mode but cannot be imported. Please install it with: pip install fairseq' )
mode = 'fairseq'
# inputModelFileOrFolder must be a file and it must exist
verifyThisFileExists( inputModelFileOrFolder , 'inputModelFileOrFolder' )
#If there is a folder specified, could also try to auto detect a pretrained.pt model for increased flexibility.
# Create subtypes here using Path library, like path only, extension only. Not sure how they will be used/useful, but can just comment out later.
inputModelFileNameAndPath=inputModelFileOrFolder
inputModelPathObject= pathlib.Path( inputModelFileNameAndPath ).absolute()
inputModelPathOnly = str(inputModelPathObject.parent) # Does not include last /, and this will return one subfolder up if it is called on a folder.
inputModelNameWithoutPath = inputModelPathObject.name
elif mode.lower() == 'ctranslate2':
try:
import ctranslate2
except ImportError:
sys.exit( 'Error: ctranslate2 was selected for mode but cannot be imported. Please install it with: pip install ctranslate2' )
try:
import sentencepiece
except ImportError:
sys.exit( 'Error: sentencepiece cannot be imported. Please install sentencepiece with: pip install sentencepiece' )
mode = 'ctranslate2'
inputModelFileOrFolderObject=pathlib.Path( inputModelFileOrFolder ).absolute()
# If the specified path is a file, then get the folder from the str(pathlib.Path(myPath).parent)
# and then continue to run as normal. ctranslate2 will refuse to load the model if not valid, so do not worry about it.
if checkIfThisFileExists(inputModelFileOrFolder) == True:
inputModelFileNameAndPath=str(inputModelFileOrFolderObject)
inputModelPathOnly=str(inputModelFileOrFolderObject.parent)
inputModelNameWithoutPath=inputModelFileOrFolderObject.name
else:
#inputModelFileOrFolder must be a folder and it must exist
# The model must also exist inside of it, but maybe let the ctranslate2 library worry about that? It might have its own code for detecting different ctranslate2 formats or w/e.
verifyThisFolderExists(inputModelFileOrFolder,'inputModelFileOrFolder')
# Create subtypes here using Path library.
inputModelFileNameAndPath=str(inputModelFileOrFolderObject) + '/' + defaultCTranslate2ModelName
inputModelPathOnly=str(inputModelFileOrFolderObject)
        # If no model name was specified, then fudge the model name based upon the last folder in the path. #Might want to just set this to the defaultCTranslate2ModelName instead.
        inputModelNameWithoutPath=inputModelFileOrFolderObject.parts[-1]
else:
sys.exit( ('Error: mode must be ctranslate2 or fairseq. Mode=' + str(mode)).encode(consoleEncoding) )
# Now that inputModelNameWithoutPath is known, update some more variables for later use.
scriptNameWithVersion = currentScriptNameWithoutPath + '/' +__version__
scriptNameWithVersionDictionary = { 'content' : scriptNameWithVersion }
modeAndModelName = mode + '/' + inputModelNameWithoutPath
modeAndModelNameDictionary = { 'content' : modeAndModelName }
# verify device
if device.lower() == 'cpu':
device='cpu'
elif device.lower() == 'gpu':
# Create alias.
device='cuda'
elif device.lower() == 'cuda':
device='cuda'
elif device.lower() == 'rocm':
device='rocm'
elif device.lower() == 'directml':
device='directml'
if mode != 'fairseq':
sys.exit( ('Error: Device \'directml\' is only valid for fairseq. Mode=\''+ mode + '\' Current device=\'' + device +'\'').encode(consoleEncoding) )
try:
# https://github.com/microsoft/DirectML/tree/master/PyTorch/1.13
import torch
import torch_directml
dml = torch_directml.device()
except ImportError:
        sys.exit( 'Problem avoided: directml was specified but did not import successfully. Consider using anything else, like ctranslate2. Installing directml will trash any existing PyTorch installation. Do not use. Alternatively: pip install torch-directml' )
else:
sys.exit( ('Error: Unrecognized device=\'' + device + '\' Must be cpu, gpu, cuda, rocm, or directml.').encode(consoleEncoding) )
# Update cache path and related settings.
# lazyHash is run in a different process so that the main process does not fill up with memory that it will never use again once the entire model file has been read in for hashing.
# The proper way is probably to create a thread and read the file in chunks, but since the model is expected to be in memory later on anyway, reading it all in at once does not bloat the memory requirements of this program beyond what they already are. However, reading it without either another process or chunking would bloat the main process's size.
def lazyHash(fileNameAndPath, myQueue):
    # SHA1
    with open(fileNameAndPath, 'rb') as myFile:
        myFileContents=myFile.read()
    #modelHash=str(hashlib.sha1(myFileContents).hexdigest())[:10]
    myQueue.put( str( hashlib.sha1(myFileContents).hexdigest() ) )
# CRC32
    # So, this returns a different crc32 than 7-Zip regardless of binascii/zlib or the 'bitwise and' fix.
# Apparently, there are different sub standards for CRC32.
# https://reveng.sourceforge.io/crc-catalogue/all.htm
    # Since SHA1 is too long, CRC32 is just broken here, and there are no CRC64 libraries in the Python standard library, just use a truncated SHA1 hash as a compromise. Quirky, but whatever.
#import zlib
#import binascii
#with open(inputModelFileNameAndPath,'rb') as myFile:
# myFileContents=myFile.read()
# #modelHash=binascii.crc32(myFileContents)
#myQueue.put(str( (zlib.crc32(myFileContents)) & 0xffffffff) )
# This turns translationCacheDictionary into a csv file at cacheFilePathAndName.
# That .csv can grow quite large, so support optional compression perhaps?
# https://docs.python.org/3/library/zipfile.html
# This interface is a bit odd. It should accept a dictionary and a fileNameAndPath. This should probably be turned into a class that wraps a dictionary and handles the I/O.
def writeOutCache():
# Spaghetti.
global translationCacheDictionary
global modelHashFull
# Redundant, but it is better to be paranoid.
pathlib.Path( cacheFilePathOnly ).mkdir( parents = True, exist_ok = True )
#cacheFilePathOnly
#cacheFilePathAndName is the final location
#cacheFileNameOnly
#hashlib.sha1(myFileContents).hexdigest()
randomNumber=hashlib.sha1(cacheFilePathAndName.encode(consoleEncoding))
randomNumber.update(str(time.perf_counter()).encode(consoleEncoding))
randomNumber=str(randomNumber.hexdigest())[:8]
temporaryFileNameAndPath=cacheFilePathOnly + '/' + 'cache.temp.' + randomNumber + '.csv'
if debug == True:
print( 'temporaryFileNameAndPath=' + temporaryFileNameAndPath )
#write to temporary file first.
with open(temporaryFileNameAndPath, 'w', newline='', encoding=cacheFileEncoding) as myOutputFileHandle:
myCsvHandle = csv.writer(myOutputFileHandle)
myCsvHandle.writerow(['rawText',inputModelNameWithoutPath + '.' +modelHashFull])
for i, k in translationCacheDictionary.items():
myCsvHandle.writerow( [str(i),str(k)] )
if checkIfThisFileExists(temporaryFileNameAndPath) == True:
#Replace any existing cache with the temporary one.
pathlib.Path(temporaryFileNameAndPath).replace(cacheFilePathAndName)
print( ('Wrote cache to disk at: ' + cacheFilePathAndName).encode(consoleEncoding) )
else:
        print( ('Warning: Error writing temporary cache file at: ' + temporaryFileNameAndPath).encode(consoleEncoding) )
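# Illustrative on-disk layout produced by writeOutCache() (the model name and text are example placeholders, not real data):
#   rawText,myModel.pretrain.<full sha1 hash of the model file>
#   <source text 1>,<translation 1>
#   <source text 2>,<translation 2>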
# This resets translationCacheDictionary back to an empty dictionary.
def clearCache():
global translationCacheDictionary
translationCacheDictionary={}
print( 'Cleared cache.' )
if ( __name__ == '__main__' ) and ( cacheEnabled == True ):
# import libraries specific to handling cache.
import csv #i/o cache to disk
    #import hashlib # Used to identify the correct cache.csv on disk and also as a pseudo-RNG function for temporary writes.
# Initialize translationCacheDictionary
translationCacheDictionary={}
    # Initialize timeCacheWasLastWritten and timeCacheWasLastCleared.
timeCacheWasLastWritten=time.perf_counter()
timeCacheWasLastCleared=time.perf_counter()
verifyThisFileExists(inputModelFileNameAndPath,'modelNameAndPath')
if debug == True:
print('cacheEnabled='+str(cacheEnabled))
print( 'Attempting to read cache for model: ' + str(inputModelFileNameAndPath) )
# Dump the work of reading the file onto another process so main process does not have to deal with it.
    # multiprocessing is relatively low level code; the concurrent.futures documentation describes itself as a high-level wrapper around the multiprocessing module.
# https://docs.python.org/3/library/concurrent.futures.html
# https://docs.python.org/3/library/multiprocessing.html
modelHash=None
myQueue = multiprocessing.Queue()
lazyHashFunction = multiprocessing.Process(target=lazyHash, args=(inputModelFileNameAndPath,myQueue,) )
lazyHashFunction.start()
modelHashFull = myQueue.get()
lazyHashFunction.join()
if modelHashFull == None:
        sys.exit( ('Error: Could not generate hash from model file: ' + str(inputModelFileNameAndPath)).encode(consoleEncoding) )
modelHash=modelHashFull[:10] # Truncate hash to make the file name more friendly to file system length limitations.
cacheFilePathOnly=currentScriptPathOnly+'/'+defaultCacheLocation
cacheFileNameOnly='cache.'+ modelHash + '.csv' #Hardcoded. Maybe add prefix and postfix variables?
cacheFilePathAndName=cacheFilePathOnly + '/' + cacheFileNameOnly
    if debug == True:
print( 'modelHash=' + str(modelHash) )
print( 'cacheFilePathOnly=' + cacheFilePathOnly )
print( 'cacheFileNameOnly=' + cacheFileNameOnly )
if verbose == True:
print( 'cacheFilePathAndName=' + cacheFilePathAndName )
if checkIfThisFileExists(cacheFilePathAndName) == True:
# Then cache exists. Path to it also already exists.
# Read entries to translationCacheDictionary.
# If valid then read as normal, but if any error occurs, then print out that there was an error when reading the cache file and just use a new one.
try:
with open(cacheFilePathAndName, newline='', encoding=cacheFileEncoding, errors=inputErrorHandling) as myFileHandle:
csvReader = csv.reader(myFileHandle, strict=True)
currentLine=0
                for line in csvReader:
                    # Skip the header line.
                    if currentLine == 0:
                        currentLine+=1
                    else:
                        #if ignoreWhitespace == True:
                        for i in range(len(line)):
                            line[i]=line[i].strip()
                        if line[1] == '':
                            line[1] = None
                        translationCacheDictionary[line[0]]=line[1]
except:
            print( ('Warning: Reinitializing cache due to error reading input cache.csv: ' + cacheFilePathAndName).encode(consoleEncoding) )
translationCacheDictionary={}
if debug == True:
print( ('translationCacheDictionary=' + str(translationCacheDictionary)).encode(consoleEncoding) )
print( 'Number of entries loaded into cache: ' + str(len(translationCacheDictionary)) )
# Rename cache file to backup file regardless of I/O errors. File has already been verified to exist. Rename to backup.
cacheBackupFileName=cacheFilePathAndName + '.backup'
pathlib.Path(cacheFilePathAndName).replace(cacheBackupFileName) #It might make sense to append the date the backup was made, but could also just leave well enough alone.
print ( ('Moved old cache.csv to: ' + cacheBackupFileName).encode(consoleEncoding) )
    # If the 'old' cache is moved and the still-in-memory cache is never written out, then the cache would be lost whenever the user does not translate at least one entry to trigger a cache write. To avoid that weird bug, flush the cache here. This is a bit wasteful compared to just binary copying the file or not moving it until needed, but it also tests that I/O actually works during initialization, so leave it.
if len(translationCacheDictionary) > 0:
writeOutCache()
else:
# Then cache does not exist. Create path. File will be created later when writing out entries.
if verbose == True:
print( (' Cache file not found. Creating a new one at: '+str(cacheFilePathAndName)).encode(consoleEncoding) )
pathlib.Path( cacheFilePathOnly ).mkdir( parents = True, exist_ok = True )
if (sourceLanguage == None) and (checkIfThisFileExists(sourceSentencePieceModel) != True):
    sys.exit( 'Please specify a source language or a valid sourceSentencePieceModel.' )
if (targetLanguage == None) and (checkIfThisFileExists(targetSentencePieceModel) != True):
    sys.exit( 'Please specify a target language or a valid targetSentencePieceModel.' )
# The sentencepiece source model is always required. For ctranslate2, the source and target models are both required. If either is not present, then try to use the defaults and/or the specified language to guess it.
if checkIfThisFileExists(sourceSentencePieceModel) == True:
# if a source language was not specified, try to guess source language based upon source sentencepiece model.
if (sourceLanguage == None):
#sourceSentencePieceModelPathObject = pathlib.Path(sourceSentencePieceModel).absolute()
#sourceSentencePieceModelNameOnly = sourceSentencePieceModelPathObject.name
sourceSentencePieceModelNameOnly = pathlib.Path(sourceSentencePieceModel).name
            # Check to make sure both the prefix and postfix are found in sourceSentencePieceModelNameOnly.
            # Error out if either of them is not found because they must both be present.
if (sourceSentencePieceModelNameOnly.find( defaultSentencePieceModelPrefix ) == -1) or ( sourceSentencePieceModelNameOnly.find( defaultSentencePieceModelPostfix ) == -1):
sys.exit('Unable to determine source language from sentencepiece model name. Please specify --sourceLanguage (-sl).' + usageHelp)
# Remove prefix and postfix from the name.
tempString=sourceSentencePieceModelNameOnly.replace(defaultSentencePieceModelPrefix,'')
tempString=tempString.replace(defaultSentencePieceModelPostfix,'')
            # If the result is not 2 or 3 characters long, then error out;
if ( len(tempString) <=1 ) or ( len(tempString) >= 4):
sys.exit('Unable to determine source language from sentencepiece model name. Incorrect length. Please specify --sourceLanguage (-sl).' + usageHelp)
#otherwise set source language to those two or three characters.
sourceLanguage=tempString
print( ('Set sourceLanguage to \'' + sourceLanguage + '\' from: \'' + sourceSentencePieceModelNameOnly + '\'.').encode(consoleEncoding) )
#if checkIfThisFileExists(sourceSentencePieceModel) != True:
else:
    tempFileName=defaultSentencePieceModelPrefix+sourceLanguage+defaultSentencePieceModelPostfix
    # Search the model's folder and its parent, then the known sentencepiece subfolders of both, in order.
    for candidatePath in [
            inputModelPathOnly + '/' + tempFileName,
            inputModelPathOnly + '/../' + tempFileName,
            inputModelPathOnly + '/' + defaultSentencePieceModelFolder0 + '/' + tempFileName,
            inputModelPathOnly + '/' + defaultSentencePieceModelFolder1 + '/' + tempFileName,
            inputModelPathOnly + '/' + defaultSentencePieceModelFolder2 + '/' + tempFileName,
            inputModelPathOnly + '/../' + defaultSentencePieceModelFolder0 + '/' + tempFileName,
            inputModelPathOnly + '/../' + defaultSentencePieceModelFolder1 + '/' + tempFileName,
            inputModelPathOnly + '/../' + defaultSentencePieceModelFolder2 + '/' + tempFileName,
            ]:
        if checkIfThisFileExists(candidatePath) == True:
            sourceSentencePieceModel=candidatePath
            break
verifyThisFileExists(sourceSentencePieceModel,'sourceSentencePieceModel')
if __name__ == '__main__':
print( ('Set sourceSentencePieceModel to \'' + str(sourceSentencePieceModel) + '\' from: \'' + sourceLanguage + '\'.').encode(consoleEncoding) )
if checkIfThisFileExists(targetSentencePieceModel) == True:
#If a target language was not specified, try to guess target language based upon target sentencepiece model.
if (targetLanguage == None):
targetSentencePieceModelNameOnly = pathlib.Path(targetSentencePieceModel).name
        # Check to make sure both the prefix and postfix are found in targetSentencePieceModelNameOnly.
        # Error out if either of them is not found because they must both be present.
        if (targetSentencePieceModelNameOnly.find( defaultSentencePieceModelPrefix ) == -1) or ( targetSentencePieceModelNameOnly.find( defaultSentencePieceModelPostfix ) == -1):
            sys.exit('Unable to determine target language from sentencepiece model name. Please specify --targetLanguage (-tl).' + usageHelp)
# Remove prefix and postfix from the name.
tempString=targetSentencePieceModelNameOnly.replace(defaultSentencePieceModelPrefix,'')
tempString=tempString.replace(defaultSentencePieceModelPostfix,'')
        # If the result is not 2 or 3 characters long, then error out;
        if ( len(tempString) <=1 ) or ( len(tempString) >= 4):
            sys.exit('Unable to determine target language from sentencepiece model name. Incorrect length. Please specify --targetLanguage (-tl).' + usageHelp)
#otherwise set target language to those two or three characters.
targetLanguage=tempString
if __name__ == '__main__':
print( ('Set targetLanguage to \'' + targetLanguage + '\' from: \'' + targetSentencePieceModelNameOnly + '\'.').encode(consoleEncoding) )
#if checkIfThisFileExists(targetSentencePieceModel) != True
else:
    tempFileName=defaultSentencePieceModelPrefix+targetLanguage+defaultSentencePieceModelPostfix
    # Search the model's folder and its parent, then the known sentencepiece subfolders of both, in order.
    for candidatePath in [
            inputModelPathOnly + '/' + tempFileName,
            inputModelPathOnly + '/../' + tempFileName,
            inputModelPathOnly + '/' + defaultSentencePieceModelFolder0 + '/' + tempFileName,
            inputModelPathOnly + '/' + defaultSentencePieceModelFolder1 + '/' + tempFileName,
            inputModelPathOnly + '/' + defaultSentencePieceModelFolder2 + '/' + tempFileName,
            inputModelPathOnly + '/../' + defaultSentencePieceModelFolder0 + '/' + tempFileName,
            inputModelPathOnly + '/../' + defaultSentencePieceModelFolder1 + '/' + tempFileName,
            inputModelPathOnly + '/../' + defaultSentencePieceModelFolder2 + '/' + tempFileName,
            ]:
        if checkIfThisFileExists(candidatePath) == True:
            targetSentencePieceModel=candidatePath
            break
#The target is optional for fairseq, but required for ctranslate2.
if mode == 'ctranslate2':
verifyThisFileExists(targetSentencePieceModel,'targetSentencePieceModel')
if __name__ == '__main__':
print( ('Set targetSentencePieceModel to \'' + str(targetSentencePieceModel) + '\' from: \'' + targetLanguage + '\'.').encode(consoleEncoding) )
if uiPath != None:
if checkIfThisFileExists(uiPath) == True:
uiPath=str( pathlib.Path(uiPath).absolute() )
else:
print( 'Warning: Streamlit UI was specified but could not be found:\n')
print( uiPath.encode(consoleEncoding) )
print('')
uiPath=None
#Update some internal variables from default values.
bpe=default_bpe
beam_size=default_beam_size
num_hypotheses=default_num_hypotheses
no_repeat_ngram_size=default_no_repeat_ngram_size
#use_vmap=default_use_vmap #Update: Added this to CLI.
inter_threads=default_inter_threads
# For best processing time with CTranslate2, CPU threads should be the same as the number of physical cores for CPU loads (not logical cores). Unclear what it should be for GPU loads, but the same number as with CPU loads is a good default based upon initial testing. Update: CPU threads do not matter much when using GPU. Use default setting.
# If the user specified a number of intra_threads via --cpuThreads, then just use that instead.
if intra_threads != None:
pass
elif (mode=='ctranslate2') and (device=='cpu'):
if psutilAvailable == True:
#Always gives logical cores. Incorrect.
#intra_threads=os.cpu_count()
#Gives physical cores. Correct.
intra_threads=psutil.cpu_count(logical=False)
# Setting intra_threads=psutil.cpu_count(logical=False) always gives the wrong value for Bulldozer family FX series processors (2 Module - 4 thread ; 3 Module - 6 thread; 4 Module - 8 thread). Bulldozer FX series should use logical cores, not module count, because every logical core has some dedicated hardware to process the thread, unlike SMT.
# https://en.wikipedia.org/wiki/List_of_AMD_FX_processors
# Bandaid for Bulldozer FX systems on Windows.
# This will likely hurt performance for users that have non-Bulldozer AMD FX systems. No modern AMD FX processors currently exist, so this is more of a concern for the future.
        # A proper fix might be to create alwaysUseLogicalCores.csv and look up the full processor name there, but getting it fully correct would be a challenge because it needs the exact processor name, which might require real hardware to test with, which is unrealistic.
# Alternatively, this could be exposed to the user and they could deal with it at runtime.
# Maybe just always override this setting to whatever the user inputs? Update: Implemented this with the --cpuThreads option to allow for manual overrides.
# This band-aid fix is currently only available on Windows.
if sys.platform == 'win32':
try:
import win32com.client
                if ( str(win32com.client.GetObject(r'winmgmts:root\cimv2').ExecQuery('Select * from Win32_Processor')[0].Name).strip()[:6] == 'AMD FX' ):
                    intra_threads=os.cpu_count()
except:
pass
# Fix for BSD systems. See:
# https://psutil.readthedocs.io/en/latest/#psutil.cpu_count
if intra_threads == None:
intra_threads=default_intra_threads
elif psutilAvailable == False:
intra_threads=default_intra_threads
else:
intra_threads=default_intra_threads
# Probably pointless, but just in case.
if not isinstance(intra_threads, int):
    print( 'Warning: Could not set CPU threads for CTranslate2 correctly.' )
    intra_threads=0
if ( __name__ == '__main__' ) and (verbose == True) and (mode == 'ctranslate2'):
print ( 'CTranslate2 CPU threads=' + str(intra_threads) )
# Debug code.
#psutilAvailable=False
#Workaround to fairseq + CPU bug.
# Update: fairseq seems to hang on any sort of multiprocessing, multithreading, and even simple async + await calls.
if (mode == 'fairseq') and (device=='cpu') and (preloadModel==False) and (psutilAvailable != True):
# Then change to preloading the model because there is no way to end the child process reliably otherwise. It hangs after it finishes processing long batches.
preloadModel = True
if __name__ == '__main__':
print( '\n Warning: fairseq + CPU + multiprocessing requires psutil. Install with: \n\n pip install psutil \n\n Since psutil is not available, preloadModel=True. \n If this behavior is not desired, install psutil.\n')
#if (mode == 'fairseq') and (device=='cpu'):
# import signal #Sometimes required library. This is needed to send signal.SIGTERM to terminate processes when fairseq hangs. import conditionally.
if __name__ == '__main__':
# Print information to inform the user and help with debugging. Print it only in main since otherwise it gets printed out a lot.
    # Always print out mode (fairseq/ctranslate2)
print( 'mode=\''+mode + '\'' )
# Always print out device (cpu, cuda, directml)
print( 'device=\'' + device + '\'' )
# Always print out source language and target language
    print( ('Source Language=\'' + sourceLanguage + '\'' ).encode(consoleEncoding) )
    print( ('Target Language=\'' + targetLanguage + '\'' ).encode(consoleEncoding) )
if (verbose == True) or (debug == True):
# print out model name and path
print( ('inputModelFileNameAndPath=' + str(inputModelFileNameAndPath)).encode(consoleEncoding) )
        # print out checkpoint file name (if present; only guaranteed to be valid for fairseq)
print( ('inputModelNameWithoutPath=' + str(inputModelNameWithoutPath) ).encode(consoleEncoding) )
# print out model path
print( ('inputModelPathOnly=' + str(inputModelPathOnly) ).encode(consoleEncoding) )
# print source sentencepiece_model
print( ('sourceSentencePieceModel=' + str(sourceSentencePieceModel) ).encode(consoleEncoding) )
# print target sentencepiece_model (only for ctranslate 2)
print( ('targetSentencePieceModel=' + str(targetSentencePieceModel) ).encode(consoleEncoding) )
if debug == True:
# print out rest of variables
print( ('preloadModel=' + str(preloadModel) ).encode(consoleEncoding) )
print( ('perfMetrics=' + str(perfMetrics) ).encode(consoleEncoding) )
print( ('address=' + str(address) ).encode(consoleEncoding) )
print( ('port=' + str(port) ).encode(consoleEncoding) )
print( ('version=' + str(version) ).encode(consoleEncoding) )
print( ('cacheEnabled=' + str(cacheEnabled) ).encode(consoleEncoding) )
print( ('verbose=' + str(verbose) ).encode(consoleEncoding) )
print( ('debug=' + str(debug) ).encode(consoleEncoding) )
print( ('tornado version=' + str(tornado.version) ).encode(consoleEncoding) )
if mode == 'fairseq':
print( ('fairseq version=' + str(fairseq.__version__) ).encode(consoleEncoding) )
if mode == 'ctranslate2':
print( ('ctranslate2 version=' + str(ctranslate2.__version__) ).encode(consoleEncoding) )
#if device == 'directml':
#print out directML version and torch version. Maybe OS ver as well? Since it has arbitrary requirements.
# Start app based upon input.
# fairseq uses sourceSentencePieceModel as well, but only internally.
if mode == 'fairseq':
pass
elif mode == 'ctranslate2':
sourceLanguageProcessor = sentencepiece.SentencePieceProcessor(sourceSentencePieceModel)
targetLanguageProcessor = sentencepiece.SentencePieceProcessor(targetSentencePieceModel)
if preloadModel == True:
#Then preload model.
if mode == 'fairseq':
        # There should probably be a conditional here: if bpe == 'sentencepiece', then add sentencepiece_model; else if bpe is something else, then add... etc. The model could be built differently based only upon the tokenizer/bpe changes since that appears to be the only condition that changes dramatically.
        # For now, add sentencepiece_model unconditionally as needed by bpe='sentencepiece', but this will need to be updated later to support additional model types.
translator = fairseq.models.transformer.TransformerModel.from_pretrained(inputModelPathOnly,checkpoint_file=inputModelNameWithoutPath,source_lang=sourceLanguage,target_lang=targetLanguage,bpe=bpe, sentencepiece_model=sourceSentencePieceModel, no_repeat_ngram_size=no_repeat_ngram_size)
if device == 'cuda':
translator.cuda()
if device == 'directml':
# https://learn.microsoft.com/en-us/windows/ai/directml/gpu-pytorch-windows
# dml was defined earlier as: dml = torch_directml.device()
translator.to(dml)
elif mode == 'ctranslate2':
translator = ctranslate2.Translator(inputModelPathOnly, device=device, inter_threads=inter_threads, intra_threads=intra_threads)
else:
sys.exit( 'Unspecified error.' )
# This still blocks because a lot of time is spent here without any pause. Maybe this should go in its own thread?
def preloadModelTranslate( rawText ):
if mode == 'fairseq':
return translator.translate( rawText )
elif mode == 'ctranslate2':
return translator.translate_batch( source=rawText , beam_size=beam_size , num_hypotheses=num_hypotheses, no_repeat_ngram_size = no_repeat_ngram_size, use_vmap=use_vmap)
async def preloadModelTranslateProxy(executor, rawText):
return await asyncio.get_running_loop().run_in_executor(executor, preloadModelTranslate, rawText)
#def translateNMT(rawText,myQueue):
def translateNMT( rawText ):
if debug == True:
print( 'Processing item count: ' + str(len(rawText)) )
if mode == 'fairseq':
print( 'Loading fairseq in \'' + device + '\' mode for ' + str(len(rawText)) + ' entries.' )
translator = fairseq.models.transformer.TransformerModel.from_pretrained(inputModelPathOnly,checkpoint_file=inputModelNameWithoutPath,source_lang=sourceLanguage,target_lang=targetLanguage,bpe=bpe, sentencepiece_model=sourceSentencePieceModel, no_repeat_ngram_size=no_repeat_ngram_size)
if device == 'cuda':
translator.cuda()
elif device == 'directml':
# https://learn.microsoft.com/en-us/windows/ai/directml/gpu-pytorch-windows
# dml was defined earlier as: dml = torch_directml.device()
translator.to(dml)
if (verbose == True) and (perfMetrics==True):
startProcessingTime=time.perf_counter()
#if device == 'cpu':
# Process each entry individually. Does not fix bug.
# for textEntry in rawText:
# myQueue.put( translator.translate(textEntry) )
#Batch mode. Works well.
outputText = translator.translate(rawText)
if (verbose == True) and (perfMetrics==True):
processingTime=round(time.perf_counter() - startProcessingTime, 2)
print( 'Processing time: ' + str( processingTime ) + ' seconds' )
if debug == True:
print(str(outputText))
# multiprocessing.Queue logic.
#for textEntry in outputText:
# myQueue.put(textEntry)
# multiprocessing.Pipe logic.
#myQueue.send(outputText)
#myQueue.close()
# concurrent.futures.ProcessPoolExecutor logic.
# ProcessPoolExecutor is a wrapper for the multiprocessing module.
return outputText
elif mode == 'ctranslate2':
print( 'Loading CTranslate2 in \'' + device + '\' mode for ' + str(len(rawText)) + ' entries.' )
translator = ctranslate2.Translator(inputModelPathOnly, device=device, inter_threads=inter_threads, intra_threads=intra_threads)
        textAfterPreProcessing = sourceLanguageProcessor.encode(rawText, out_type=str)
if (verbose == True) and (perfMetrics==True):
startProcessingTime=time.perf_counter()
outputText = translator.translate_batch( source=textAfterPreProcessing , beam_size=beam_size , num_hypotheses=num_hypotheses, no_repeat_ngram_size=no_repeat_ngram_size, use_vmap=use_vmap)
if (verbose == True) and (perfMetrics==True):
processingTime=round(time.perf_counter() - startProcessingTime, 2)
print( 'Processing time: ' + str( processingTime ) + ' seconds' )
# multiprocessing.Queue logic.
#for i in range(len(outputText)):
# myQueue.put(targetLanguageProcessor.decode(outputText[i].hypotheses[0]))
# concurrent.futures.ProcessPoolExecutor logic.
        newList = [ targetLanguageProcessor.decode( entry.hypotheses[0] ) for entry in outputText ]
        return newList
else:
sys.exit( 'Unspecified error.' )
# This function allows run_in_executor() calls to be added to a taskList, which is a plain Python list, so the entire taskList can then be awaited.
# That processes all of the entries at once with an instance of concurrent.futures.ProcessPoolExecutor.
# Otherwise, each task would block the next, and possibly also the ioloop, depending upon implementation details.
async def proxyTranslateNMT(executor, translateMe):
return await asyncio.get_running_loop().run_in_executor(executor, translateNMT, translateMe)
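# Illustrative usage of the proxy above (a sketch under assumptions, not code from elsewhere in this file):
#   executor = concurrent.futures.ProcessPoolExecutor(max_workers=1)
#   taskList = [ proxyTranslateNMT(executor, rawTextList) ]
#   results = await asyncio.gather(*taskList) # Must be awaited from within a coroutine.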
class MainHandler(tornado.web.RequestHandler):
    async def get(self):
        if debug == True:
            print( 'self.request=' + str(self.request) )
            print( 'Executing: ' + type(self).__name__ + '.' + inspect.currentframe().f_code.co_name ) # Print out className.currentFunctionName.
self.set_header('Content-Type', 'text/plain')
self.set_status(200)
self.write( 'Hello. Please use HTTP POST to communicate with ' + currentScriptNameWithoutPath)
async def post(self):
if perfMetrics == True:
requestStartTime = time.perf_counter()
self.set_header("Content-Type", 'application/json') #Set automatically by Tornado, so redundant.
self.set_status(200)
if debug == True:
print('self.request=' + str(self.request) )
client_uri = self.request.uri
client_path = self.request.path
client_query = self.request.query
client_remote_ip = self.request.remote_ip
client_url = self.request.full_url()
print('client_uri=' + client_uri)
print('client_path=' + client_path)
print('client_query=' + client_query)
print('client_remote_ip=' + client_remote_ip)
print('client_url=' + client_url)
            print('self.request.arguments=' + str(self.request.arguments))
            print('self.request.body_arguments=' + str(self.request.body_arguments))
# Basically, self.args is a dictionary made from self.request.body.
# self.args['content'] returns all content specified in the 'content' entry.
# if that returned item is a list, then self.args['content'][0] returns the first item in that list.
if debug == True:
print('self.request.body=' + str(self.request.body) )
# Assume input is json and just blindly decode.
#self.args = tornado.escape.json_decode(self.request.body)
        # Check if input is json, and then decode it. If content is not application/json, then error out.
if self.request.headers.get('Content-Type') == 'application/json':
self.args = tornado.escape.json_decode(self.request.body)
else:
print( 'Error: Only json is supported as input currently. Returning.')
return
if (self.args == None) or (self.args == ''):
print( 'Error: No json contents found in request.body. Returning.')
return
if not isinstance(self.args,dict):
print( 'Error: request.body did not return a Python dictionary. Returning.')
return
#This should print something like...
#self.args={'content': '\xe4\xbb\x8a\xe6\x97\xa5\xe3\x82\x82', 'message': 'translate sentences'}
#print( ('self.args=' + str(self.args)).encode(consoleEncoding) ) # Safer.
print( 'self.args=' + str(self.args) ) # More user friendly.
if debug == True:
# In the json submitted via post, the 'content' entry in the dictionary should contain a single string or a python list of strings.
if 'content' in self.args:
print( 'content=' + str(self.args['content']) )
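        # Illustrative request bodies this handler accepts (the key names come from this handler; the values are example placeholders):
        #   { 'content' : '今日も' }               -> translate a single string
        #   { 'content' : [ '今日も', '明日も' ] }  -> translate a batch
        #   { 'message' : 'close server' }          -> shut the server down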
if 'message' in self.args:
if ( str(self.args['message']).lower() == 'close server' ):
if (cacheEnabled == True) and (len(translationCacheDictionary) != 0):
writeOutCache()
                print( 'Info: Received \'close server\' message. Exiting.' )
#asyncio.get_running_loop().stop()
#asyncio.get_running_loop().stop()
#tornado.ioloop.IOLoop.instance().stop()
#tornado.ioloop.IOLoop.instance().stop()
#tornado.ioloop.IOLoop.stop(self)
#asyncio.get_running_loop().stop()
#tornado.ioloop.IOLoop.current().add_timeout(time.time()+1, tornado.ioloop.IOLoop.current().stop())
raise KeyboardInterrupt # Just let main() deal with this. Sloppy, but whatever.
return
rawInput=None
if 'content' in self.args:
#self.args['content'] can be a string, which is a single sentence to translate, or it can be a Python list of many strings.
rawInput=self.args['content']
else:
#The data processing assumes the data is in self.args['content']. If there is another place to look, then it has to be added manually, so for now, just return if there was no 'content' entry in the submitted json.
print( 'Error: No \'content\' entry was found in the json request.body. Returning.')
return
if (debug == True):
print( ('rawInput before string conversion=' + str(rawInput)).encode(consoleEncoding) )
convertedToList=False