Pin torch-mlir to 20230620.875 and use local-task (the cpu-task device, replacing cpu-sync) in the OPT example.

nod-ai · Jun 24, 2023 · 64a0b35 · 64a0b35
1 parent cdd505e
commit 64a0b35
Show file tree

Hide file tree

Showing 3 changed files with 7 additions and 7 deletions.
diff --git a/setup.py b/setup.py
@@ -39,7 +39,7 @@
     install_requires=[
         "numpy",
         "PyYAML",
-        "torch-mlir>=20221021.633",
+        "torch-mlir==20230620.875",
     ]
     + backend_deps,
 )
diff --git a/setup_venv.sh b/setup_venv.sh
@@ -88,7 +88,7 @@ if [ "$torch_mlir_bin" = true ]; then
     echo "MacOS detected. Installing torch-mlir from .whl, to avoid dependency problems with torch."
     $PYTHON -m pip install --pre --no-cache-dir torch-mlir -f https://llvm.github.io/torch-mlir/package-index/ -f https://download.pytorch.org/whl/nightly/torch/
   else
-    $PYTHON -m pip install --pre torch-mlir -f https://llvm.github.io/torch-mlir/package-index/
+    $PYTHON -m pip install --pre torch-mlir==20230620.875 -f https://llvm.github.io/torch-mlir/package-index/
     if [ $? -eq 0 ];then
       echo "Successfully Installed torch-mlir"
     else

diff --git a/tank/examples/opt/opt_causallm.py b/tank/examples/opt/opt_causallm.py
@@ -12,8 +12,8 @@
 
 OPT_MODEL = "opt-1.3b"
 OPT_FS_NAME = "opt-1_3b"
-MAX_SEQUENCE_LENGTH = 30
-MAX_NEW_TOKENS = 20
+MAX_SEQUENCE_LENGTH = 128
+MAX_NEW_TOKENS = 60
 
 
 def create_module(model_name, tokenizer, device):
@@ -110,13 +110,13 @@ def generate_new_token(shark_model, tokenizer, new_text):
         "facebook/" + OPT_MODEL, use_fast=False
     )
     vmfb_path = (
-        f"./{OPT_FS_NAME}_causallm_{MAX_SEQUENCE_LENGTH}_torch_cpu-sync.vmfb"
+        f"./{OPT_FS_NAME}_causallm_{MAX_SEQUENCE_LENGTH}_torch_cpu-task.vmfb"
     )
-    opt_shark_module = SharkInference(mlir_module=None, device="cpu-sync")
+    opt_shark_module = SharkInference(mlir_module=None, device="cpu-task")
     if os.path.isfile(vmfb_path):
         opt_shark_module.load_module(vmfb_path)
     else:
-        vmfb_path = create_module(OPT_MODEL, tokenizer, "cpu-sync")
+        vmfb_path = create_module(OPT_MODEL, tokenizer, "cpu-task")
         opt_shark_module.load_module(vmfb_path)
     while True:
         try: