diff --git a/setup.py b/setup.py index 35594bdb4c..8be3f3e0d7 100644 --- a/setup.py +++ b/setup.py @@ -39,7 +39,7 @@ install_requires=[ "numpy", "PyYAML", - "torch-mlir>=20221021.633", + "torch-mlir==20230620.875", ] + backend_deps, ) diff --git a/setup_venv.sh b/setup_venv.sh index 6dbf25a4e0..e80560607b 100755 --- a/setup_venv.sh +++ b/setup_venv.sh @@ -88,7 +88,7 @@ if [ "$torch_mlir_bin" = true ]; then echo "MacOS detected. Installing torch-mlir from .whl, to avoid dependency problems with torch." $PYTHON -m pip install --pre --no-cache-dir torch-mlir -f https://llvm.github.io/torch-mlir/package-index/ -f https://download.pytorch.org/whl/nightly/torch/ else - $PYTHON -m pip install --pre torch-mlir -f https://llvm.github.io/torch-mlir/package-index/ + $PYTHON -m pip install --pre torch-mlir==20230620.875 -f https://llvm.github.io/torch-mlir/package-index/ if [ $? -eq 0 ];then echo "Successfully Installed torch-mlir" else diff --git a/tank/examples/opt/opt_causallm.py b/tank/examples/opt/opt_causallm.py index c9baea554d..449509c3cd 100644 --- a/tank/examples/opt/opt_causallm.py +++ b/tank/examples/opt/opt_causallm.py @@ -12,8 +12,8 @@ OPT_MODEL = "opt-1.3b" OPT_FS_NAME = "opt-1_3b" -MAX_SEQUENCE_LENGTH = 30 -MAX_NEW_TOKENS = 20 +MAX_SEQUENCE_LENGTH = 128 +MAX_NEW_TOKENS = 60 def create_module(model_name, tokenizer, device): @@ -110,13 +110,13 @@ def generate_new_token(shark_model, tokenizer, new_text): "facebook/" + OPT_MODEL, use_fast=False ) vmfb_path = ( - f"./{OPT_FS_NAME}_causallm_{MAX_SEQUENCE_LENGTH}_torch_cpu-sync.vmfb" + f"./{OPT_FS_NAME}_causallm_{MAX_SEQUENCE_LENGTH}_torch_cpu-task.vmfb" ) - opt_shark_module = SharkInference(mlir_module=None, device="cpu-sync") + opt_shark_module = SharkInference(mlir_module=None, device="cpu-task") if os.path.isfile(vmfb_path): opt_shark_module.load_module(vmfb_path) else: - vmfb_path = create_module(OPT_MODEL, tokenizer, "cpu-sync") + vmfb_path = create_module(OPT_MODEL, tokenizer, "cpu-task") opt_shark_module.load_module(vmfb_path) while True: try: