Relocate quantized matmul reassociation flag (#2047)
* Remove the quantized matmul reassociation flag from the default CPU compile flags (it is re-added in the LLM API as a per-model extra_args entry)

This flag should be a model/use-case specific addition, not a default CPU compile flag.
monorimet committed Dec 20, 2023
1 parent 788cc91 commit fa95ed3
Showing 2 changed files with 1 addition and 1 deletion.
apps/shark_studio/api/llm.py (1 change: 1 addition & 0 deletions)

@@ -106,6 +106,7 @@ def compile(self) -> None:
             frontend="torch",
             external_weight_file=self.external_weight_file,
             write_to=self.vmfb_name,
+            extra_args=["--iree-global-opt-enable-quantized-matmul-reassociation"],
         )
         # TODO: delete the temp file
shark/iree_utils/compile_utils.py (1 change: 0 additions & 1 deletion)

@@ -43,7 +43,6 @@ def get_iree_device_args(device, extra_args=[]):
         get_iree_cpu_args()
         + u_kernel_flag
         + stack_size_flag
-        + ["--iree-global-opt-enable-quantized-matmul-reassociation"]
     )
     if device == "cuda":
         from shark.iree_utils.gpu_utils import get_iree_gpu_args
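With the flag gone from the CPU defaults, a use case that still wants reassociation opts in explicitly through extra_args, as the llm.py hunk above does. A minimal sketch of that pattern follows; only get_iree_device_args, its signature, and the flag string come from this diff, while combining the two flag lists by concatenation is an assumption about how callers assemble the final iree-compile arguments.

# Minimal sketch of the per-use-case opt-in, assuming a SHARK checkout
# where shark.iree_utils.compile_utils is importable.
# get_iree_device_args(device, extra_args=[]) and the flag string are taken
# from this diff; concatenating the lists below is an assumption.
from shark.iree_utils.compile_utils import get_iree_device_args

# No longer a default CPU flag: a quantized-LLM compile requests it explicitly.
QUANT_MATMUL_FLAGS = ["--iree-global-opt-enable-quantized-matmul-reassociation"]

device_args = get_iree_device_args("cpu", extra_args=QUANT_MATMUL_FLAGS)
compile_flags = device_args + QUANT_MATMUL_FLAGS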
