From 8be67d8e00557937cf753769f1a34916b54ed5e4 Mon Sep 17 00:00:00 2001
From: dan
Date: Tue, 20 Jun 2023 23:19:45 -0500
Subject: [PATCH] fp16 fixes for webui

---
 .gitignore                                  |  2 ++
 .../src/pipelines/vicuna_pipeline.py        | 28 +++++++++++++++----
 apps/stable_diffusion/src/utils/utils.py   |  5 +++-
 apps/stable_diffusion/web/ui/stablelm_ui.py | 16 ++++++-----
 4 files changed, 38 insertions(+), 13 deletions(-)

diff --git a/.gitignore b/.gitignore
index efc8970565..395a677ba6 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,6 +2,8 @@
 __pycache__/
 *.py[cod]
 *$py.class
+*.mlir
+*.vmfb
 
 # C extensions
 *.so
diff --git a/apps/language_models/src/pipelines/vicuna_pipeline.py b/apps/language_models/src/pipelines/vicuna_pipeline.py
index e566d44a39..4745504ae5 100644
--- a/apps/language_models/src/pipelines/vicuna_pipeline.py
+++ b/apps/language_models/src/pipelines/vicuna_pipeline.py
@@ -28,10 +28,10 @@ def __init__(
         max_num_tokens=512,
         device="cuda",
         precision="fp32",
-        first_vicuna_mlir_path=Path("first_vicuna.mlir"),
-        second_vicuna_mlir_path=Path("second_vicuna.mlir"),
-        first_vicuna_vmfb_path=Path("first_vicuna.vmfb"),
-        second_vicuna_vmfb_path=Path("second_vicuna.vmfb"),
+        first_vicuna_mlir_path=None,
+        second_vicuna_mlir_path=None,
+        first_vicuna_vmfb_path=None,
+        second_vicuna_vmfb_path=None,
         load_mlir_from_shark_tank=True,
     ) -> None:
         super().__init__(model_name, hf_model_path, max_num_tokens)
@@ -42,9 +42,27 @@ def __init__(
         self.second_vicuna_vmfb_path = second_vicuna_vmfb_path
         self.first_vicuna_mlir_path = first_vicuna_mlir_path
         self.second_vicuna_mlir_path = second_vicuna_mlir_path
+        self.load_mlir_from_shark_tank = load_mlir_from_shark_tank
+        if self.first_vicuna_mlir_path is None:
+            self.first_vicuna_mlir_path = self.get_model_path()
+        if self.second_vicuna_mlir_path is None:
+            self.second_vicuna_mlir_path = self.get_model_path("second")
+        if self.first_vicuna_vmfb_path is None:
+            self.first_vicuna_vmfb_path = self.get_model_path(suffix="vmfb")
+        if self.second_vicuna_vmfb_path is None:
+            self.second_vicuna_vmfb_path = self.get_model_path(
+                "second", "vmfb"
+            )
         self.tokenizer = self.get_tokenizer()
         self.shark_model = self.compile()
-        self.load_mlir_from_shark_tank = load_mlir_from_shark_tank
+
+    def get_model_path(self, model_number="first", suffix="mlir"):
+        safe_device = "_".join(self.device.split("-"))
+        if suffix == "mlir":
+            return Path(f"{model_number}_vicuna_{self.precision}.{suffix}")
+        return Path(
+            f"{model_number}_vicuna_{safe_device}_{self.precision}.{suffix}"
+        )
 
     def get_tokenizer(self):
         tokenizer = AutoTokenizer.from_pretrained(
diff --git a/apps/stable_diffusion/src/utils/utils.py b/apps/stable_diffusion/src/utils/utils.py
index 85d943d93c..2ad5f53aaa 100644
--- a/apps/stable_diffusion/src/utils/utils.py
+++ b/apps/stable_diffusion/src/utils/utils.py
@@ -428,7 +428,10 @@ def get_devices_by_name(driver_name):
     available_devices.extend(vulkan_devices)
     cuda_devices = get_devices_by_name("cuda")
     available_devices.extend(cuda_devices)
-    available_devices.append("device => cpu")
+    cpu_device = get_devices_by_name("cpu-sync")
+    available_devices.extend(cpu_device)
+    cpu_device = get_devices_by_name("cpu-task")
+    available_devices.extend(cpu_device)
     return available_devices
 
 
diff --git a/apps/stable_diffusion/web/ui/stablelm_ui.py b/apps/stable_diffusion/web/ui/stablelm_ui.py
index 6735d5b4ff..0e5cf4092d 100644
--- a/apps/stable_diffusion/web/ui/stablelm_ui.py
+++ b/apps/stable_diffusion/web/ui/stablelm_ui.py
@@ -41,17 +41,21 @@ def chat(curr_system_message, history, model, device, precision):
     curr_system_message = start_message_vicuna
     if vicuna_model == 0:
-        first_vic_vmfb_path = Path("first_vicuna.vmfb")
-        second_vic_vmfb_path = Path("second_vicuna.vmfb")
         if "cuda" in device:
             device = "cuda"
+        elif "sync" in device:
+            device = "cpu-sync"
+        elif "task" in device:
+            device = "cpu-task"
+        elif "vulkan" in device:
+            device = "vulkan"
+        else:
+            print("unrecognized device")
         vicuna_model = Vicuna(
             "vicuna",
             hf_model_path=model,
             device=device,
             precision=precision,
-            first_vicuna_vmfb_path=first_vic_vmfb_path,
-            second_vicuna_vmfb_path=second_vic_vmfb_path,
         )
 
     messages = curr_system_message + "".join(
         [
@@ -120,9 +124,7 @@ def chat(curr_system_message, history, model, device, precision):
             "TheBloke/vicuna-7B-1.1-HF",
         ],
     )
-    supported_devices = [
-        device for device in available_devices if "cuda" in device
-    ]
+    supported_devices = available_devices
     enabled = len(supported_devices) > 0
     device = gr.Dropdown(
         label="Device",
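
Post-patch note: the artifact naming that get_model_path() introduces is easiest
to see in isolation. The sketch below is a free-standing copy of the helper for
illustration only; the device "cpu-task" and precision "fp16" are example
values, not defaults.

from pathlib import Path

# Standalone mirror of the patched Vicuna.get_model_path(), for illustration.
def get_model_path(device, precision, model_number="first", suffix="mlir"):
    safe_device = "_".join(device.split("-"))  # "cpu-task" -> "cpu_task"
    if suffix == "mlir":
        # MLIR is device-agnostic, so only the precision is encoded in the name.
        return Path(f"{model_number}_vicuna_{precision}.{suffix}")
    # VMFBs are compiled per device, so the sanitized device name is encoded too.
    return Path(f"{model_number}_vicuna_{safe_device}_{precision}.{suffix}")

print(get_model_path("cpu-task", "fp16"))                    # first_vicuna_fp16.mlir
print(get_model_path("cpu-task", "fp16", suffix="vmfb"))     # first_vicuna_cpu_task_fp16.vmfb
print(get_model_path("cpu-task", "fp16", "second", "vmfb"))  # second_vicuna_cpu_task_fp16.vmfb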