Merge branch 'main' into remove_sentence_transformers
monorimet committed Nov 9, 2023
2 parents 393d470 + f41ad87 commit 323e4d7
Showing 125 changed files with 8,207 additions and 4,039 deletions.
8 changes: 4 additions & 4 deletions .github/workflows/nightly.yml
@@ -51,11 +51,11 @@ jobs:
run: |
./setup_venv.ps1
$env:SHARK_PACKAGE_VERSION=${{ env.package_version }}
-pip wheel -v -w dist . --pre -f https://download.pytorch.org/whl/nightly/cpu -f https://llvm.github.io/torch-mlir/package-index/ -f https://nod-ai.github.io/SHARK-Runtime/pip-release-links.html
+pip wheel -v -w dist . --pre -f https://download.pytorch.org/whl/nightly/cpu -f https://llvm.github.io/torch-mlir/package-index/ -f https://nod-ai.github.io/SRT/pip-release-links.html
python process_skipfiles.py
pyinstaller .\apps\stable_diffusion\shark_sd.spec
mv ./dist/nodai_shark_studio.exe ./dist/nodai_shark_studio_${{ env.package_version_ }}.exe
-signtool sign /f c:\g\shark_02152023.cer /csp "eToken Base Cryptographic Provider" /k "${{ secrets.CI_CERT }}" ./dist/nodai_shark_studio_${{ env.package_version_ }}.exe
+signtool sign /f c:\g\shark_02152023.cer /fd certHash /csp "eToken Base Cryptographic Provider" /k "${{ secrets.CI_CERT }}" ./dist/nodai_shark_studio_${{ env.package_version_ }}.exe
- name: Upload Release Assets
id: upload-release-assets
@@ -104,7 +104,7 @@ jobs:
echo "DATE=$(date +'%Y-%m-%d')" >> $GITHUB_ENV
python -m pip install --upgrade pip
python -m pip install flake8 pytest toml
-if [ -f requirements.txt ]; then pip install -r requirements.txt -f https://llvm.github.io/torch-mlir/package-index/ -f https://nod-ai.github.io/SHARK-Runtime/pip-release-links.html; fi
+if [ -f requirements.txt ]; then pip install -r requirements.txt -f https://llvm.github.io/torch-mlir/package-index/ -f https://nod-ai.github.io/SRT/pip-release-links.html; fi
- name: Lint with flake8
run: |
# stop the build if there are Python syntax errors or undefined names
@@ -144,7 +144,7 @@ jobs:
source shark.venv/bin/activate
package_version="$(printf '%(%Y%m%d)T.${{ github.run_number }}')"
SHARK_PACKAGE_VERSION=${package_version} \
-pip wheel -v -w wheelhouse . --pre -f https://download.pytorch.org/whl/nightly/torch -f https://llvm.github.io/torch-mlir/package-index/ -f https://nod-ai.github.io/SHARK-Runtime/pip-release-links.html
+pip wheel -v -w wheelhouse . --pre -f https://download.pytorch.org/whl/nightly/torch -f https://llvm.github.io/torch-mlir/package-index/ -f https://nod-ai.github.io/SRT/pip-release-links.html
# Install the built wheel
pip install ./wheelhouse/nodai*
# Validate the Models
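
To reproduce this wheel build locally against the renamed index, a minimal sketch (run from the repo root in the project venv; the version string here is illustrative, not the CI value):

```shell
# Build the SHARK wheel, resolving extra packages from the renamed SRT index
SHARK_PACKAGE_VERSION=0.0.dev0 \
pip wheel -v -w wheelhouse . --pre \
    -f https://download.pytorch.org/whl/nightly/torch \
    -f https://llvm.github.io/torch-mlir/package-index/ \
    -f https://nod-ai.github.io/SRT/pip-release-links.html
```
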
3 changes: 2 additions & 1 deletion .github/workflows/test-models.yml
@@ -137,7 +137,8 @@ jobs:
source shark.venv/bin/activate
echo $PATH
pip list | grep -E "torch|iree"
-pytest --ci --ci_sha=${SHORT_SHA} --local_tank_cache="/Volumes/builder/anush/shark_cache" --tank_url="gs://shark_tank/nightly/" -k metal
+# disabled due to a low-visibility memory issue with pytest on macos.
+# pytest --ci --ci_sha=${SHORT_SHA} --local_tank_cache="/Volumes/builder/anush/shark_cache" --tank_url="gs://shark_tank/nightly/" -k metal

- name: Validate Vulkan Models (a100)
if: matrix.suite == 'vulkan' && matrix.os == 'a100'
11 changes: 10 additions & 1 deletion .gitignore
@@ -182,7 +182,7 @@ generated_imgs/

# Custom model related artefacts
variants.json
-models/
+/models/

# models folder
apps/stable_diffusion/web/models/
@@ -193,3 +193,12 @@ stencil_annotator/
# For DocuChat
apps/language_models/langchain/user_path/
db_dir_UserData
+
+# Embeded browser cache and other
+apps/stable_diffusion/web/EBWebView/
+
+# Llama2 tokenizer configs
+llama2_tokenizer_configs/
+
+# Webview2 runtime artefacts
+EBWebView/
2 changes: 1 addition & 1 deletion .gitmodules
@@ -1,4 +1,4 @@
[submodule "inference/thirdparty/shark-runtime"]
path = inference/thirdparty/shark-runtime
-url =https://github.com/nod-ai/SHARK-Runtime.git
+url =https://github.com/nod-ai/SRT.git
branch = shark-06032022
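
For an existing checkout, the renamed submodule URL only takes effect after a sync; a minimal sequence (assuming the submodule was previously initialized from the old SHARK-Runtime URL) might be:

```shell
# Rewrite the local submodule config to match the updated URL in .gitmodules
git submodule sync inference/thirdparty/shark-runtime
# Re-fetch the submodule from the renamed nod-ai/SRT remote
git submodule update --init --recursive inference/thirdparty/shark-runtime
```
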
14 changes: 9 additions & 5 deletions README.md
@@ -10,7 +10,7 @@ High Performance Machine Learning Distribution
<summary>Prerequisites - Drivers </summary>

#### Install your Windows hardware drivers
-* [AMD RDNA Users] Download the latest driver [here](https://www.amd.com/en/support/kb/release-notes/rn-rad-win-23-2-1).
+* [AMD RDNA Users] Download the latest driver (23.2.1 is the oldest supported) [here](https://www.amd.com/en/support).
* [macOS Users] Download and install the 1.3.216 Vulkan SDK from [here](https://sdk.lunarg.com/sdk/download/1.3.216.0/mac/vulkansdk-macos-1.3.216.0.dmg). Newer versions of the SDK will not work.
* [Nvidia Users] Download and install the latest CUDA / Vulkan drivers from [here](https://developer.nvidia.com/cuda-downloads)

@@ -170,7 +170,7 @@ python -m pip install --upgrade pip
This step pip installs SHARK and related packages on Linux Python 3.8, 3.10 and 3.11 and macOS / Windows Python 3.11

```shell
-pip install nodai-shark -f https://nod-ai.github.io/SHARK/package-index/ -f https://llvm.github.io/torch-mlir/package-index/ -f https://nod-ai.github.io/SHARK-Runtime/pip-release-links.html --extra-index-url https://download.pytorch.org/whl/nightly/cpu
+pip install nodai-shark -f https://nod-ai.github.io/SHARK/package-index/ -f https://llvm.github.io/torch-mlir/package-index/ -f https://nod-ai.github.io/SRT/pip-release-links.html --extra-index-url https://download.pytorch.org/whl/nightly/cpu
```
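
To move an already-installed environment onto the renamed SRT release index, a reinstall along these lines should work (a sketch, not an official upgrade path):

```shell
# Reinstall nodai-shark so its dependencies resolve against the renamed SRT index
pip install --upgrade --force-reinstall nodai-shark \
    -f https://nod-ai.github.io/SHARK/package-index/ \
    -f https://llvm.github.io/torch-mlir/package-index/ \
    -f https://nod-ai.github.io/SRT/pip-release-links.html \
    --extra-index-url https://download.pytorch.org/whl/nightly/cpu
```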

### Run shark tank model tests.
@@ -254,7 +254,6 @@ if you want to instead incorporate this into a python script, you can pass the `
```
shark_module = SharkInference(
    mlir_model,
-    func_name,
    device=args.device,
    mlir_dialect="tm_tensor",
    dispatch_benchmarks="all",
@@ -297,7 +296,7 @@ torch_mlir, func_name = mlir_importer.import_mlir(tracing_required=True)
# SharkInference accepts mlir in linalg, mhlo, and tosa dialect.
from shark.shark_inference import SharkInference
-shark_module = SharkInference(torch_mlir, func_name, device="cpu", mlir_dialect="linalg")
+shark_module = SharkInference(torch_mlir, device="cpu", mlir_dialect="linalg")
shark_module.compile()
result = shark_module.forward((input))
@@ -320,12 +319,17 @@ mhlo_ir = r"""builtin.module {
arg0 = np.ones((1, 4)).astype(np.float32)
arg1 = np.ones((4, 1)).astype(np.float32)
-shark_module = SharkInference(mhlo_ir, func_name="forward", device="cpu", mlir_dialect="mhlo")
+shark_module = SharkInference(mhlo_ir, device="cpu", mlir_dialect="mhlo")
shark_module.compile()
result = shark_module.forward((arg0, arg1))
```
</details>

+## Examples Using the REST API
+
+* [Setting up SHARK for use with Blender](./docs/shark_sd_blender.md)
+* [Setting up SHARK for use with Koboldcpp](./docs/shark_sd_koboldcpp.md)
+
## Supported and Validated Models

SHARK is maintained to support the latest innovations in ML Models:
42 changes: 16 additions & 26 deletions apps/language_models/langchain/h2oai_pipeline.py
@@ -20,7 +20,7 @@
from pathlib import Path
from shark.shark_inference import SharkInference
from shark.shark_downloader import download_public_file
-from shark.shark_importer import import_with_fx
+from shark.shark_importer import import_with_fx, save_mlir
from apps.stable_diffusion.src import args

# Brevitas
@@ -29,14 +29,8 @@
from brevitas_examples.llm.llm_quant.run_utils import get_model_impl


-def brevitas〇matmul_rhs_group_quant〡shape(
-    lhs: List[int],
-    rhs: List[int],
-    rhs_scale: List[int],
-    rhs_zero_point: List[int],
-    rhs_bit_width: int,
-    rhs_group_size: int,
-) -> List[int]:
+# fmt: off
+def quant〇matmul_rhs_group_quant〡shape(lhs: List[int], rhs: List[int], rhs_scale: List[int], rhs_zero_point: List[int], rhs_bit_width: int, rhs_group_size: int) -> List[int]:
    if len(lhs) == 3 and len(rhs) == 2:
        return [lhs[0], lhs[1], rhs[0]]
    elif len(lhs) == 2 and len(rhs) == 2:
@@ -45,30 +39,21 @@ def brevitas〇matmul_rhs_group_quant〡shape(
        raise ValueError("Input shapes not supported.")


-def brevitas〇matmul_rhs_group_quant〡dtype(
-    lhs_rank_dtype: Tuple[int, int],
-    rhs_rank_dtype: Tuple[int, int],
-    rhs_scale_rank_dtype: Tuple[int, int],
-    rhs_zero_point_rank_dtype: Tuple[int, int],
-    rhs_bit_width: int,
-    rhs_group_size: int,
-) -> int:
+def quant〇matmul_rhs_group_quant〡dtype(lhs_rank_dtype: Tuple[int, int], rhs_rank_dtype: Tuple[int, int], rhs_scale_rank_dtype: Tuple[int, int], rhs_zero_point_rank_dtype: Tuple[int, int], rhs_bit_width: int, rhs_group_size: int) -> int:
    # output dtype is the dtype of the lhs float input
    lhs_rank, lhs_dtype = lhs_rank_dtype
    return lhs_dtype


-def brevitas〇matmul_rhs_group_quant〡has_value_semantics(
-    lhs, rhs, rhs_scale, rhs_zero_point, rhs_bit_width, rhs_group_size
-) -> None:
+def quant〇matmul_rhs_group_quant〡has_value_semantics(lhs, rhs, rhs_scale, rhs_zero_point, rhs_bit_width, rhs_group_size) -> None:
    return


brevitas_matmul_rhs_group_quant_library = [
-    brevitas〇matmul_rhs_group_quant〡shape,
-    brevitas〇matmul_rhs_group_quant〡dtype,
-    brevitas〇matmul_rhs_group_quant〡has_value_semantics,
-]
+    quant〇matmul_rhs_group_quant〡shape,
+    quant〇matmul_rhs_group_quant〡dtype,
+    quant〇matmul_rhs_group_quant〡has_value_semantics]
+# fmt: on

global_device = "cuda"
global_precision = "fp16"
@@ -244,15 +229,15 @@ def get_bytecode(self, device, precision):
                ts_graph,
                [*h2ogptCompileInput],
                output_type=torch_mlir.OutputType.TORCH,
-                backend_legal_ops=["brevitas.matmul_rhs_group_quant"],
+                backend_legal_ops=["quant.matmul_rhs_group_quant"],
                extra_library=brevitas_matmul_rhs_group_quant_library,
                use_tracing=False,
                verbose=False,
            )
            print(f"[DEBUG] converting torch to linalg")
            run_pipeline_with_repro_report(
                module,
-                "builtin.module(func.func(torch-unpack-torch-tensor),torch-backend-to-linalg-on-tensors-backend-pipeline)",
+                "builtin.module(func.func(torch-unpack-quant-tensor),func.func(torch-convert-custom-quant-op),torch-backend-to-linalg-on-tensors-backend-pipeline)",
                description="Lowering Torch Backend IR -> Linalg-on-Tensors Backend IR",
            )
        else:
@@ -271,6 +256,11 @@ def get_bytecode(self, device, precision):
        bytecode = bytecode_stream.getvalue()
        del module

+        bytecode = save_mlir(
+            bytecode,
+            model_name=f"h2ogpt_{precision}",
+            frontend="torch",
+        )
        return bytecode

    def forward(self, input_ids, attention_mask):
4 changes: 2 additions & 2 deletions apps/language_models/langchain/langchain_requirements.txt
@@ -68,8 +68,8 @@ tiktoken==0.4.0
openai==0.27.8

# optional for chat with PDF
-langchain==0.0.202
-pypdf==3.12.2
+langchain==0.0.325
+pypdf==3.17.0
# avoid textract, requires old six
#textract==1.6.5

(diff truncated; the remaining changed files are not shown)
