From c4319eddfb369f159d90f8483de11372a2f49133 Mon Sep 17 00:00:00 2001
From: Abdul Raheem Beigh
Date: Thu, 3 Aug 2023 19:51:44 +0100
Subject: [PATCH] [NFC] Minor NFC changes

Helps keep the code clean by removing commented-out dead code and a stale
debug print.
---
 apps/language_models/scripts/stablelm.py | 20 --------------------
 .../src/pipelines/stablelm_pipeline.py   |  2 --
 2 files changed, 22 deletions(-)

diff --git a/apps/language_models/scripts/stablelm.py b/apps/language_models/scripts/stablelm.py
index 223760374a..bb637ede64 100644
--- a/apps/language_models/scripts/stablelm.py
+++ b/apps/language_models/scripts/stablelm.py
@@ -49,7 +49,6 @@ def compile_stableLM(
 ):
     from shark.shark_inference import SharkInference
 
-    # device = "cuda" # "cpu"
     # TODO: vmfb and mlir name should include precision and device
     vmfb_path = (
         Path(model_name + f"_{device}.vmfb")
@@ -129,14 +128,6 @@ def get_tokenizer():
     print("Sucessfully loaded the tokenizer to the memory")
     return tok
 
-
-# sharkStableLM = compile_stableLM
-# (
-#     None,
-#     tuple([input_ids, attention_mask]),
-#     "stableLM_linalg_f32_seqLen256",
-#     "/home/shark/vivek/stableLM_shark_f32_seqLen256"
-# )
 def generate(
     new_text,
     max_new_tokens,
@@ -148,18 +139,8 @@ def generate(
     # Construct the input message string for the model by
     # concatenating the current system message and conversation history
     # Tokenize the messages string
-    # sharkStableLM = compile_stableLM
-    # (
-    #     None,
-    #     tuple([input_ids, attention_mask]),
-    #     "stableLM_linalg_f32_seqLen256",
-    #     "/home/shark/vivek/stableLM_shark_f32_seqLen256"
-    # )
     words_list = []
     for i in range(max_new_tokens):
-        # numWords = len(new_text.split())
-        # if(numWords>220):
-        #     break
         params = {
             "new_text": new_text,
         }
@@ -188,7 +169,6 @@ def generate_new_token(shark_model, tokenizer, params):
         return_tensors="pt",
     )
     sum_attentionmask = torch.sum(model_inputs.attention_mask)
-    # sharkStableLM = compile_stableLM(None, tuple([input_ids, attention_mask]), "stableLM_linalg_f32_seqLen256", "/home/shark/vivek/stableLM_shark_f32_seqLen256")
     output = shark_model(
         "forward", [model_inputs.input_ids, model_inputs.attention_mask]
     )
diff --git a/apps/language_models/src/pipelines/stablelm_pipeline.py b/apps/language_models/src/pipelines/stablelm_pipeline.py
index d765b72fce..b54a94b7c6 100644
--- a/apps/language_models/src/pipelines/stablelm_pipeline.py
+++ b/apps/language_models/src/pipelines/stablelm_pipeline.py
@@ -63,7 +63,6 @@ def compile(self):
             f"stableLM_linalg_{self.precision}_seqLen{self.max_sequence_len}"
         )
 
-        # device = "cuda" # "cpu"
     # TODO: vmfb and mlir name should include precision and device
         model_vmfb_name = None
         vmfb_path = (
@@ -120,7 +119,6 @@ def compile(self):
     def get_tokenizer(self):
         tok = AutoTokenizer.from_pretrained(self.hf_model_path)
         tok.add_special_tokens({"pad_token": ""})
-        # print("[DEBUG] Sucessfully loaded the tokenizer to the memory")
         return tok
 
     def generate(self, prompt):
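
Reviewer note (not part of the patch): the comment block deleted from
stablelm.py doubled as the only written example of how compile_stableLM is
invoked. A minimal sketch of that call, reconstructed from the deleted
comment; the import path, the hf_model_path placeholder, and the
argument-role comments are assumptions inferred from the surrounding code,
not confirmed by this diff:

    # Sketch only: reconstructs the example invocation that the deleted
    # comments documented. Names marked "assumed" are not confirmed here.
    from transformers import AutoTokenizer

    from apps.language_models.scripts.stablelm import (  # assumed module path
        compile_stableLM,
    )

    hf_model_path = "..."  # placeholder: whichever StableLM checkpoint is in use
    tok = AutoTokenizer.from_pretrained(hf_model_path)
    inputs = tok("example prompt", return_tensors="pt")

    sharkStableLM = compile_stableLM(
        None,  # module argument; None here, exactly as in the deleted comment
        tuple([inputs.input_ids, inputs.attention_mask]),  # example inputs
        "stableLM_linalg_f32_seqLen256",  # model name, verbatim from the comment
        "/home/shark/vivek/stableLM_shark_f32_seqLen256",  # path, verbatim from the comment
    )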