nod-ai · abdulraheembeigh · Aug 3, 2023
diff --git a/apps/language_models/scripts/stablelm.py b/apps/language_models/scripts/stablelm.py
@@ -49,7 +49,6 @@ def compile_stableLM(
 ):
     from shark.shark_inference import SharkInference
 
-    # device = "cuda"  # "cpu"
     # TODO: vmfb and mlir name should include precision and device
     vmfb_path = (
         Path(model_name + f"_{device}.vmfb")
@@ -130,13 +129,6 @@ def get_tokenizer():
     return tok
 
 
-# sharkStableLM = compile_stableLM
-# (
-#   None,
-#   tuple([input_ids, attention_mask]),
-#   "stableLM_linalg_f32_seqLen256",
-#   "/home/shark/vivek/stableLM_shark_f32_seqLen256"
-# )
 def generate(
     new_text,
     max_new_tokens,
@@ -148,18 +140,8 @@ def generate(
     # Construct the input message string for the model by
     # concatenating the current system message and conversation history
     # Tokenize the messages string
-    # sharkStableLM = compile_stableLM
-    # (
-    #   None,
-    #   tuple([input_ids, attention_mask]),
-    #   "stableLM_linalg_f32_seqLen256",
-    #   "/home/shark/vivek/stableLM_shark_f32_seqLen256"
-    # )
     words_list = []
     for i in range(max_new_tokens):
-        # numWords = len(new_text.split())
-        # if(numWords>220):
-        #  break
         params = {
             "new_text": new_text,
         }
@@ -188,7 +170,6 @@ def generate_new_token(shark_model, tokenizer, params):
         return_tensors="pt",
     )
     sum_attentionmask = torch.sum(model_inputs.attention_mask)
-    # sharkStableLM = compile_stableLM(None, tuple([input_ids, attention_mask]), "stableLM_linalg_f32_seqLen256", "/home/shark/vivek/stableLM_shark_f32_seqLen256")
     output = shark_model(
         "forward", [model_inputs.input_ids, model_inputs.attention_mask]
     )

diff --git a/apps/language_models/src/pipelines/stablelm_pipeline.py b/apps/language_models/src/pipelines/stablelm_pipeline.py
@@ -63,7 +63,6 @@ def compile(self):
             f"stableLM_linalg_{self.precision}_seqLen{self.max_sequence_len}"
         )
 
-        # device = "cuda"  # "cpu"
         # TODO: vmfb and mlir name should include precision and device
         model_vmfb_name = None
         vmfb_path = (
@@ -120,7 +119,6 @@ def compile(self):
     def get_tokenizer(self):
         tok = AutoTokenizer.from_pretrained(self.hf_model_path)
         tok.add_special_tokens({"pad_token": "<PAD>"})
-        # print("[DEBUG] Sucessfully loaded the tokenizer to the memory")
         return tok
 
     def generate(self, prompt):