```python
from datasets import load_dataset
from random import randint

# Load our test dataset
eval_dataset = load_dataset("json", data_files="test_dataset.json", split="train")
rand_idx = randint(0, len(eval_dataset) - 1)  # -1 keeps the random index in range

# Test on sample
prompt = pipe.tokenizer.apply_chat_template(eval_dataset[rand_idx]["messages"][:2], tokenize=False, add_generation_prompt=True)
outputs = pipe(prompt, max_new_tokens=256, do_sample=False, temperature=0.1, top_k=50, top_p=0.1, eos_token_id=pipe.tokenizer.eos_token_id, pad_token_id=pipe.tokenizer.pad_token_id)

print(f"Query:\n{eval_dataset[rand_idx]['messages'][1]['content']}")
print(f"Original Answer:\n{eval_dataset[rand_idx]['messages'][2]['content']}")
print(f"Generated Answer:\n{outputs[0]['generated_text'][len(prompt):].strip()}")
```

Running this cell gave the following error:
```
---------------------------------------------------------------------------
NotImplementedError                       Traceback (most recent call last)
Cell In[14], line 11
      9 # Test on sample
     10 prompt = pipe.tokenizer.apply_chat_template(eval_dataset[rand_idx]["messages"][:2], tokenize=False, add_generation_prompt=True)
---> 11 outputs = pipe(prompt, max_new_tokens=256, do_sample=False, temperature=0.1, top_k=50, top_p=0.1, eos_token_id=pipe.tokenizer.eos_token_id, pad_token_id=pipe.tokenizer.pad_token_id)
     13 print(f"Query:\n{eval_dataset[rand_idx]['messages'][1]['content']}")
     14 print(f"Original Answer:\n{eval_dataset[rand_idx]['messages'][2]['content']}")

File /usr/local/lib/python3.10/dist-packages/transformers/pipelines/text_generation.py:208, in TextGenerationPipeline.__call__(self, text_inputs, **kwargs)
    167 def __call__(self, text_inputs, **kwargs):
    168     """
    169     Complete the prompt(s) given as inputs.
    170     (...)
    206     ids of the generated text.
    207     """
--> 208     return super().__call__(text_inputs, **kwargs)

File /usr/local/lib/python3.10/dist-packages/transformers/pipelines/base.py:1140, in Pipeline.__call__(self, inputs, num_workers, batch_size, *args, **kwargs)
   1132     return next(
   1133         iter(
   1134             self.get_iterator(
   (...)
   1137         )
   1138     )
   1139 else:
-> 1140     return self.run_single(inputs, preprocess_params, forward_params, postprocess_params)

File /usr/local/lib/python3.10/dist-packages/transformers/pipelines/base.py:1147, in Pipeline.run_single(self, inputs, preprocess_params, forward_params, postprocess_params)
   1145 def run_single(self, inputs, preprocess_params, forward_params, postprocess_params):
   1146     model_inputs = self.preprocess(inputs, **preprocess_params)
-> 1147     model_outputs = self.forward(model_inputs, **forward_params)
   1148     outputs = self.postprocess(model_outputs, **postprocess_params)
   1149     return outputs

File /usr/local/lib/python3.10/dist-packages/transformers/pipelines/base.py:1046, in Pipeline.forward(self, model_inputs, **forward_params)
   1044     with inference_context():
   1045         model_inputs = self._ensure_tensor_on_device(model_inputs, device=self.device)
-> 1046         model_outputs = self._forward(model_inputs, **forward_params)
   1047         model_outputs = self._ensure_tensor_on_device(model_outputs, device=torch.device("cpu"))
   1048 else:

File /usr/local/lib/python3.10/dist-packages/transformers/pipelines/text_generation.py:271, in TextGenerationPipeline._forward(self, model_inputs, **generate_kwargs)
    268     generate_kwargs["min_length"] += prefix_length
    270 # BS x SL
--> 271 generated_sequence = self.model.generate(input_ids=input_ids, attention_mask=attention_mask, **generate_kwargs)
    272 out_b = generated_sequence.shape[0]
    273 if self.framework == "pt":

File /usr/local/lib/python3.10/dist-packages/peft/peft_model.py:1140, in PeftModelForCausalLM.generate(self, **kwargs)
   1138 self.base_model.generation_config = self.generation_config
   1139 try:
-> 1140     outputs = self.base_model.generate(**kwargs)
   1141 except:
   1142     self.base_model.prepare_inputs_for_generation = self.base_model_prepare_inputs_for_generation

File /usr/local/lib/python3.10/dist-packages/torch/utils/_contextlib.py:115, in context_decorator.<locals>.decorate_context(*args, **kwargs)
    112 @functools.wraps(func)
    113 def decorate_context(*args, **kwargs):
    114     with ctx_factory():
--> 115         return func(*args, **kwargs)

File /usr/local/lib/python3.10/dist-packages/transformers/generation/utils.py:1718, in GenerationMixin.generate(self, inputs, generation_config, logits_processor, stopping_criteria, prefix_allowed_tokens_fn, synced_gpus, assistant_model, streamer, negative_prompt_ids, negative_prompt_attention_mask, **kwargs)
   1701     return self.assisted_decoding(
   1702         input_ids,
   1703         assistant_model=assistant_model,
   (...)
   1714         **model_kwargs,
   1715     )
   1716 if generation_mode == GenerationMode.GREEDY_SEARCH:
   1717     # 11. run greedy search
-> 1718     return self.greedy_search(
   1719         input_ids,
   1720         logits_processor=logits_processor,
   1721         stopping_criteria=stopping_criteria,
   1722         pad_token_id=generation_config.pad_token_id,
   1723         eos_token_id=generation_config.eos_token_id,
   1724         output_scores=generation_config.output_scores,
   1725         return_dict_in_generate=generation_config.return_dict_in_generate,
   1726         synced_gpus=synced_gpus,
   1727         streamer=streamer,
   1728         **model_kwargs,
   1729     )
   1731 elif generation_mode == GenerationMode.CONTRASTIVE_SEARCH:
   1732     if not model_kwargs["use_cache"]:

File /usr/local/lib/python3.10/dist-packages/transformers/generation/utils.py:2579, in GenerationMixin.greedy_search(self, input_ids, logits_processor, stopping_criteria, max_length, pad_token_id, eos_token_id, output_attentions, output_hidden_states, output_scores, return_dict_in_generate, synced_gpus, streamer, **model_kwargs)
   2576 model_inputs = self.prepare_inputs_for_generation(input_ids, **model_kwargs)
   2578 # forward pass to get next token
-> 2579 outputs = self(
   2580     **model_inputs,
   2581     return_dict=True,
   2582     output_attentions=output_attentions,
   2583     output_hidden_states=output_hidden_states,
   2584 )
   2586 if synced_gpus and this_peer_finished:
   2587     continue  # don't waste resources running the code we don't need

File /usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1518, in Module._wrapped_call_impl(self, *args, **kwargs)
   1516     return self._compiled_call_impl(*args, **kwargs)  # type: ignore[misc]
   1517 else:
-> 1518     return self._call_impl(*args, **kwargs)

File /usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1527, in Module._call_impl(self, *args, **kwargs)
   1522 # If we don't have any hooks, we want to skip the rest of the logic in
   1523 # this function, and just call forward.
   1524 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1525         or _global_backward_pre_hooks or _global_backward_hooks
   1526         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1527     return forward_call(*args, **kwargs)
   1529 try:
   1530     result = None

File /usr/local/lib/python3.10/dist-packages/accelerate/hooks.py:165, in add_hook_to_module.<locals>.new_forward(module, *args, **kwargs)
    163     output = module._old_forward(*args, **kwargs)
    164 else:
--> 165     output = module._old_forward(*args, **kwargs)
    166 return module._hf_hook.post_forward(module, output)

File /usr/local/lib/python3.10/dist-packages/transformers/models/llama/modeling_llama.py:1199, in LlamaForCausalLM.forward(self, input_ids, attention_mask, position_ids, past_key_values, inputs_embeds, labels, use_cache, output_attentions, output_hidden_states, return_dict)
   1197     logits = torch.cat(logits, dim=-1)
   1198 else:
-> 1199     logits = self.lm_head(hidden_states)
   1200 logits = logits.float()
   1202 loss = None

File /usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1518, in Module._wrapped_call_impl(self, *args, **kwargs)
   1516     return self._compiled_call_impl(*args, **kwargs)  # type: ignore[misc]
   1517 else:
-> 1518     return self._call_impl(*args, **kwargs)

File /usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1527, in Module._call_impl(self, *args, **kwargs)
   1522 # If we don't have any hooks, we want to skip the rest of the logic in
   1523 # this function, and just call forward.
   1524 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1525         or _global_backward_pre_hooks or _global_backward_hooks
   1526         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1527     return forward_call(*args, **kwargs)
   1529 try:
   1530     result = None

File /usr/local/lib/python3.10/dist-packages/accelerate/hooks.py:160, in add_hook_to_module.<locals>.new_forward(module, *args, **kwargs)
    159 def new_forward(module, *args, **kwargs):
--> 160     args, kwargs = module._hf_hook.pre_forward(module, *args, **kwargs)
    161     if module._hf_hook.no_grad:
    162         with torch.no_grad():

File /usr/local/lib/python3.10/dist-packages/accelerate/hooks.py:293, in AlignDevicesHook.pre_forward(self, module, *args, **kwargs)
    291     if self.weights_map[name].dtype == torch.int8:
    292         fp16_statistics = self.weights_map[name.replace("weight", "SCB")]
--> 293     set_module_tensor_to_device(
    294         module, name, self.execution_device, value=self.weights_map[name], fp16_statistics=fp16_statistics
    295     )
    297 return send_to_device(args, self.execution_device), send_to_device(
    298     kwargs, self.execution_device, skip_keys=self.skip_keys
    299 )

File /usr/local/lib/python3.10/dist-packages/accelerate/utils/modeling.py:347, in set_module_tensor_to_device(module, tensor_name, device, value, dtype, fp16_statistics)
    345     module._parameters[tensor_name] = param_cls(new_value, requires_grad=old_value.requires_grad)
    346 elif isinstance(value, torch.Tensor):
--> 347     new_value = value.to(device)
    348 else:
    349     new_value = torch.tensor(value, device=device)

NotImplementedError: Cannot copy out of meta tensor; no data!
```
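The final frames point into accelerate's `AlignDevicesHook`, which usually means some of the model's weights were left on the meta device (offloaded to CPU/disk) because the GPU did not have enough free memory when the model was loaded. A minimal diagnostic sketch, assuming the model was loaded with a `device_map` and that `pipe` is the pipeline from the cell above (the `getattr` is a hedge, since not every setup exposes a device map):

```python
import torch

# Where did accelerate place the weights? Entries mapped to "disk" or "cpu"
# (or modules left on the meta device) usually explain
# "Cannot copy out of meta tensor; no data!".
device_map = getattr(pipe.model, "hf_device_map", None)
print(device_map)

# How much GPU memory is actually free right now?
free_bytes, total_bytes = torch.cuda.mem_get_info()
print(f"free: {free_bytes / 1e9:.1f} GB / total: {total_bytes / 1e9:.1f} GB")
```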
Try restarting the kernel and running it again; it seems your GPU is already busy.
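If a full kernel restart is inconvenient, a rough alternative is to drop whatever still holds the old weights and clear the CUDA cache before reloading the model. This is only a sketch and assumes the leftover objects (e.g. `model`, `trainer`, or an earlier `pipe`) are still in scope and are actually what is occupying the GPU:

```python
import gc
import torch

# Release the Python references that keep the weights alive, then let the
# CUDA caching allocator hand the memory back to the driver.
del pipe  # likewise: del model, del trainer, ... for anything else still loaded
gc.collect()
torch.cuda.empty_cache()

# Sanity check: how much memory is this process still holding on the GPU?
print(f"allocated: {torch.cuda.memory_allocated() / 1e9:.2f} GB")
```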