Re. fine-tune-llms-in-2024-with-trl.ipynb #45

Open
andysingal opened this issue Mar 1, 2024 · 1 comment

Comments

@andysingal

from datasets import load_dataset 
from random import randint


# Load our test dataset
eval_dataset = load_dataset("json", data_files="test_dataset.json", split="train")
rand_idx = randint(0, len(eval_dataset) - 1)  # randint is inclusive on both ends

# Test on sample 
prompt = pipe.tokenizer.apply_chat_template(eval_dataset[rand_idx]["messages"][:2], tokenize=False, add_generation_prompt=True)
outputs = pipe(prompt, max_new_tokens=256, do_sample=False, temperature=0.1, top_k=50, top_p=0.1, eos_token_id=pipe.tokenizer.eos_token_id, pad_token_id=pipe.tokenizer.pad_token_id)

print(f"Query:\n{eval_dataset[rand_idx]['messages'][1]['content']}")
print(f"Original Answer:\n{eval_dataset[rand_idx]['messages'][2]['content']}")
print(f"Generated Answer:\n{outputs[0]['generated_text'][len(prompt):].strip()}")

Running the evaluation cell above gave the following error:

---------------------------------------------------------------------------
NotImplementedError                       Traceback (most recent call last)
Cell In[14], line 11
      9 # Test on sample 
     10 prompt = pipe.tokenizer.apply_chat_template(eval_dataset[rand_idx]["messages"][:2], tokenize=False, add_generation_prompt=True)
---> 11 outputs = pipe(prompt, max_new_tokens=256, do_sample=False, temperature=0.1, top_k=50, top_p=0.1, eos_token_id=pipe.tokenizer.eos_token_id, pad_token_id=pipe.tokenizer.pad_token_id)
     13 print(f"Query:\n{eval_dataset[rand_idx]['messages'][1]['content']}")
     14 print(f"Original Answer:\n{eval_dataset[rand_idx]['messages'][2]['content']}")

File /usr/local/lib/python3.10/dist-packages/transformers/pipelines/text_generation.py:208, in TextGenerationPipeline.__call__(self, text_inputs, **kwargs)
    167 def __call__(self, text_inputs, **kwargs):
    168     """
    169     Complete the prompt(s) given as inputs.
    170 
   (...)
    206           ids of the generated text.
    207     """
--> 208     return super().__call__(text_inputs, **kwargs)

File /usr/local/lib/python3.10/dist-packages/transformers/pipelines/base.py:1140, in Pipeline.__call__(self, inputs, num_workers, batch_size, *args, **kwargs)
   1132     return next(
   1133         iter(
   1134             self.get_iterator(
   (...)
   1137         )
   1138     )
   1139 else:
-> 1140     return self.run_single(inputs, preprocess_params, forward_params, postprocess_params)

File /usr/local/lib/python3.10/dist-packages/transformers/pipelines/base.py:1147, in Pipeline.run_single(self, inputs, preprocess_params, forward_params, postprocess_params)
   1145 def run_single(self, inputs, preprocess_params, forward_params, postprocess_params):
   1146     model_inputs = self.preprocess(inputs, **preprocess_params)
-> 1147     model_outputs = self.forward(model_inputs, **forward_params)
   1148     outputs = self.postprocess(model_outputs, **postprocess_params)
   1149     return outputs

File /usr/local/lib/python3.10/dist-packages/transformers/pipelines/base.py:1046, in Pipeline.forward(self, model_inputs, **forward_params)
   1044     with inference_context():
   1045         model_inputs = self._ensure_tensor_on_device(model_inputs, device=self.device)
-> 1046         model_outputs = self._forward(model_inputs, **forward_params)
   1047         model_outputs = self._ensure_tensor_on_device(model_outputs, device=torch.device("cpu"))
   1048 else:

File /usr/local/lib/python3.10/dist-packages/transformers/pipelines/text_generation.py:271, in TextGenerationPipeline._forward(self, model_inputs, **generate_kwargs)
    268         generate_kwargs["min_length"] += prefix_length
    270 # BS x SL
--> 271 generated_sequence = self.model.generate(input_ids=input_ids, attention_mask=attention_mask, **generate_kwargs)
    272 out_b = generated_sequence.shape[0]
    273 if self.framework == "pt":

File /usr/local/lib/python3.10/dist-packages/peft/peft_model.py:1140, in PeftModelForCausalLM.generate(self, **kwargs)
   1138     self.base_model.generation_config = self.generation_config
   1139 try:
-> 1140     outputs = self.base_model.generate(**kwargs)
   1141 except:
   1142     self.base_model.prepare_inputs_for_generation = self.base_model_prepare_inputs_for_generation

File /usr/local/lib/python3.10/dist-packages/torch/utils/_contextlib.py:115, in context_decorator.<locals>.decorate_context(*args, **kwargs)
    112 @functools.wraps(func)
    113 def decorate_context(*args, **kwargs):
    114     with ctx_factory():
--> 115         return func(*args, **kwargs)

File /usr/local/lib/python3.10/dist-packages/transformers/generation/utils.py:1718, in GenerationMixin.generate(self, inputs, generation_config, logits_processor, stopping_criteria, prefix_allowed_tokens_fn, synced_gpus, assistant_model, streamer, negative_prompt_ids, negative_prompt_attention_mask, **kwargs)
   1701     return self.assisted_decoding(
   1702         input_ids,
   1703         assistant_model=assistant_model,
   (...)
   1714         **model_kwargs,
   1715     )
   1716 if generation_mode == GenerationMode.GREEDY_SEARCH:
   1717     # 11. run greedy search
-> 1718     return self.greedy_search(
   1719         input_ids,
   1720         logits_processor=logits_processor,
   1721         stopping_criteria=stopping_criteria,
   1722         pad_token_id=generation_config.pad_token_id,
   1723         eos_token_id=generation_config.eos_token_id,
   1724         output_scores=generation_config.output_scores,
   1725         return_dict_in_generate=generation_config.return_dict_in_generate,
   1726         synced_gpus=synced_gpus,
   1727         streamer=streamer,
   1728         **model_kwargs,
   1729     )
   1731 elif generation_mode == GenerationMode.CONTRASTIVE_SEARCH:
   1732     if not model_kwargs["use_cache"]:

File /usr/local/lib/python3.10/dist-packages/transformers/generation/utils.py:2579, in GenerationMixin.greedy_search(self, input_ids, logits_processor, stopping_criteria, max_length, pad_token_id, eos_token_id, output_attentions, output_hidden_states, output_scores, return_dict_in_generate, synced_gpus, streamer, **model_kwargs)
   2576 model_inputs = self.prepare_inputs_for_generation(input_ids, **model_kwargs)
   2578 # forward pass to get next token
-> 2579 outputs = self(
   2580     **model_inputs,
   2581     return_dict=True,
   2582     output_attentions=output_attentions,
   2583     output_hidden_states=output_hidden_states,
   2584 )
   2586 if synced_gpus and this_peer_finished:
   2587     continue  # don't waste resources running the code we don't need

File /usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1518, in Module._wrapped_call_impl(self, *args, **kwargs)
   1516     return self._compiled_call_impl(*args, **kwargs)  # type: ignore[misc]
   1517 else:
-> 1518     return self._call_impl(*args, **kwargs)

File /usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1527, in Module._call_impl(self, *args, **kwargs)
   1522 # If we don't have any hooks, we want to skip the rest of the logic in
   1523 # this function, and just call forward.
   1524 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1525         or _global_backward_pre_hooks or _global_backward_hooks
   1526         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1527     return forward_call(*args, **kwargs)
   1529 try:
   1530     result = None

File /usr/local/lib/python3.10/dist-packages/accelerate/hooks.py:165, in add_hook_to_module.<locals>.new_forward(module, *args, **kwargs)
    163         output = module._old_forward(*args, **kwargs)
    164 else:
--> 165     output = module._old_forward(*args, **kwargs)
    166 return module._hf_hook.post_forward(module, output)

File /usr/local/lib/python3.10/dist-packages/transformers/models/llama/modeling_llama.py:1199, in LlamaForCausalLM.forward(self, input_ids, attention_mask, position_ids, past_key_values, inputs_embeds, labels, use_cache, output_attentions, output_hidden_states, return_dict)
   1197     logits = torch.cat(logits, dim=-1)
   1198 else:
-> 1199     logits = self.lm_head(hidden_states)
   1200 logits = logits.float()
   1202 loss = None

File /usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1518, in Module._wrapped_call_impl(self, *args, **kwargs)
   1516     return self._compiled_call_impl(*args, **kwargs)  # type: ignore[misc]
   1517 else:
-> 1518     return self._call_impl(*args, **kwargs)

File /usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1527, in Module._call_impl(self, *args, **kwargs)
   1522 # If we don't have any hooks, we want to skip the rest of the logic in
   1523 # this function, and just call forward.
   1524 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1525         or _global_backward_pre_hooks or _global_backward_hooks
   1526         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1527     return forward_call(*args, **kwargs)
   1529 try:
   1530     result = None

File /usr/local/lib/python3.10/dist-packages/accelerate/hooks.py:160, in add_hook_to_module.<locals>.new_forward(module, *args, **kwargs)
    159 def new_forward(module, *args, **kwargs):
--> 160     args, kwargs = module._hf_hook.pre_forward(module, *args, **kwargs)
    161     if module._hf_hook.no_grad:
    162         with torch.no_grad():

File /usr/local/lib/python3.10/dist-packages/accelerate/hooks.py:293, in AlignDevicesHook.pre_forward(self, module, *args, **kwargs)
    291             if self.weights_map[name].dtype == torch.int8:
    292                 fp16_statistics = self.weights_map[name.replace("weight", "SCB")]
--> 293         set_module_tensor_to_device(
    294             module, name, self.execution_device, value=self.weights_map[name], fp16_statistics=fp16_statistics
    295         )
    297 return send_to_device(args, self.execution_device), send_to_device(
    298     kwargs, self.execution_device, skip_keys=self.skip_keys
    299 )

File /usr/local/lib/python3.10/dist-packages/accelerate/utils/modeling.py:347, in set_module_tensor_to_device(module, tensor_name, device, value, dtype, fp16_statistics)
    345             module._parameters[tensor_name] = param_cls(new_value, requires_grad=old_value.requires_grad)
    346 elif isinstance(value, torch.Tensor):
--> 347     new_value = value.to(device)
    348 else:
    349     new_value = torch.tensor(value, device=device)

NotImplementedError: Cannot copy out of meta tensor; no data!
@philschmid
Owner

Try again after restarting the kernel; it seems your GPU is already busy.
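
The "Cannot copy out of meta tensor; no data!" error typically means accelerate has placed some weights on the meta device (offloaded without data) because GPU memory was already occupied, usually by the training run in the same kernel. If restarting the kernel is not convenient, a minimal sketch for releasing the training objects before re-creating the inference pipeline (the names model and trainer are assumed from the training cells):

import gc
import torch

# Drop references to the training objects so their CUDA memory can be reclaimed
del model
del trainer
gc.collect()
torch.cuda.empty_cache()

# Check how much memory is still allocated before reloading the model for inference
print(f"{torch.cuda.memory_allocated() / 1024**3:.2f} GiB still allocated")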
