🐛 Bug

To Reproduce

Steps to reproduce the behavior:
Run the code below, which attempts to use layer conductance attribution for Llama 2.
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig  # BitsAndBytesConfig is used below
from captum.attr import LayerConductance
import bitsandbytes as bnb


def load_model(model_name, bnb_config):
    n_gpus = torch.cuda.device_count()
    max_memory = "10000MB"
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        quantization_config=bnb_config,
        # device_map="cpu"
        device_map="auto",  # dispatch the model efficiently on the available resources
        max_memory={i: max_memory for i in range(n_gpus)},
    )
    tokenizer = AutoTokenizer.from_pretrained(model_name, use_auth_token=True)
    # Needed for the LLaMA tokenizer
    tokenizer.pad_token = tokenizer.eos_token
    return model, tokenizer


def create_bnb_config():
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16,
    )
    return bnb_config


model_name = "meta-llama/Llama-2-7b-chat-hf"
bnb_config = create_bnb_config()
model, tokenizer = load_model(model_name, bnb_config)

layer = model.model.layers[-1]
input_test = "The president of the USA is named"
inputs = tokenizer(input_test, return_tensors="pt").to("cuda:0")
input_ids = inputs["input_ids"].int()

layer_cond = LayerConductance(model, layer)
llama_att = layer_cond.attribute(input_ids, target=0)  # first token
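Not part of the original report, but a control I find useful when reproducing: run a plain forward pass with the same token ids first, which I would expect to succeed, so that the failure is clearly specific to the attribution call.

    # Optional control (my addition, not from the report): a plain forward pass with
    # the same integer input_ids, to confirm the model itself runs before calling Captum.
    with torch.no_grad():
        logits = model(inputs["input_ids"]).logits
    print(logits.shape)  # (1, sequence_length, vocab_size)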
Expected behavior
No error raised.
Environment
Describe the environment used for Captum
- Captum / PyTorch Version: 0.7.0 / 2.3.1
- OS: Linux
- How you installed Captum / PyTorch: pip
- Python version: 3.10
- CUDA/cuDNN version: 11.7
- GPU models and configuration: GA102GL [A40]
Additional context
Stack trace:
---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
Cell In[29], line 2
      1 layer_cond = LayerConductance(model, layer)
----> 2 llama_att = layer_cond.attribute(input_ids, target=target)

File ~/venv/lib/python3.10/site-packages/captum/log/__init__.py:42, in log_usage.<locals>._log_usage.<locals>.wrapper(*args, **kwargs)
     40 @wraps(func)
     41 def wrapper(*args, **kwargs):
---> 42     return func(*args, **kwargs)

File ~/venv/lib/python3.10/site-packages/captum/attr/_core/layer/layer_conductance.py:292, in LayerConductance.attribute(self, inputs, baselines, target, additional_forward_args, n_steps, method, internal_batch_size, return_convergence_delta, attribute_to_layer_input)
    277     attrs = _batch_attribution(
    278         self,
    279         num_examples,
   (...)
    288         attribute_to_layer_input=attribute_to_layer_input,
    289     )
    291 else:
--> 292     attrs = self._attribute(
    293         inputs=inputs,
    294         baselines=baselines,
    295         target=target,
    296         additional_forward_args=additional_forward_args,
    297         n_steps=n_steps,
    298         method=method,
    299         attribute_to_layer_input=attribute_to_layer_input,
    300     )
    302 is_layer_tuple = isinstance(attrs, tuple)
    303 attributions = attrs if is_layer_tuple else (attrs,)

File ~/venv/lib/python3.10/site-packages/captum/attr/_core/layer/layer_conductance.py:360, in LayerConductance._attribute(self, inputs, baselines, target, additional_forward_args, n_steps, method, attribute_to_layer_input, step_sizes_and_alphas)
    356 expanded_target = _expand_target(target, n_steps + 1)
    358 # Conductance Gradients - Returns gradient of output with respect to
    359 # hidden layer and hidden layer evaluated at each input.
--> 360 (layer_gradients, layer_evals,) = compute_layer_gradients_and_eval(
    361     forward_fn=self.forward_func,
    362     layer=self.layer,
    363     inputs=scaled_features_tpl,
    364     additional_forward_args=input_additional_args,
    365     target_ind=expanded_target,
    366     device_ids=self.device_ids,
    367     attribute_to_layer_input=attribute_to_layer_input,
    368 )
    370 # Compute differences between consecutive evaluations of layer_eval.
    371 # This approximates the total input gradient of each step multiplied
    372 # by the step size.
    373 grad_diffs = tuple(
    374     layer_eval[num_examples:] - layer_eval[:-num_examples]
    375     for layer_eval in layer_evals
    376 )

File ~/venv/lib/python3.10/site-packages/captum/_utils/gradient.py:592, in compute_layer_gradients_and_eval(forward_fn, layer, inputs, target_ind, additional_forward_args, gradient_neuron_selector, device_ids, attribute_to_layer_input, output_fn)
    541 r"""
    542 Computes gradients of the output with respect to a given layer as well
    543 as the output evaluation of the layer for an arbitrary forward function
   (...)
    587     Target layer output for given input.
    588 """
    589 with torch.autograd.set_grad_enabled(True):
    590     # saved_layer is a dictionary mapping device to a tuple of
    591     # layer evaluations on that device.
--> 592     saved_layer, output = _forward_layer_distributed_eval(
    593         forward_fn,
    594         inputs,
    595         layer,
    596         target_ind=target_ind,
    597         additional_forward_args=additional_forward_args,
    598         attribute_to_layer_input=attribute_to_layer_input,
    599         forward_hook_with_return=True,
    600         require_layer_grads=True,
    601     )
    602     assert output[0].numel() == 1, (
    603         "Target not provided when necessary, cannot"
    604         " take gradient with respect to multiple outputs."
    605     )
    607 device_ids = _extract_device_ids(forward_fn, saved_layer, device_ids)

File ~/venv/lib/python3.10/site-packages/captum/_utils/gradient.py:294, in _forward_layer_distributed_eval(forward_fn, inputs, layer, target_ind, additional_forward_args, attribute_to_layer_input, forward_hook_with_return, require_layer_grads)
    290     else:
    291         all_hooks.append(
    292             single_layer.register_forward_hook(hook_wrapper(single_layer))
    293         )
--> 294     output = _run_forward(
    295         forward_fn,
    296         inputs,
    297         target=target_ind,
    298         additional_forward_args=additional_forward_args,
    299     )
    300 finally:
    301     for hook in all_hooks:

File ~/venv/lib/python3.10/site-packages/captum/_utils/common.py:531, in _run_forward(forward_func, inputs, target, additional_forward_args)
    528 inputs = _format_inputs(inputs)
    529 additional_forward_args = _format_additional_forward_args(additional_forward_args)
--> 531 output = forward_func(
    532     *(*inputs, *additional_forward_args)
    533     if additional_forward_args is not None
    534     else inputs
    535 )
    536 return _select_targets(output, target)

File ~/venv/lib/python3.10/site-packages/torch/nn/modules/module.py:1532, in Module._wrapped_call_impl(self, *args, **kwargs)
   1530     return self._compiled_call_impl(*args, **kwargs)  # type: ignore[misc]
   1531 else:
-> 1532     return self._call_impl(*args, **kwargs)

File ~/venv/lib/python3.10/site-packages/torch/nn/modules/module.py:1541, in Module._call_impl(self, *args, **kwargs)
   1536 # If we don't have any hooks, we want to skip the rest of the logic in
   1537 # this function, and just call forward.
   1538 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1539         or _global_backward_pre_hooks or _global_backward_hooks
   1540         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1541     return forward_call(*args, **kwargs)
   1543 try:
   1544     result = None

File ~/venv/lib/python3.10/site-packages/accelerate/hooks.py:169, in add_hook_to_module.<locals>.new_forward(module, *args, **kwargs)
    167     output = module._old_forward(*args, **kwargs)
    168 else:
--> 169     output = module._old_forward(*args, **kwargs)
    170 return module._hf_hook.post_forward(module, output)

File ~/venv/lib/python3.10/site-packages/transformers/models/llama/modeling_llama.py:1174, in LlamaForCausalLM.forward(self, input_ids, attention_mask, position_ids, past_key_values, inputs_embeds, labels, use_cache, output_attentions, output_hidden_states, return_dict, cache_position)
   1171 return_dict = return_dict if return_dict is not None else self.config.use_return_dict
   1173 # decoder outputs consists of (dec_features, layer_state, dec_hidden, dec_attn)
-> 1174 outputs = self.model(
   1175     input_ids=input_ids,
   1176     attention_mask=attention_mask,
   1177     position_ids=position_ids,
   1178     past_key_values=past_key_values,
   1179     inputs_embeds=inputs_embeds,
   1180     use_cache=use_cache,
   1181     output_attentions=output_attentions,
   1182     output_hidden_states=output_hidden_states,
   1183     return_dict=return_dict,
   1184     cache_position=cache_position,
   1185 )
   1187 hidden_states = outputs[0]
   1188 if self.config.pretraining_tp > 1:

File ~/venv/lib/python3.10/site-packages/torch/nn/modules/module.py:1532, in Module._wrapped_call_impl(self, *args, **kwargs)
   1530     return self._compiled_call_impl(*args, **kwargs)  # type: ignore[misc]
   1531 else:
-> 1532     return self._call_impl(*args, **kwargs)

File ~/venv/lib/python3.10/site-packages/torch/nn/modules/module.py:1541, in Module._call_impl(self, *args, **kwargs)
   1536 # If we don't have any hooks, we want to skip the rest of the logic in
   1537 # this function, and just call forward.
   1538 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1539         or _global_backward_pre_hooks or _global_backward_hooks
   1540         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1541     return forward_call(*args, **kwargs)
   1543 try:
   1544     result = None

File ~/venv/lib/python3.10/site-packages/transformers/models/llama/modeling_llama.py:931, in LlamaModel.forward(self, input_ids, attention_mask, position_ids, past_key_values, inputs_embeds, use_cache, output_attentions, output_hidden_states, return_dict, cache_position)
    928     use_cache = False
    930 if inputs_embeds is None:
--> 931     inputs_embeds = self.embed_tokens(input_ids)
    933 return_legacy_cache = False
    934 if use_cache and not isinstance(past_key_values, Cache):  # kept for BC (non `Cache` `past_key_values` inputs)

File ~/venv/lib/python3.10/site-packages/torch/nn/modules/module.py:1532, in Module._wrapped_call_impl(self, *args, **kwargs)
   1530     return self._compiled_call_impl(*args, **kwargs)  # type: ignore[misc]
   1531 else:
-> 1532     return self._call_impl(*args, **kwargs)

File ~/venv/lib/python3.10/site-packages/torch/nn/modules/module.py:1541, in Module._call_impl(self, *args, **kwargs)
   1536 # If we don't have any hooks, we want to skip the rest of the logic in
   1537 # this function, and just call forward.
   1538 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1539         or _global_backward_pre_hooks or _global_backward_hooks
   1540         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1541     return forward_call(*args, **kwargs)
   1543 try:
   1544     result = None

File ~/venv/lib/python3.10/site-packages/accelerate/hooks.py:169, in add_hook_to_module.<locals>.new_forward(module, *args, **kwargs)
    167     output = module._old_forward(*args, **kwargs)
    168 else:
--> 169     output = module._old_forward(*args, **kwargs)
    170 return module._hf_hook.post_forward(module, output)

File ~/venv/lib/python3.10/site-packages/torch/nn/modules/sparse.py:163, in Embedding.forward(self, input)
    162 def forward(self, input: Tensor) -> Tensor:
--> 163     return F.embedding(
    164         input, self.weight, self.padding_idx, self.max_norm,
    165         self.norm_type, self.scale_grad_by_freq, self.sparse)

File ~/venv/lib/python3.10/site-packages/torch/nn/functional.py:2264, in embedding(input, weight, padding_idx, max_norm, norm_type, scale_grad_by_freq, sparse)
   2258 # Note [embedding_renorm set_grad_enabled]
   2259 # XXX: equivalent to
   2260 # with torch.no_grad():
   2261 #     torch.embedding_renorm_
   2262 # remove once script supports set_grad_enabled
   2263 _no_grad_embedding_renorm_(weight, input, max_norm, norm_type)
-> 2264 return torch.embedding(weight, input, padding_idx, scale_grad_by_freq, sparse)

RuntimeError: Expected tensor for argument #1 'indices' to have one of the following scalar types: Long, Int; but got torch.cuda.FloatTensor instead (while checking arguments for embedding)
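My reading of the failure (an interpretation on my part, not something the trace states directly): LayerConductance, like integrated gradients, builds interpolated inputs between a baseline and the actual input before running the forward pass, so the integer input_ids reach the embedding layer as a float tensor, which F.embedding rejects. The same dtype error can be reproduced in isolation:

    # Minimal sketch of the dtype failure, independent of Captum and Llama 2:
    # nn.Embedding only accepts integer indices, but interpolating between a
    # baseline and the input produces floating-point "ids".
    import torch
    import torch.nn as nn

    emb = nn.Embedding(num_embeddings=10, embedding_dim=4)
    ids = torch.tensor([[1, 2, 3]])

    emb(ids)                    # fine: Long indices
    scaled = ids.float() * 0.5  # what an interpolation step effectively produces
    emb(scaled)                 # RuntimeError: expected Long/Int indices, got Float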
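A possible workaround (my own sketch, not verified on this exact quantized/accelerate setup): attribute at the embedding layer with LayerIntegratedGradients, which interpolates the layer's activations through forward hooks instead of perturbing the integer input_ids themselves, so the embedding lookup keeps receiving integer indices. The forward_logits wrapper below is a hypothetical helper, not part of the original report.

    # Sketch of a possible workaround, assuming the model/tokenizer from the repro above.
    # LayerIntegratedGradients hooks the chosen layer and interpolates its activations,
    # so the model itself is always called with integer token ids.
    from captum.attr import LayerIntegratedGradients

    def forward_logits(input_ids):
        # Hypothetical helper: return one row of logits per example (last position),
        # so `target` selects a single scalar output per example.
        return model(input_ids).logits[:, -1, :]

    lig = LayerIntegratedGradients(forward_logits, model.model.embed_tokens)
    attributions = lig.attribute(inputs["input_ids"], target=0)  # keep the Long dtype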