When I run

```shell
CUDA_VISIBLE_DEVICES=0,1 torchrun --standalone --nnodes=1 --nproc-per-node=2 cli_demo_sat.py --from_pretrained cogcom-base-17b --local_tokenizer tokenizer --english --fp16
```

(optionally with `--quant 4`) and test it on various pictures, about 50% of them raise an exception. Here is the traceback (both ranks print the same one, so I show it once):
```
Traceback (most recent call last):
  File "cli_demo_sat.py", line 116, in main
    response, history, cache_image = chat(
  File "/CogCoM/cogcom/utils/chat.py", line 229, in chat
    (output, turns_mems), turns_mems_mask = filling_sequence(
  File "/CogCoM/cogcom/utils/chat.py", line 87, in filling_sequence
    logits, *output_per_layers = model(
  File "/root/miniconda3/envs/cogcom/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "/root/miniconda3/envs/cogcom/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
    return forward_call(*args, **kwargs)
  File "/CogCoM/cogcom/models/cogcom_model.py", line 142, in forward
    return super().forward(input_ids=input_ids, vision_expert_mask=vision_expert_mask, image_embed_mask=image_embed_mask, **kwargs)
  File "/root/miniconda3/envs/cogcom/lib/python3.8/site-packages/sat/model/base_model.py", line 137, in forward
    return self.transformer(*args, **kwargs)
  File "/root/miniconda3/envs/cogcom/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "/root/miniconda3/envs/cogcom/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
    return forward_call(*args, **kwargs)
  File "/root/miniconda3/envs/cogcom/lib/python3.8/site-packages/sat/model/transformer.py", line 668, in forward
    layer_ret = layer(*args, layer_id=torch.tensor(i), **kw_args, position_ids=position_ids, **output_cross_layer,
  File "/root/miniconda3/envs/cogcom/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "/root/miniconda3/envs/cogcom/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
    return forward_call(*args, **kwargs)
  File "/root/miniconda3/envs/cogcom/lib/python3.8/site-packages/sat/model/transformer.py", line 390, in forward
    return HOOKS_DEFAULT['layer_forward'](self, hidden_states, mask, *args, **kw_args)
  File "/root/miniconda3/envs/cogcom/lib/python3.8/site-packages/sat/transformer_defaults.py", line 172, in layer_forward_default
    attention_output = self.attention(attention_input, mask, **kw_args)
  File "/root/miniconda3/envs/cogcom/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "/root/miniconda3/envs/cogcom/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
    return forward_call(*args, **kwargs)
  File "/root/miniconda3/envs/cogcom/lib/python3.8/site-packages/sat/model/transformer.py", line 111, in forward
    return self.hooks['attention_forward'](hidden_states, mask, **kw_args)
  File "/CogCoM/cogcom/models/mixin.py", line 256, in attention_forward
    context_layer = attention_fn(query_layer, key_layer, value_layer, mask, dropout_fn, **kw_args)
  File "/CogCoM/cogcom/models/com_memory.py", line 49, in attention_fn
    return old_impl(q, k, v, mask, dropout_fn, cross_attention=cross_attention, mems=mems, **kw_args)
  File "/root/miniconda3/envs/cogcom/lib/python3.8/site-packages/sat/transformer_defaults.py", line 68, in attention_fn_default
    return standard_attention(
  File "/root/miniconda3/envs/cogcom/lib/python3.8/site-packages/sat/transformer_defaults.py", line 43, in standard_attention
    context_layer = torch.matmul(attention_probs, value_layer)
RuntimeError: expected scalar type Half but found Float
```
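If I read the error right, one of `attention_probs` / `value_layer` is fp32 while the other is still fp16 when they meet at that final matmul. A minimal sketch of that kind of mismatch (the shapes are made up, just to illustrate one possible direction of the mismatch):

```python
import torch

# fp16 attention tensors, as the model would produce under --fp16
q = torch.randn(1, 8, 16, 64, dtype=torch.float16, device="cuda")
k = torch.randn(1, 8, 16, 64, dtype=torch.float16, device="cuda")
v = torch.randn(1, 8, 16, 64, dtype=torch.float16, device="cuda")

scores = torch.matmul(q, k.transpose(-1, -2))

# If the softmax output is promoted to fp32 somewhere (e.g. a float()
# cast for numerical stability) and never cast back, it no longer
# matches v's dtype and the matmul fails:
probs = torch.softmax(scores, dim=-1).float()
torch.matmul(probs, v)  # RuntimeError: expected scalar type Half but found Float
```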
I wonder what leads to this problem in the real model: where do the dtypes diverge, and why only for some pictures? Besides, whether the runtime error occurs sometimes depends on whether I pass `--quant 4`.
Is there any solution to this problem? Thanks a lot!
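In case it helps, would casting the probabilities back to the value dtype right before the matmul in the last traceback frame (`sat/transformer_defaults.py`, line 43) be a reasonable workaround? Something like this hypothetical helper (my own naming, not part of sat):

```python
import torch

def matmul_probs_values(attention_probs: torch.Tensor,
                        value_layer: torch.Tensor) -> torch.Tensor:
    # Hypothetical workaround: if the two tensors ended up with different
    # dtypes (e.g. fp32 softmax output vs. fp16 values), cast the
    # probabilities to the value dtype so the matmul dtypes agree.
    if attention_probs.dtype != value_layer.dtype:
        attention_probs = attention_probs.to(value_layer.dtype)
    return torch.matmul(attention_probs, value_layer)
```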