RuntimeError: GET was unable to find an engine to execute this computation when calling Trainer.train() from Hugging Face

RuntimeError                              Traceback (most recent call last)
Input In [46], in <cell line: 1>()
----> 1 train_results = trainer.train()
      2 wandb.finish()

File /opt/conda/lib/python3.10/site-packages/transformers/trainer.py:1543, in Trainer.train(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)
   1538     self.model_wrapped = self.model
   1540 inner_training_loop = find_executable_batch_size(
   1541     self._inner_training_loop, self._train_batch_size, args.auto_find_batch_size
   1542 )
-> 1543 return inner_training_loop(
   1544     args=args,
   1545     resume_from_checkpoint=resume_from_checkpoint,
   1546     trial=trial,
   1547     ignore_keys_for_eval=ignore_keys_for_eval,
   1548 )

File /opt/conda/lib/python3.10/site-packages/transformers/trainer.py:1791, in Trainer._inner_training_loop(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval)
   1789         tr_loss_step = self.training_step(model, inputs)
   1790 else:
-> 1791     tr_loss_step = self.training_step(model, inputs)
   1793 if (
   1794     args.logging_nan_inf_filter
   1795     and not is_torch_tpu_available()
   1796     and (torch.isnan(tr_loss_step) or torch.isinf(tr_loss_step))
   1797 ):
   1798     # if loss is nan or inf simply add the average of previous logged losses
   1799     tr_loss += tr_loss / (1 + self.state.global_step - self._globalstep_last_logged)

File /opt/conda/lib/python3.10/site-packages/transformers/trainer.py:2539, in Trainer.training_step(self, model, inputs)
   2536     return loss_mb.reduce_mean().detach().to(self.args.device)
   2538 with self.compute_loss_context_manager():
-> 2539     loss = self.compute_loss(model, inputs)
   2541 if self.args.n_gpu > 1:
   2542     loss = loss.mean()  # mean() to average on multi-gpu parallel training

File /opt/conda/lib/python3.10/site-packages/transformers/trainer.py:2571, in Trainer.compute_loss(self, model, inputs, return_outputs)
   2569 else:
   2570     labels = None
-> 2571 outputs = model(**inputs)
   2572 # Save past state if it exists
   2573 # TODO: this needs to be fixed and made cleaner later.
   2574 if self.args.past_index >= 0:

File /opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py:1501, in Module._call_impl(self, *args, **kwargs)
   1496 # If we don't have any hooks, we want to skip the rest of the logic in
   1497 # this function, and just call forward.
   1498 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1499         or _global_backward_pre_hooks or _global_backward_hooks
   1500         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1501     return forward_call(*args, **kwargs)
   1502 # Do not call functions when jit is used
   1503 full_backward_hooks, non_full_backward_hooks = [], []

File /opt/conda/lib/python3.10/site-packages/transformers/models/swinv2/modeling_swinv2.py:1274, in Swinv2ForImageClassification.forward(self, pixel_values, head_mask, labels, output_attentions, output_hidden_states, return_dict)
   1266 r"""
   1267 labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
   1268     Labels for computing the image classification/regression loss. Indices should be in `[0, ...,
   1269     config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
   1270     `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
   1271 """
   1272 return_dict = return_dict if return_dict is not None else self.config.use_return_dict
-> 1274 outputs = self.swinv2(
   1275     pixel_values,
   1276     head_mask=head_mask,
   1277     output_attentions=output_attentions,
   1278     output_hidden_states=output_hidden_states,
   1279     return_dict=return_dict,
   1280 )
   1282 pooled_output = outputs[1]
   1284 logits = self.classifier(pooled_output)

File /opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py:1501, in Module._call_impl(self, *args, **kwargs)
   1496 # If we don't have any hooks, we want to skip the rest of the logic in
   1497 # this function, and just call forward.
   1498 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1499         or _global_backward_pre_hooks or _global_backward_hooks
   1500         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1501     return forward_call(*args, **kwargs)
   1502 # Do not call functions when jit is used
   1503 full_backward_hooks, non_full_backward_hooks = [], []

File /opt/conda/lib/python3.10/site-packages/transformers/models/swinv2/modeling_swinv2.py:1076, in Swinv2Model.forward(self, pixel_values, bool_masked_pos, head_mask, output_attentions, output_hidden_states, return_dict)
   1069 # Prepare head mask if needed
   1070 # 1.0 in head_mask indicate we keep the head
   1071 # attention_probs has shape bsz x n_heads x N x N
   1072 # input head_mask has shape [num_heads] or [num_hidden_layers x num_heads]
   1073 # and head_mask is converted to shape [num_hidden_layers x batch x num_heads x seq_length x seq_length]
   1074 head_mask = self.get_head_mask(head_mask, len(self.config.depths))
-> 1076 embedding_output, input_dimensions = self.embeddings(pixel_values, bool_masked_pos=bool_masked_pos)
   1078 encoder_outputs = self.encoder(
   1079     embedding_output,
   1080     input_dimensions,
   (...)
   1084     return_dict=return_dict,
   1085 )
   1087 sequence_output = encoder_outputs[0]

File /opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py:1501, in Module._call_impl(self, *args, **kwargs)
   1496 # If we don't have any hooks, we want to skip the rest of the logic in
   1497 # this function, and just call forward.
   1498 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1499         or _global_backward_pre_hooks or _global_backward_hooks
   1500         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1501     return forward_call(*args, **kwargs)
   1502 # Do not call functions when jit is used
   1503 full_backward_hooks, non_full_backward_hooks = [], []

File /opt/conda/lib/python3.10/site-packages/transformers/models/swinv2/modeling_swinv2.py:295, in Swinv2Embeddings.forward(self, pixel_values, bool_masked_pos)
    292 def forward(
    293     self, pixel_values: Optional[torch.FloatTensor], bool_masked_pos: Optional[torch.BoolTensor] = None
    294 ) -> Tuple[torch.Tensor]:
--> 295     embeddings, output_dimensions = self.patch_embeddings(pixel_values)
    296     embeddings = self.norm(embeddings)
    297     batch_size, seq_len, _ = embeddings.size()

File /opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py:1501, in Module._call_impl(self, *args, **kwargs)
   1496 # If we don't have any hooks, we want to skip the rest of the logic in
   1497 # this function, and just call forward.
   1498 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1499         or _global_backward_pre_hooks or _global_backward_hooks
   1500         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1501     return forward_call(*args, **kwargs)
   1502 # Do not call functions when jit is used
   1503 full_backward_hooks, non_full_backward_hooks = [], []

File /opt/conda/lib/python3.10/site-packages/transformers/models/swinv2/modeling_swinv2.py:353, in Swinv2PatchEmbeddings.forward(self, pixel_values)
    351 # pad the input to be divisible by self.patch_size, if needed
    352 pixel_values = self.maybe_pad(pixel_values, height, width)
--> 353 embeddings = self.projection(pixel_values)
    354 _, _, height, width = embeddings.shape
    355 output_dimensions = (height, width)

File /opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py:1501, in Module._call_impl(self, *args, **kwargs)
   1496 # If we don't have any hooks, we want to skip the rest of the logic in
   1497 # this function, and just call forward.
   1498 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1499         or _global_backward_pre_hooks or _global_backward_hooks
   1500         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1501     return forward_call(*args, **kwargs)
   1502 # Do not call functions when jit is used
   1503 full_backward_hooks, non_full_backward_hooks = [], []

File /opt/conda/lib/python3.10/site-packages/torch/nn/modules/conv.py:463, in Conv2d.forward(self, input)
    462 def forward(self, input: Tensor) -> Tensor:
--> 463     return self._conv_forward(input, self.weight, self.bias)

File /opt/conda/lib/python3.10/site-packages/torch/nn/modules/conv.py:459, in Conv2d._conv_forward(self, input, weight, bias)
    455 if self.padding_mode != 'zeros':
    456     return F.conv2d(F.pad(input, self._reversed_padding_repeated_twice, mode=self.padding_mode),
    457                     weight, bias, self.stride,
    458                     _pair(0), self.dilation, self.groups)
--> 459 return F.conv2d(input, weight, bias, self.stride,
    460                 self.padding, self.dilation, self.groups)

RuntimeError: GET was unable to find an engine to execute this computation

I'm not sure what happened. This error did not appear when I first ran my pipeline, but it started showing up a few days ago. How can I fix it?

Swin Transformer checkpoint from Hugging Face: 'microsoft/swinv2-tiny-patch4-window8-256'

from transformers import AutoModelForImageClassification, AutoImageProcessor
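For context, this is roughly how that checkpoint is loaded using the imports above (a minimal sketch; num_labels and ignore_mismatched_sizes are illustrative fine-tuning choices, not taken from the original post):

checkpoint = "microsoft/swinv2-tiny-patch4-window8-256"

# The image processor resizes/normalizes inputs to the 256x256 resolution the model expects
processor = AutoImageProcessor.from_pretrained(checkpoint)

# num_labels is a placeholder; set it to match your own dataset
model = AutoModelForImageClassification.from_pretrained(
    checkpoint,
    num_labels=2,
    ignore_mismatched_sizes=True,  # replaces the pretrained ImageNet classification head
)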

!pip install transformers==4.26.0
!pip3 install torch torchvision --extra-index-url https://download.pytorch.org/whl/cu117
!pip install tensorflow --upgrade
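Since the traceback ends inside F.conv2d, this error usually means the cuDNN backend failed to select a convolution engine, commonly because the GPU is out of memory or the torch build does not match the installed CUDA/cuDNN stack. A quick diagnostic sketch (the printed versions will depend on your environment):

import torch

print(torch.__version__)               # torch build, e.g. a +cu117 wheel
print(torch.version.cuda)              # CUDA version torch was compiled against
print(torch.cuda.is_available())       # should be True on a working GPU setup
print(torch.backends.cudnn.version())  # cuDNN version torch is using

# A tiny convolution on the GPU exercises the same code path that fails in the traceback
x = torch.randn(1, 3, 32, 32, device="cuda")
conv = torch.nn.Conv2d(3, 8, kernel_size=3).to("cuda")
print(conv(x).shape)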

How can I fix this so training runs without the error?

Frong asked Dec 28 '25 06:12


1 Answer

In my case, I faced this problem when the GPU did not have enough memory. Reducing the batch size or training with AMP (automatic mixed precision) may help solve this problem.
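For the Trainer API specifically, both suggestions map directly onto TrainingArguments (a sketch; output_dir and the concrete sizes are placeholder values to adapt):

from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="swinv2-finetune",      # placeholder output path
    per_device_train_batch_size=8,     # lower this further if the error persists
    gradient_accumulation_steps=4,     # preserves an effective batch size of 32
    fp16=True,                         # AMP roughly halves activation memory on CUDA
)

Pass these into your existing Trainer and rerun trainer.train().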

lzx1413 answered Dec 31 '25 00:12