I want to force the Huggingface transformer (BERT) to make use of CUDA.
While the code was running, all my CPU cores were maxed out, but nvidia-smi showed my GPU at 0% utilization. Unfortunately, I'm new to the Hugging Face library as well as to PyTorch, and I don't know where to place the CUDA attributes such as device = "cuda:0" or .to("cuda:0").
The code below is essentially a customized part of the german-sentiment-bert working example:
class SentimentModel_t(pt.nn.Module):
    """Sentiment classifier wrapping a pretrained sequence-classification model.

    The wrapped model is moved to the first CUDA device when one is
    available, otherwise it stays on the CPU. Because this class is an
    ``nn.Module``, calling ``.to(device)`` on an instance later also moves
    the wrapped model.

    NOTE: ``predict_sentiment`` calls ``self.clean_text``, which is not part
    of this snippet and must be provided by the full class.
    """

    def __init__(self, model_name: str = "oliverguhr/german-sentiment-bert"):
        """Load the model and tokenizer identified by ``model_name``."""
        # nn.Module must be initialised before submodules are assigned,
        # otherwise ``self.model`` cannot be registered.
        super(SentimentModel_t, self).__init__()
        device = "cuda:0" if pt.cuda.is_available() else "cpu"
        print(device)
        self.model = AutoModelForSequenceClassification.from_pretrained(model_name).to(device)
        self.tokenizer = BertTokenizerFast.from_pretrained(model_name)

    def predict_sentiment(self, texts: List[str]) -> List[str]:
        """Return one predicted label per entry of ``texts``.

        Args:
            texts: Raw input strings; each is passed through
                ``self.clean_text`` before tokenization.

        Returns:
            The ``id2label`` name of the highest-scoring class for each
            text, in input order. An empty input yields an empty list.
        """
        if not texts:
            # Nothing to classify; avoids building an empty, mistyped tensor.
            return []

        texts = [self.clean_text(text) for text in texts]
        # Add special tokens takes care of adding [CLS], [SEP], <s>... tokens
        # in the right way for each model.
        encoded = self.tokenizer.batch_encode_plus(
            texts,
            padding=True,
            add_special_tokens=True,
            truncation=True,
            max_length=self.tokenizer.max_len_single_sentence,
        )

        # The inputs must live on the same device as the model weights.
        # Leaving them on the CPU while the model is on CUDA raises
        # "RuntimeError: Input, output and indices must be on the current
        # device". The device is looked up at call time so that a later
        # ``.to(...)`` on this module is honoured as well.
        device = next(self.model.parameters()).device
        input_ids = pt.tensor(encoded["input_ids"], device=device)
        # NOTE(review): only input_ids are passed although padding=True; the
        # attention mask is not forwarded, so padding tokens are presumably
        # attended to -- confirm whether that is intended.

        with pt.no_grad():
            logits = self.model(input_ids)

        label_ids = pt.argmax(logits[0], axis=1)

        labels = [self.model.config.id2label[label_id] for label_id in label_ids.tolist()]
        return labels
EDIT: After applying the suggestions of @KonstantinosKokos (see edited code above) I got a
RuntimeError: Input, output and indices must be on the current device
pointing to
        with pt.no_grad():
           logits = self.model(input_ids)
The full traceback is shown below:
<ipython-input-15-b843edd87a1a> in predict_sentiment(self, texts)
     23 
     24         with pt.no_grad():
---> 25             logits = self.model(input_ids)
     26 
     27         label_ids = pt.argmax(logits[0], axis=1)
~/PycharmProjects/Test_project/venv/lib/python3.8/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
    725             result = self._slow_forward(*input, **kwargs)
    726         else:
--> 727             result = self.forward(*input, **kwargs)
    728         for hook in itertools.chain(
    729                 _global_forward_hooks.values(),
~/PycharmProjects/Test_project/venv/lib/python3.8/site-packages/transformers/models/bert/modeling_bert.py in forward(self, input_ids, attention_mask, token_type_ids, position_ids, head_mask, inputs_embeds, labels, output_attentions, output_hidden_states, return_dict)
   1364         return_dict = return_dict if return_dict is not None else self.config.use_return_dict
   1365 
-> 1366         outputs = self.bert(
   1367             input_ids,
   1368             attention_mask=attention_mask,
~/PycharmProjects/Test_project/venv/lib/python3.8/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
    725             result = self._slow_forward(*input, **kwargs)
    726         else:
--> 727             result = self.forward(*input, **kwargs)
    728         for hook in itertools.chain(
    729                 _global_forward_hooks.values(),
~/PycharmProjects/Test_project/venv/lib/python3.8/site-packages/transformers/models/bert/modeling_bert.py in forward(self, input_ids, attention_mask, token_type_ids, position_ids, head_mask, inputs_embeds, encoder_hidden_states, encoder_attention_mask, output_attentions, output_hidden_states, return_dict)
    859         head_mask = self.get_head_mask(head_mask, self.config.num_hidden_layers)
    860 
--> 861         embedding_output = self.embeddings(
    862             input_ids=input_ids, position_ids=position_ids, token_type_ids=token_type_ids, inputs_embeds=inputs_embeds
    863         )
~/PycharmProjects/Test_project/venv/lib/python3.8/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
    725             result = self._slow_forward(*input, **kwargs)
    726         else:
--> 727             result = self.forward(*input, **kwargs)
    728         for hook in itertools.chain(
    729                 _global_forward_hooks.values(),
~/PycharmProjects/Test_project/venv/lib/python3.8/site-packages/transformers/models/bert/modeling_bert.py in forward(self, input_ids, token_type_ids, position_ids, inputs_embeds)
    196 
    197         if inputs_embeds is None:
--> 198             inputs_embeds = self.word_embeddings(input_ids)
    199         token_type_embeddings = self.token_type_embeddings(token_type_ids)
    200 
~/PycharmProjects/Test_project/venv/lib/python3.8/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
    725             result = self._slow_forward(*input, **kwargs)
    726         else:
--> 727             result = self.forward(*input, **kwargs)
    728         for hook in itertools.chain(
    729                 _global_forward_hooks.values(),
~/PycharmProjects/Test_project/venv/lib/python3.8/site-packages/torch/nn/modules/sparse.py in forward(self, input)
    122 
    123     def forward(self, input: Tensor) -> Tensor:
--> 124         return F.embedding(
    125             input, self.weight, self.padding_idx, self.max_norm,
    126             self.norm_type, self.scale_grad_by_freq, self.sparse)
~/PycharmProjects/Test_project/venv/lib/python3.8/site-packages/torch/nn/functional.py in embedding(input, weight, padding_idx, max_norm, norm_type, scale_grad_by_freq, sparse)
   1850         # remove once script supports set_grad_enabled
   1851         _no_grad_embedding_renorm_(weight, input, max_norm, norm_type)
-> 1852     return torch.embedding(weight, input, padding_idx, scale_grad_by_freq, sparse)
   1853 
   1854 
You can make the entire class inherit torch.nn.Module like so:
class SentimentModel_t(torch.nn.Module):
    """Sketch of the wrapper as an ``nn.Module`` so ``.to(device)`` reaches its submodules."""

    def __init__(self, model_name: str = "oliverguhr/german-sentiment-bert"):
        # Must run before any submodule is assigned to ``self``; it sets up
        # the bookkeeping nn.Module uses to register (and later move) them.
        super(SentimentModel_t, self).__init__()
        # The rest of the original constructor (loading model and tokenizer)
        # goes here.
        ...
Upon initializing your model you can then call .to(device) to cast it to the device of your choice, like so:
# Build the wrapper first; `...` stands in for the constructor arguments.
sentiment_model = SentimentModel_t(...)
# Move the module, and with it every registered submodule, to the GPU.
sentiment_model.to('cuda')
The .to() call applies recursively to all submodules of the class, model being one of them (Hugging Face models inherit from torch.nn.Module, thus providing an implementation of to()).
Note that this makes choosing the device in __init__() redundant: it's now an external context that you can switch to and from easily.
Alternatively, you can hardcode the device by casting the contained BERT model directly into cuda (less elegant):
class SentimentModel_t():
    """Variant that pins the wrapped model to a device chosen at construction time."""

    def __init__(self, model_name: str = "oliverguhr/german-sentiment-bert"):
        # Prefer the first CUDA device and fall back to the CPU otherwise.
        DEVICE = "cuda:0" if pt.cuda.is_available() else "cpu"
        print(DEVICE)
        # Only the model weights are moved here; tensors fed to the model
        # later have to be placed on the same device by the caller.
        self.model = AutoModelForSequenceClassification.from_pretrained(model_name).to(DEVICE)
I am a bit late to the party. The python package that I wrote already uses your GPU. You can have a look at the code to see how it was implemented
Just install the package:
pip install germansentiment
and run the code:
from germansentiment import SentimentModel

# Instantiate the packaged classifier with its default settings.
sentiment_model = SentimentModel()

# Sample sentences to classify.
samples = [
    "Mit keinem guten Ergebniss",
    "Das ist gar nicht mal so gut",
    "Total awesome!",
    "nicht so schlecht wie erwartet",
    "Der Test verlief positiv.",
    "Sie fährt ein grünes Auto.",
]

# Classify the whole batch and show the predictions.
print(sentiment_model.predict_sentiment(samples))
Important: If you write your own code to use the model, you need to run the preprocessing code as well. Otherwise the results can be off.
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With