I want to force the Hugging Face transformer (BERT) to make use of CUDA.
While the code was running, all my CPU cores were maxed out, but nvidia-smi showed my GPU at 0% utilization. Unfortunately, I'm new to the Hugging Face library as well as PyTorch and don't know where to place the CUDA attributes device = "cuda:0" or .to("cuda:0").
The code below is essentially a customized version of the german-sentiment-bert working example:
from typing import List

import torch as pt
from transformers import AutoModelForSequenceClassification, BertTokenizerFast

class SentimentModel_t(pt.nn.Module):
    def __init__(self, model_name: str = "oliverguhr/german-sentiment-bert"):
        DEVICE = "cuda:0" if pt.cuda.is_available() else "cpu"
        print(DEVICE)
        super(SentimentModel_t, self).__init__()
        self.model = AutoModelForSequenceClassification.from_pretrained(model_name).to(DEVICE)
        self.tokenizer = BertTokenizerFast.from_pretrained(model_name)

    def predict_sentiment(self, texts: List[str]) -> List[str]:
        # clean_text is defined elsewhere in the class (omitted here).
        texts = [self.clean_text(text) for text in texts]
        # add_special_tokens takes care of adding [CLS], [SEP], <s>... tokens in the right way for each model.
        input_ids = self.tokenizer.batch_encode_plus(texts, padding=True, add_special_tokens=True,
                                                     truncation=True, max_length=self.tokenizer.max_len_single_sentence)
        input_ids = pt.tensor(input_ids["input_ids"])

        with pt.no_grad():
            logits = self.model(input_ids)

        label_ids = pt.argmax(logits[0], axis=1)
        labels = [self.model.config.id2label[label_id] for label_id in label_ids.tolist()]
        return labels
EDIT: After applying the suggestions of @KonstantinosKokos (see the edited code above), I got a
RuntimeError: Input, output and indices must be on the current device
pointing to

    with pt.no_grad():
        logits = self.model(input_ids)

The full traceback is below:
<ipython-input-15-b843edd87a1a> in predict_sentiment(self, texts)
23
24 with pt.no_grad():
---> 25 logits = self.model(input_ids)
26
27 label_ids = pt.argmax(logits[0], axis=1)
~/PycharmProjects/Test_project/venv/lib/python3.8/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
725 result = self._slow_forward(*input, **kwargs)
726 else:
--> 727 result = self.forward(*input, **kwargs)
728 for hook in itertools.chain(
729 _global_forward_hooks.values(),
~/PycharmProjects/Test_project/venv/lib/python3.8/site-packages/transformers/models/bert/modeling_bert.py in forward(self, input_ids, attention_mask, token_type_ids, position_ids, head_mask, inputs_embeds, labels, output_attentions, output_hidden_states, return_dict)
1364 return_dict = return_dict if return_dict is not None else self.config.use_return_dict
1365
-> 1366 outputs = self.bert(
1367 input_ids,
1368 attention_mask=attention_mask,
~/PycharmProjects/Test_project/venv/lib/python3.8/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
725 result = self._slow_forward(*input, **kwargs)
726 else:
--> 727 result = self.forward(*input, **kwargs)
728 for hook in itertools.chain(
729 _global_forward_hooks.values(),
~/PycharmProjects/Test_project/venv/lib/python3.8/site-packages/transformers/models/bert/modeling_bert.py in forward(self, input_ids, attention_mask, token_type_ids, position_ids, head_mask, inputs_embeds, encoder_hidden_states, encoder_attention_mask, output_attentions, output_hidden_states, return_dict)
859 head_mask = self.get_head_mask(head_mask, self.config.num_hidden_layers)
860
--> 861 embedding_output = self.embeddings(
862 input_ids=input_ids, position_ids=position_ids, token_type_ids=token_type_ids, inputs_embeds=inputs_embeds
863 )
~/PycharmProjects/Test_project/venv/lib/python3.8/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
725 result = self._slow_forward(*input, **kwargs)
726 else:
--> 727 result = self.forward(*input, **kwargs)
728 for hook in itertools.chain(
729 _global_forward_hooks.values(),
~/PycharmProjects/Test_project/venv/lib/python3.8/site-packages/transformers/models/bert/modeling_bert.py in forward(self, input_ids, token_type_ids, position_ids, inputs_embeds)
196
197 if inputs_embeds is None:
--> 198 inputs_embeds = self.word_embeddings(input_ids)
199 token_type_embeddings = self.token_type_embeddings(token_type_ids)
200
~/PycharmProjects/Test_project/venv/lib/python3.8/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
725 result = self._slow_forward(*input, **kwargs)
726 else:
--> 727 result = self.forward(*input, **kwargs)
728 for hook in itertools.chain(
729 _global_forward_hooks.values(),
~/PycharmProjects/Test_project/venv/lib/python3.8/site-packages/torch/nn/modules/sparse.py in forward(self, input)
122
123 def forward(self, input: Tensor) -> Tensor:
--> 124 return F.embedding(
125 input, self.weight, self.padding_idx, self.max_norm,
126 self.norm_type, self.scale_grad_by_freq, self.sparse)
~/PycharmProjects/Test_project/venv/lib/python3.8/site-packages/torch/nn/functional.py in embedding(input, weight, padding_idx, max_norm, norm_type, scale_grad_by_freq, sparse)
1850 # remove once script supports set_grad_enabled
1851 _no_grad_embedding_renorm_(weight, input, max_norm, norm_type)
-> 1852 return torch.embedding(weight, input, padding_idx, scale_grad_by_freq, sparse)
1853
1854
You can make the entire class inherit from torch.nn.Module, like so:

class SentimentModel_t(torch.nn.Module):
    def __init__(self, ...):
        super(SentimentModel_t, self).__init__()
        ...

Upon initializing your model, you can then call .to(device) to cast it to the device of your choice, like so:

sentiment_model = SentimentModel_t(...)
sentiment_model.to('cuda')

The .to() call applies recursively to all submodules of the class, model being one of them (Hugging Face models inherit from torch.nn.Module, thus providing an implementation of to()).
Note that this makes choosing a device in __init__() redundant: it's now an external context that you can switch to and from easily.
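Keep in mind that the tensors you feed into the model must also live on that device; the RuntimeError from the edit above means input_ids was still on the CPU while the model weights were on cuda:0. A minimal sketch of predict_sentiment that moves the batch over (looking up the device via next(self.model.parameters()).device is a common idiom, not something from the original code):

def predict_sentiment(self, texts: List[str]) -> List[str]:
    texts = [self.clean_text(text) for text in texts]
    encoded = self.tokenizer.batch_encode_plus(texts, padding=True, add_special_tokens=True,
                                               truncation=True, max_length=self.tokenizer.max_len_single_sentence)
    # Move the input tensor to whatever device the model weights currently sit on.
    device = next(self.model.parameters()).device
    input_ids = pt.tensor(encoded["input_ids"]).to(device)
    with pt.no_grad():
        logits = self.model(input_ids)
    label_ids = pt.argmax(logits[0], axis=1)
    return [self.model.config.id2label[label_id] for label_id in label_ids.tolist()]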
Alternatively, you can hardcode the device by casting the contained BERT model directly to cuda (less elegant):

class SentimentModel_t():
    def __init__(self, ...):
        DEVICE = "cuda:0" if pt.cuda.is_available() else "cpu"
        print(DEVICE)
        self.model = AutoModelForSequenceClassification.from_pretrained(model_name).to(DEVICE)
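Either way, the batch you feed in still has to be moved onto that same device before the forward pass, and you can sanity-check where the weights ended up. A hypothetical quick check, assuming the class above:

model = SentimentModel_t()
# Expect cuda:0 here when a GPU was found, cpu otherwise.
print(next(model.model.parameters()).device)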
I am a bit late to the party. The Python package that I wrote already uses your GPU. You can have a look at the code to see how it is implemented.
Just install the package:
pip install germansentiment
and run the code:
from germansentiment import SentimentModel

model = SentimentModel()

texts = [
    "Mit keinem guten Ergebniss", "Das ist gar nicht mal so gut",
    "Total awesome!", "nicht so schlecht wie erwartet",
    "Der Test verlief positiv.", "Sie fährt ein grünes Auto."]

result = model.predict_sentiment(texts)
print(result)
Important: If you write your own code to use the model, you need to run the preprocessing code as well. Otherwise the results can be off.