I tried adding .clone() everywhere I thought it was necessary, but I still get an in-place operation error. It now shows up in the second epoch, whereas at first I was getting it in the first epoch.
import torch
import torch.nn as nn

class Model(torch.nn.Module):
    def __init__(self, user_M, item_M, feature_M, embedding_M):
        super(Model, self).__init__()
        self.dimension = embedding_M + feature_M + 1
        # Memories
        self.user_memory = torch.randn(user_M, 2, 1, self.dimension)
        self.item_memory = torch.randn(item_M, 2, 1, self.dimension)
        # Writer
        self.user_LSTM = nn.LSTMCell(self.dimension, self.dimension)
        self.item_LSTM = nn.LSTMCell(self.dimension, self.dimension)

    def score(self, user_id, item_id):
        user_em = torch.index_select(self.user_memory[:, 1, :].clone(), 0, user_id[:, 0].clone())
        item_em = torch.index_select(self.item_memory[:, 1, :].clone(), 0, item_id[:, 0].clone())
        return torch.sigmoid(torch.bmm(user_em, item_em.permute(0, 2, 1)))

    def forward(self, train_data, user_id, item_id):
        ui_train = train_data[:, 0:1].type(torch.LongTensor).clone()
        ii_train = train_data[:, 1:2].type(torch.LongTensor).clone()
        for row in range(len(train_data)):
            memory = self.user_memory[ui_train[row][0]].clone()
            h, c = self.user_LSTM(train_data[row:row+1].clone(), (memory[0], memory[1]))
            self.user_memory[ui_train[row]] = torch.stack((h, c))
        for row in range(len(train_data)):
            memory = self.item_memory[ii_train[row][0]].clone()
            h, c = self.item_LSTM(train_data[row:row+1].clone(), (memory[0], memory[1]))
            self.item_memory[ii_train[row]] = torch.stack((h, c))
        # Scoring
        y = self.score(user_id, item_id)
        return y.reshape(-1, 1)
model = Model(user_M, item_M, feature_M, 3)
criterion = torch.nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

for epoch in range(10):
    print(epoch)
    output = model(train_data, user_id, item_id)
    loss = criterion(output, label)
    optimizer.zero_grad()
    loss.backward(retain_graph=True)
    optimizer.step()
`RuntimeError`: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.FloatTensor [176, 704]], which is output 0 of TBackward, is at version 2; expected version 1 instead. Hint: enable anomaly detection to find the operation that failed to compute its gradient, with torch.autograd.set_detect_anomaly(True).
Just as @jodag said in the comment, the problematic lines are:
self.user_memory[ui_train[row]] = torch.stack((h, c))
self.item_memory[ii_train[row]] = torch.stack((h, c))
There you are modifying the values of self.user_memory and self.item_memory in place, which causes the error.
Try:
self.user_memory = self.user_memory.clone()
self.user_memory[ui_train[row]] = torch.stack((h, c))
self.item_memory = self.item_memory.clone()
self.item_memory[ii_train[row]] = torch.stack((h, c))
You can also remove the other clone() calls in your code, such as:
memory = self.user_memory[ui_train[row][0]].clone()
They are not necessary.
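Putting it together, forward() would look roughly like this (a sketch based on the poster's code with the changes above applied, not tested end to end):

def forward(self, train_data, user_id, item_id):
    ui_train = train_data[:, 0:1].type(torch.LongTensor)
    ii_train = train_data[:, 1:2].type(torch.LongTensor)
    for row in range(len(train_data)):
        memory = self.user_memory[ui_train[row][0]]
        h, c = self.user_LSTM(train_data[row:row+1], (memory[0], memory[1]))
        # clone the memory tensor first, then write into the clone
        self.user_memory = self.user_memory.clone()
        self.user_memory[ui_train[row]] = torch.stack((h, c))
    for row in range(len(train_data)):
        memory = self.item_memory[ii_train[row][0]]
        h, c = self.item_LSTM(train_data[row:row+1], (memory[0], memory[1]))
        self.item_memory = self.item_memory.clone()
        self.item_memory[ii_train[row]] = torch.stack((h, c))
    # Scoring
    y = self.score(user_id, item_id)
    return y.reshape(-1, 1)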
The reason an in-place operation can cause an error is that PyTorch relies on intermediate tensor values to compute gradients during the backward pass. If the tensor you modify happens to be one of those required intermediates, the in-place write breaks the gradient computation graph.
The solution is, instead of modifying the tensor in place, to make a clone of the original tensor and modify the clone. That way PyTorch can still access the old values.
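As a small self-contained illustration of the mechanism (unrelated to the model above; the tensor names are made up): torch.sigmoid saves its output for the backward pass, so writing into that output in place trips the version check, while writing into a clone does not:

import torch

w = torch.randn(3, requires_grad=True)

out = torch.sigmoid(w)       # sigmoid saves 'out' for its backward pass
out[0] = 0.0                 # in-place write bumps the saved tensor's version counter
try:
    out.sum().backward()
except RuntimeError as e:
    print(e)                 # "... modified by an inplace operation ..."

out = torch.sigmoid(w)
out_fixed = out.clone()      # modify a copy instead
out_fixed[0] = 0.0
out_fixed.sum().backward()   # works: the tensor saved by sigmoid is untouched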
cf. https://pytorch.org/docs/stable/notes/autograd.html#in-place-operations-with-autograd
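Finally, if you cannot spot the offending line by inspection, the hint at the end of the error message is worth following: with anomaly detection enabled, the backward error includes the forward traceback of the operation that failed to compute its gradient, which points at where the modified tensor was produced. A minimal sketch using the poster's training loop:

import torch

torch.autograd.set_detect_anomaly(True)  # enable once, before training

for epoch in range(10):
    output = model(train_data, user_id, item_id)
    loss = criterion(output, label)
    optimizer.zero_grad()
    loss.backward(retain_graph=True)
    optimizer.step()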