I wrote some models in PyTorch that were not able to learn anything, even after many epochs. To debug the problem, I made a simple model that learns the identity function of an input. The difficulty is that this model also doesn't learn anything, despite training for 50k epochs:
import torch
import torch.nn as nn

torch.manual_seed(1)

class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.input = nn.Linear(2, 4)
        self.hidden = nn.Linear(4, 4)
        self.output = nn.Linear(4, 2)
        self.relu = nn.ReLU()
        self.softmax = nn.Softmax(dim=1)
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        x = self.input(x)
        x = self.dropout(x)
        x = self.relu(x)
        x = self.hidden(x)
        x = self.dropout(x)
        x = self.relu(x)
        x = self.output(x)
        x = self.softmax(x)
        return x

X = torch.tensor([[1,0],[1,0],[0,1],[0,1]], dtype=torch.float)
net = Net()
criterion = nn.CrossEntropyLoss()
opt = torch.optim.Adam(net.parameters(), lr=0.001)

for i in range(100000):
    opt.zero_grad()
    y = net(X)
    loss = criterion(y, torch.argmax(X, dim=1))
    loss.backward()
    if i % 500 == 0:
        print("Epoch: ", i)
        print(torch.argmax(y, dim=1).detach().numpy().tolist())
        print("Loss: ", loss.item())
        print()
Output:
Epoch: 52500
[0, 0, 1, 0]
Loss: 0.6554909944534302
Epoch: 53000
[0, 0, 0, 0]
Loss: 0.7004914283752441
Epoch: 53500
[0, 0, 0, 0]
Loss: 0.7156486511230469
Epoch: 54000
[0, 0, 0, 0]
Loss: 0.7171240448951721
Epoch: 54500
[0, 0, 0, 0]
Loss: 0.691678524017334
Epoch: 55000
[0, 0, 0, 0]
Loss: 0.7301554679870605
Epoch: 55500
[0, 0, 0, 0]
Loss: 0.728650689125061
What is wrong with my implementation?
There are a few mistakes:

1. Missing optimizer.step(): opt.step() updates the parameters using the backpropagated gradients and the optimizer's accumulated state (e.g. Adam's running moment estimates). Your loop calls loss.backward() but never opt.step(), so the weights are never actually updated and the model cannot learn.
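To see why this matters, here is a minimal sketch (not from the original post; the layer and input values are just illustrative) showing that backward() alone leaves the weights untouched, and only step() applies the update:

import torch
import torch.nn as nn

torch.manual_seed(0)
layer = nn.Linear(2, 2)
opt = torch.optim.Adam(layer.parameters(), lr=0.1)

before = layer.weight.clone()
loss = layer(torch.ones(1, 2)).sum()
opt.zero_grad()
loss.backward()                            # gradients are now populated...
print(torch.equal(layer.weight, before))   # True: weights unchanged without step()
opt.step()                                 # ...but only step() changes the weights
print(torch.equal(layer.weight, before))   # False: weights were updated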
2. softmax with CrossEntropyLoss: PyTorch's nn.CrossEntropyLoss criterion combines nn.LogSoftmax() and nn.NLLLoss() in one single class, i.e. it applies a softmax and then takes the negative log itself. So in your case you are effectively computing softmax(softmax(output)), which flattens the scores and weakens the gradients, resulting in underfitting. The correct way is to use a linear output layer while training and apply softmax (or just take argmax) for prediction. (The corrected code below also disables dropout, since the noise it injects is unhelpful on a four-sample deterministic task.)
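A quick way to verify this (a self-contained check, not part of the original post) is to compare nn.CrossEntropyLoss on raw logits against nn.NLLLoss applied to log-softmax outputs; they match, which is why no softmax belongs in forward():

import torch
import torch.nn as nn

logits = torch.tensor([[2.0, -1.0], [0.5, 1.5]])
target = torch.tensor([0, 1])

ce = nn.CrossEntropyLoss()(logits, target)
nll = nn.NLLLoss()(torch.log_softmax(logits, dim=1), target)
print(torch.allclose(ce, nll))  # True: CrossEntropyLoss already applies log-softmax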
Here's the corrected code:
import torch
import torch.nn as nn

torch.manual_seed(1)

class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.input = nn.Linear(2, 4)
        self.hidden = nn.Linear(4, 4)
        self.output = nn.Linear(4, 2)
        self.relu = nn.ReLU()
        self.softmax = nn.Softmax(dim=1)
        # self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        x = self.input(x)
        # x = self.dropout(x)
        x = self.relu(x)
        x = self.hidden(x)
        # x = self.dropout(x)
        x = self.relu(x)
        x = self.output(x)
        # x = self.softmax(x)  # no softmax here; CrossEntropyLoss applies log-softmax itself
        return x

    def predict(self, x):
        # softmax is only applied at prediction time, outside the training graph
        with torch.no_grad():
            out = self.forward(x)
            return self.softmax(out)

X = torch.tensor([[1,0],[1,0],[0,1],[0,1]], dtype=torch.float)
net = Net()
criterion = nn.CrossEntropyLoss()
opt = torch.optim.Adam(net.parameters(), lr=0.001)

for i in range(100000):
    opt.zero_grad()
    y = net(X)
    loss = criterion(y, torch.argmax(X, dim=1))
    loss.backward()
    # This was missing before
    opt.step()
    if i % 500 == 0:
        print("Epoch: ", i)
        pred = net.predict(X)
        print(f'prediction: {torch.argmax(pred, dim=1).detach().numpy().tolist()}, actual: {torch.argmax(X, dim=1)}')
        print("Loss: ", loss.item())
Output:
Epoch: 0
prediction: [0, 0, 0, 0], actual: tensor([0, 0, 1, 1])
Loss: 0.7042869329452515
Epoch: 500
prediction: [0, 0, 1, 1], actual: tensor([0, 0, 1, 1])
Loss: 0.1166711300611496
Epoch: 1000
prediction: [0, 0, 1, 1], actual: tensor([0, 0, 1, 1])
Loss: 0.05215628445148468
Epoch: 1500
prediction: [0, 0, 1, 1], actual: tensor([0, 0, 1, 1])
Loss: 0.02993333339691162
Epoch: 2000
prediction: [0, 0, 1, 1], actual: tensor([0, 0, 1, 1])
Loss: 0.01916157826781273
Epoch: 2500
prediction: [0, 0, 1, 1], actual: tensor([0, 0, 1, 1])
Loss: 0.01306679006665945
Epoch: 3000
prediction: [0, 0, 1, 1], actual: tensor([0, 0, 1, 1])
Loss: 0.009280549362301826
...