Why does the SGD loss on my dataset not match between the PyTorch code and the from-scratch Python code for linear regression?

I'm trying to implement multiple linear regression on the wine dataset, but when I compare the results of PyTorch with my from-scratch Python code, the losses do not come out the same.

My Scratch Code:

Functions:

def yinfer(X, beta):
  # prediction: intercept (beta[0]) plus the dot product of the features with the slopes
  return beta[0] + np.dot(X,beta[1:]) 

def cost(X, Y, beta):
  # mean squared error over all m samples
  sum = 0
  m = len(Y)
  for i in range(m): 
    sum = sum + ( yinfer(X[i],beta) - Y[i])*(yinfer(X[i],beta) - Y[i])
  return  sum/(1.0*m)

Main Code:

alpha = 0.005
b=[0,0.04086357 ,-0.02831656  ,0.09622949 ,-0.15162516  ,0.60188454  ,0.47528714,
  -0.6066466  ,-0.22995654 ,-0.58388734  ,0.20954669 ,-0.67851365]
beta = np.array(b)
print(beta)
iterations = 1000
arr_cost = np.zeros((iterations,2))
m = len(Y)
temp_beta = np.zeros(12)
for i in range(iterations):
  for k in range(m): 
        temp_beta[0] =  yinfer(X[k,:], beta) - Y[k]               # gradient term for the intercept
        temp_beta[1:] = (yinfer(X[k,:], beta) - Y[k])*X[k,:]      # gradient terms for the slopes
        beta = beta - alpha*temp_beta/(1.0*m)    #(m*np.linalg.norm(temp_beta))
  arr_cost[i] = [i,cost(X,Y,beta)]
  #print(cost(X,Y,beta))
plt.scatter(arr_cost[0:iterations,0], arr_cost[0:iterations,1])

I have used the same initial weights that were used in the PyTorch code.
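For reference, here is a minimal sketch of how those starting values could be copied into an nn.Linear(11, 1) layer (it assumes b is the 12-value list above, with b[0] as the intercept and b[1:] as the 11 slopes):

import torch
from torch import nn

layer = nn.Linear(11, 1)   # same shape as the linear layer in the model below
with torch.no_grad():
    layer.bias.fill_(b[0])                                # intercept term
    layer.weight.copy_(torch.tensor(b[1:]).view(1, -1))   # the 11 slope coefficients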

My Pytorch code:

class LinearRegression(nn.Module):
  def __init__(self,n_input_features):
    super(LinearRegression,self).__init__()
    self.linear=nn.Linear(n_input_features,1)
    # self.linear.weight.data=b.view(1,-1)
    self.linear.bias.data.fill_(0.0)
    nn.init.xavier_uniform_(self.linear.weight)
    # nn.init.xavier_normal_(self.linear.bias)
  def forward(self,x):
    y_predicted=self.linear(x)
    return y_predicted
model=LinearRegression(11)
criterion = nn.MSELoss()
num_epochs=1000
for epoch in range(num_epochs):
  for x,y in train_data:
    y_pred=model(x)
    loss=criterion(y,y_pred)
    # print(loss)
    loss.backward()
    optimizer.step()
    optimizer.zero_grad()

My DataLoader:

class Data(Dataset):
    def __init__(self):
        self.x=x_train
        self.y=y_train
        self.len=self.x.shape[0]
    def __getitem__(self,index):
      return self.x[index],self.y[index]
    def __len__(self):
        return self.len
dataset=Data()
train_data=DataLoader(dataset=dataset,batch_size=1,shuffle=False)

[Graph comparing the two loss curves]

Can someone please tell me why this is happening, or whether there are any faults in my code?

asked Nov 15 '22 by Rest1ve


1 Answer

There were a couple of tweaks necessary to the code. I also had to create data and an optimizer, which you hadn't provided. With the changes below, both methods produce a learning function. Of course, optimal hyperparameters such as alpha or the number of iterations might differ between the two approaches, and you might need to find them separately.
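One reason the curves will not line up under the same nominal learning rate is that each implementation takes a different effective step per sample. This is only a back-of-the-envelope sketch, reusing the alpha and m names from your scratch code; the factor of 2 comes from differentiating the squared error, which your scratch gradient omits:

# Scratch code, per sample k (gradient divided by m, no factor of 2):
#   beta   <- beta   - (alpha / m) * (y_hat - y) * [1, x_k]
#
# PyTorch, per sample (MSELoss with batch_size=1, plain SGD):
#   weight <- weight - lr * 2 * (y_hat - y) * x_k
#   bias   <- bias   - lr * 2 * (y_hat - y)
#
# With identical initial weights and shuffle=False, the steps coincide only when:
lr_equivalent = alpha / (2 * m)

The working code below does not enforce that relationship; it just uses separately chosen values for each version.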

# Create data:
import numpy as np
from sklearn import datasets
X, Y = datasets.load_diabetes(return_X_y=True)
# Adding a random column to match your data shape (11 features):
X = np.hstack((X, np.random.randn(X.shape[0], 1)))

iterations = 500

################
# Python version
def yinfer(X, beta):
    return beta[0] + np.dot(X,beta[1:]) 

def cost(X, Y, beta):
    sum = 0
    m = len(Y)
    for i in range(m): 
        sum = sum + ( yinfer(X[i], beta) - Y[i])*(yinfer(X[i], beta) - Y[i])
    return  sum/(1.0*m)

beta = np.array([0,0.04086357 ,-0.02831656  ,0.09622949 ,-0.15162516  ,0.60188454  ,0.47528714,
  -0.6066466  ,-0.22995654 ,-0.58388734  ,0.20954669 ,-0.67851365])
arr_cost = []
m = len(Y)
alpha = 0.1
temp_beta = np.zeros(12)
for i in range(iterations):
    for k in range(m): 
        temp_beta[0] =  yinfer(X[k,:], beta) - Y[k]
        temp_beta[1:] = (yinfer(X[k,:], beta) - Y[k])*X[k,:]
        beta = beta - alpha*temp_beta/(1.0*m)
    arr_cost.append(cost(X,Y,beta))

#################
# Pytorch version
from torch import nn
from torch import optim
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt

class LinearRegression(nn.Module):
    def __init__(self,n_input_features):
        super(LinearRegression,self).__init__()
        self.linear=nn.Linear(n_input_features,1)
        self.linear.bias.data.fill_(0.0)
        nn.init.xavier_uniform_(self.linear.weight)
    def forward(self,x):
        y_predicted=self.linear(x)
        return y_predicted

class Data(Dataset):
    def __init__(self, x_train, y_train):
        self.x=x_train
        self.y=y_train
        self.len=self.x.shape[0]
    def __getitem__(self,index):
        return self.x[index],self.y[index]
    def __len__(self):
        return self.len
train_data=DataLoader(dataset=Data(X, Y),batch_size=1,shuffle=False)

criterion = nn.MSELoss()
model=LinearRegression(11)
optimizer = optim.SGD(model.parameters(), lr=0.01)

loss_vals = []  # store results
for epoch in range(iterations):
    for x, y in train_data:
        x, y = x.float(), y.float()
        y_pred = model(x)
        loss = criterion(y, y_pred)
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
    loss_vals.append(float(loss))  # loss on the last sample of each epoch
    
##############
# Plot results
f, ax = plt.subplots(1,1, figsize=(20,5))
ax.plot(range(1, iterations+1), arr_cost, label='python')
ax.plot(range(1, iterations+1), loss_vals, label='torch')
ax.legend(); ax.set_xlabel('epochs'); ax.set_ylabel('loss');
answered Nov 24 '22 by joeDiHare