I'm trying to create a multi-layer neural network class in PyTorch. I want to know whether the following two pieces of code create the same network.
Model 1 with nn.Linear
class TestModel(nn.Module):
    """Two-layer fully connected network built from separate ``nn.Linear`` layers.

    The forward pass computes ``softmax(fc2(relu(fc1(x))))``.

    Args:
        input_dim: Number of input features accepted by ``fc1``.
        hidden_dim: Number of features between ``fc1`` and ``fc2``.
        output_dim: Number of features produced by ``fc2``.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super(TestModel, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Apply ``fc1``, ReLU, ``fc2`` and softmax to ``x`` and return the result."""
        x = nn.functional.relu(self.fc1(x))
        # NOTE(review): softmax is called without ``dim``, which PyTorch reports
        # as deprecated; consider passing ``dim=-1`` explicitly.
        x = nn.functional.softmax(self.fc2(x))
        return x
Model 2 with nn.Sequential
class TestModel2(nn.Module):
    """Two-layer fully connected network expressed as a single ``nn.Sequential``.

    The container holds, in order: ``Linear``, ``ReLU``, ``Linear``, ``Softmax``.

    Args:
        input_dim: Number of input features accepted by the first linear layer.
        hidden_dim: Number of features between the two linear layers.
        output_dim: Number of features produced by the second linear layer.
    """

    # The parameters were originally named ``input``/``hidden``/``output`` while
    # the body referenced ``input_dim``/``hidden_dim``/``output_dim``, which
    # raised NameError on construction; the names now match the body.
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(TestModel2, self).__init__()
        self.seq = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, output_dim),
            # NOTE(review): ``nn.Softmax()`` without ``dim`` is reported as
            # deprecated by PyTorch; consider ``nn.Softmax(dim=-1)``.
            nn.Softmax(),
        )

    def forward(self, x):
        """Run ``x`` through the sequential container and return its output."""
        return self.seq(x)
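nn.Sequential is often more convenient than calling each layer by hand, but it is not inherently faster: its forward pass simply calls the contained modules one after another.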
The value a Sequential provides over manually calling a sequence of modules is that it allows treating the whole container as a single module, such that performing a transformation on the Sequential applies to each of the modules it stores (each of which is a registered submodule of the Sequential).
So nn.Sequential is a container that is used when you want to run certain layers sequentially. It makes the forward method readable and compact. So in the code you are pointing to, they build different ResNet architectures with the same function.
F.relu is a function that simply takes a tensor as input, sets all values in that tensor that are less than 0 to zero, and returns the result. nn.ReLU does exactly the same thing, except that it represents the operation as a module, requiring us to first instantiate it with nn.ReLU() before calling it on a tensor.
Yes, these two pieces of code create the same network. One way to convince yourself that this is true is to save both models to ONNX.
import torch
import torch.nn as nn
class TestModel(nn.Module):
    """Feed-forward network with one hidden layer, written with explicit layers.

    ``forward`` evaluates ``softmax(fc2(relu(fc1(x))))``.

    Args:
        input_dim: Input feature count of ``fc1``.
        hidden_dim: Output feature count of ``fc1`` and input feature count of ``fc2``.
        output_dim: Output feature count of ``fc2``.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        self.fc1 = nn.Linear(in_features=input_dim, out_features=hidden_dim)
        self.fc2 = nn.Linear(in_features=hidden_dim, out_features=output_dim)

    def forward(self, x):
        """Return the softmax-normalised output of the two linear layers."""
        activated = nn.functional.relu(self.fc1(x))
        logits = self.fc2(activated)
        # ``dim`` is intentionally left implicit: this is the call that produces
        # the softmax deprecation warning shown in the recorded export output.
        return nn.functional.softmax(logits)
class TestModel2(nn.Module):
    """Feed-forward network with one hidden layer, wrapped in ``nn.Sequential``.

    The container ``seq`` holds, at indices 0-3: ``Linear``, ``ReLU``,
    ``Linear``, ``Softmax``.

    Args:
        input_dim: Input feature count of the first linear layer.
        hidden_dim: Feature count between the two linear layers.
        output_dim: Output feature count of the second linear layer.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        # Layers are created in execution order so their indices inside the
        # container stay 0, 1, 2, 3.
        stages = [
            nn.Linear(in_features=input_dim, out_features=hidden_dim),
            nn.ReLU(),
            nn.Linear(in_features=hidden_dim, out_features=output_dim),
            # ``dim`` is intentionally left implicit: this module produces the
            # softmax deprecation warning shown in the recorded export output.
            nn.Softmax(),
        ]
        self.seq = nn.Sequential(*stages)

    def forward(self, x):
        """Pass ``x`` through every stage of ``seq`` and return the result."""
        result = self.seq(x)
        return result
# Build one instance of each model with input_dim=1, hidden_dim=2, output_dim=3.
m = TestModel(1, 2, 3)
m2 = TestModel2(1, 2, 3)
# Export the first model to "test.onnx", using a one-element tensor as the
# example input; verbose=True prints the exported graph (shown below).
torch.onnx.export(m, torch.Tensor([0]), "test.onnx", verbose=True)
/opt/anaconda3/envs/py36/bin/ipython:9: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
graph(%0 : Float(1)
      %1 : Float(2, 1)
      %2 : Float(2)
      %3 : Float(3, 2)
      %4 : Float(3)) {
  %5 : Float(1!, 2) = onnx::Transpose[perm=[1, 0]](%1), scope: TestModel/Linear[fc1]
  %6 : Float(2) = onnx::MatMul(%0, %5), scope: TestModel/Linear[fc1]
  %7 : Float(2) = onnx::Add(%6, %2), scope: TestModel/Linear[fc1]
  %8 : Float(2) = onnx::Relu(%7), scope: TestModel
  %9 : Float(2!, 3!) = onnx::Transpose[perm=[1, 0]](%3), scope: TestModel/Linear[fc2]
  %10 : Float(3) = onnx::MatMul(%8, %9), scope: TestModel/Linear[fc2]
  %11 : Float(3) = onnx::Add(%10, %4), scope: TestModel/Linear[fc2]
  %12 : Float(3) = onnx::Softmax[axis=0](%11), scope: TestModel
  return (%12);
}
# Export the nn.Sequential-based model with the same example input so the two
# printed graphs can be compared. The target path is again "test.onnx", the
# same file name used for the first export.
torch.onnx.export(m2, torch.Tensor([0]), "test.onnx", verbose=True)
/opt/anaconda3/envs/py36/lib/python3.6/site-packages/torch/nn/modules/module.py:475: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
  result = self._slow_forward(*input, **kwargs)
graph(%0 : Float(1)
      %1 : Float(2, 1)
      %2 : Float(2)
      %3 : Float(3, 2)
      %4 : Float(3)) {
  %5 : Float(1!, 2) = onnx::Transpose[perm=[1, 0]](%1), scope: TestModel2/Sequential[seq]/Linear[0]
  %6 : Float(2) = onnx::MatMul(%0, %5), scope: TestModel2/Sequential[seq]/Linear[0]
  %7 : Float(2) = onnx::Add(%6, %2), scope: TestModel2/Sequential[seq]/Linear[0]
  %8 : Float(2) = onnx::Relu(%7), scope: TestModel2/Sequential[seq]/ReLU[1]
  %9 : Float(2!, 3!) = onnx::Transpose[perm=[1, 0]](%3), scope: TestModel2/Sequential[seq]/Linear[2]
  %10 : Float(3) = onnx::MatMul(%8, %9), scope: TestModel2/Sequential[seq]/Linear[2]
  %11 : Float(3) = onnx::Add(%10, %4), scope: TestModel2/Sequential[seq]/Linear[2]
  %12 : Float(3) = onnx::Softmax[axis=0](%11), scope: TestModel2/Sequential[seq]/Softmax[3]
  return (%12);
}
So both models result in the same ONNX graph with the same operations.
If you love us, you can donate to us via PayPal or buy us a coffee so we can maintain and grow. Thank you!
Donate Us With