I have just begun learning PyTorch and created my first CNN. The dataset contains 3360 RGB images, which I converted to a [3360, 3, 224, 224]
tensor. The data and labels are stored in a torch.utils.data.TensorDataset.
Below is the training code.
def train_net():
    """Run five training epochs of VGG-19 on the dataset from ``ld.load()``.

    Batches are fed to the network exactly as the DataLoader yields them,
    with no dtype conversion. This is the version from the question: with
    float64 input it raises the ``RuntimeError`` shown in the traceback.
    """
    dataset = ld.load()
    data_iter = Data.DataLoader(dataset, batch_size=168, shuffle=True)
    net = model.VGG_19()
    summary(net, (3, 224, 224), device="cpu")
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9, dampening=0.1)
    # Constructed here but never stepped anywhere in this function.
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=50, gamma=0.1)

    for epoch in range(5):
        print("epoch:", epoch + 1)
        running_loss = 0
        for i, (x, y) in enumerate(data_iter):
            # Debug output: shows the dtype the batch arrives with.
            print(x.dtype)
            optimizer.zero_grad()
            out = net(x)
            loss = criterion(out, y)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            # Report the mean loss after every 100th batch, then start over.
            if (i + 1) % 100 == 0:
                print("loss:", running_loss / 100)
                running_loss = 0.0
    print("finish train")
Then I get this error:
Traceback (most recent call last):
File "D:/python/DeepLearning/VGG/train.py", line 52, in <module>
train_net()
File "D:/python/DeepLearning/VGG/train.py", line 29, in train_net
out = net(x)
File "D:\python\lib\site-packages\torch\nn\modules\module.py", line 727, in _call_impl
result = self.forward(*input, **kwargs)
File "D:\python\DeepLearning\VGG\model.py", line 37, in forward
out = self.conv3_64(x)
File "D:\python\lib\site-packages\torch\nn\modules\module.py", line 727, in _call_impl
result = self.forward(*input, **kwargs)
File "D:\python\lib\site-packages\torch\nn\modules\container.py", line 117, in forward
input = module(input)
File "D:\python\lib\site-packages\torch\nn\modules\module.py", line 727, in _call_impl
result = self.forward(*input, **kwargs)
File "D:\python\lib\site-packages\torch\nn\modules\conv.py", line 423, in forward
return self._conv_forward(input, self.weight)
File "D:\python\lib\site-packages\torch\nn\modules\conv.py", line 419, in _conv_forward
return F.conv2d(input, weight, self.bias, self.stride,
RuntimeError: expected scalar type Double but found Float
I think there is something wrong with x, so I printed its type with print(x.dtype):
torch.float64
which is double instead of float. Do you know what's wrong? Thanks for your help!
That error actually refers to the weights of the conv layer, which are float32
by default. When the convolution is called, your input is double
(float64 in PyTorch) while the conv weights are float (float32), so the two
dtypes do not match and the operation fails.
So the solution in your case is to convert the input to float before passing it to the network:
def train_net():
    """Train VGG-19 for five epochs on the dataset returned by ``ld.load()``.

    Each input batch is cast to float32 before the forward pass so that its
    dtype matches the network's default float32 weights; feeding float64
    input raises ``RuntimeError: expected scalar type Double but found Float``.

    The mean training loss is printed once per epoch, and the learning-rate
    scheduler is stepped once per epoch.
    """
    dataset = ld.load()
    data_iter = Data.DataLoader(dataset, batch_size=168, shuffle=True)
    net = model.VGG_19()
    summary(net, (3, 224, 224), device="cpu")
    loss_func = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9, dampening=0.1)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=50, gamma=0.1)
    for epoch in range(5):
        print("epoch:", epoch + 1)
        train_loss = 0.0
        num_batches = 0
        for x, y in data_iter:
            # HERE IS THE CHANGE: cast the float64 batch to float32 so it
            # matches the dtype of the conv weights.
            x = x.float()
            # NOTE(review): assumes y already holds class indices in the
            # dtype CrossEntropyLoss expects -- confirm against ld.load().
            optimizer.zero_grad()
            out = net(x)
            loss = loss_func(out, y)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
            num_batches += 1
        # Report per epoch: a fixed "every 100 batches" interval never fires
        # when an epoch has fewer than 100 batches. Skip if the loader was empty.
        if num_batches:
            print("loss:", train_loss / num_batches)
        # Advance the LR schedule once per epoch, after the optimizer updates.
        # With step_size=50 the rate stays unchanged over these five epochs.
        scheduler.step()
    print("finish train")
This will work for sure
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With