I am working on the classic example with digits. I want to create a my first neural network that predict the labels of digit images {0,1,2,3,4,5,6,7,8,9}. So the first column of train.txt has the labels and all the other columns are the features of each label. I have defined a class to import my data:
class DigitDataset(Dataset):
"""Digit dataset."""
def __init__(self, file_path, transform=None):
"""
Args:
csv_file (string): Path to the csv file with annotations.
root_dir (string): Directory with all the images.
transform (callable, optional): Optional transform to be applied
on a sample.
"""
self.data = pd.read_csv(file_path, header = None, sep =" ")
self.transform = transform
def __len__(self):
return len(self.data)
def __getitem__(self, idx):
if torch.is_tensor(idx):
idx = idx.tolist()
labels = self.data.iloc[idx,0]
images = self.data.iloc[idx,1:-1].values.astype(np.uint8).reshape((1,16,16))
if self.transform is not None:
sample = self.transform(sample)
return images, labels
And then I am running these commands to split my dataset into batches, to define a model and a loss:
train_dataset = DigitDataset("train.txt")
train_loader = DataLoader(train_dataset, batch_size=64,
shuffle=True, num_workers=4)
# Model creation with neural net Sequential model
model=nn.Sequential(nn.Linear(256, 128), # 1 layer:- 256 input 128 o/p
nn.ReLU(), # Defining Regular linear unit as activation
nn.Linear(128,64), # 2 Layer:- 128 Input and 64 O/p
nn.Tanh(), # Defining Regular linear unit as activation
nn.Linear(64,10), # 3 Layer:- 64 Input and 10 O/P as (0-9)
nn.LogSoftmax(dim=1) # Defining the log softmax to find the probablities
for the last output unit
)
# defining the negative log-likelihood loss for calculating loss
criterion = nn.NLLLoss()
images, labels = next(iter(train_loader))
images = images.view(images.shape[0], -1)
logps = model(images) #log probabilities
loss = criterion(logps, labels) #calculate the NLL-loss
And I take the error:
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-2-7f4160c1f086> in <module>
47 images = images.view(images.shape[0], -1)
48
---> 49 logps = model(images) #log probabilities
50 loss = criterion(logps, labels) #calculate the NLL-loss
~/anaconda3/lib/python3.8/site-packages/torch/nn/modules/module.py in _call_impl(self,
*input, **kwargs)
725 result = self._slow_forward(*input, **kwargs)
726 else:
--> 727 result = self.forward(*input, **kwargs)
728 for hook in itertools.chain(
729 _global_forward_hooks.values(),
~/anaconda3/lib/python3.8/site-packages/torch/nn/modules/container.py in forward(self, input)
115 def forward(self, input):
116 for module in self:
--> 117 input = module(input)
118 return input
119
~/anaconda3/lib/python3.8/site-packages/torch/nn/modules/module.py in _call_impl(self,
*input, **kwargs)
725 result = self._slow_forward(*input, **kwargs)
726 else:
--> 727 result = self.forward(*input, **kwargs)
728 for hook in itertools.chain(
729 _global_forward_hooks.values(),
~/anaconda3/lib/python3.8/site-packages/torch/nn/modules/linear.py in forward(self, input)
91
92 def forward(self, input: Tensor) -> Tensor:
---> 93 return F.linear(input, self.weight, self.bias)
94
95 def extra_repr(self) -> str:
~/anaconda3/lib/python3.8/site-packages/torch/nn/functional.py in linear(input, weight, bias)
1688 if input.dim() == 2 and bias is not None:
1689 # fused op is marginally faster
-> 1690 ret = torch.addmm(bias, input, weight.t())
1691 else:
1692 output = input.matmul(weight.t())
RuntimeError: expected scalar type Float but found Byte
Do you know what is wrong? Thank you for your patience and help!
This line is the cause of your error:
images = self.data.iloc[idx, 1:-1].values.astype(np.uint8).reshape((1, 16, 16))
images are uint8 (byte) while the neural network needs inputs as floating point in order to calculate gradients (you can't calculate gradients for backprop using integers as those are not continuous and non-differentiable).
You can use torchvision.transforms.functional.to_tensor to convert the image into float and into [0, 1] like this:
import torchvision
images = torchvision.transforms.functional.to_tensor(
self.data.iloc[idx, 1:-1].values.astype(np.uint8).reshape((1, 16, 16))
)
or simply divide by 255 to get values into [0, 1].
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With