I am using TensorFlow 2.0.0 and trying to create my own dataset with tf.data.Dataset.from_generator().
Here is my code:
import numpy as np
import tensorflow as tf
from tensorflow.keras.utils import to_categorical

def trainDatagen():
    for npy in train_list:
        x = tf.convert_to_tensor(np.load(npy), dtype=tf.float32)
        if npy in gbmlist:
            y = to_categorical(0, num_classes=2)
        else:
            y = to_categorical(1, num_classes=2)
        yield x, y
def tfDatasetGen(datagen, output_types, is_training, batch_size):
    dataset = tf.data.Dataset.from_generator(generator=datagen, output_types=output_types)
    if is_training:
        dataset.shuffle(buffer_size=100)
        dataset.repeat()
    dataset.batch(batch_size=batch_size)
    dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
    return dataset
train_set = tfDatasetGen(
    datagen=trainDatagen,
    output_types=(tf.float32, tf.float32),
    is_training=True,
    batch_size=16)
All those npy files are np.arrays with shape [4000, 2048], obtained from large pathology slides cut into 4000 tiles. The feature vector of each tile was calculated by ResNet50.
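For reference, per-tile 2048-d features can be extracted with ResNet50 roughly like this. This is only a sketch with random placeholder tiles; the 224×224 tile size, batch of 8, and the names extractor/tiles are illustrative assumptions, not part of my actual pipeline:

import numpy as np
import tensorflow as tf

# ResNet50 without its classification head; global average pooling
# turns each tile into a single 2048-d feature vector.
extractor = tf.keras.applications.ResNet50(
    include_top=False, weights='imagenet', pooling='avg')

tiles = np.random.uniform(0, 255, (8, 224, 224, 3)).astype(np.float32)  # placeholder tiles
features = extractor.predict(
    tf.keras.applications.resnet50.preprocess_input(tiles))
print(features.shape)  # (8, 2048)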
Here is my model:
from tensorflow import keras
from tensorflow.keras import layers

def top_k(inputs, k):
    return tf.nn.top_k(inputs, k=k, sorted=True).values

def least_k(inputs, k):
    return -tf.nn.top_k(-inputs, k=k, sorted=True).values

def minmax_k(inputs, k):
    return tf.concat([top_k(inputs, k), least_k(inputs, k)], axis=-1)
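As a quick sanity check of what minmax_k returns, here is a toy run (the values are illustrative only):

x = tf.constant([[9., 1., 7., 3., 5., 2., 8., 4., 6., 0.]])
# Top-2 values followed by the 2 smallest values (negated back to ascending order).
print(minmax_k(x, 2).numpy())  # [[9. 8. 0. 1.]]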
inputs = keras.Input(shape=(4000,2048))
y = layers.Conv1D(1, 2048, use_bias=False, padding='same', data_format='channels_last')(inputs)
y = layers.Flatten()(y)
y = layers.Lambda(minmax_k, arguments={'k': 5})(y)
y = layers.Dense(units=200, activation=tf.nn.relu)(y)
y = layers.Dropout(rate=0.5)(y)
y = layers.Dense(units=100, activation=tf.nn.relu)(y)
y = layers.Dense(units=2, activation=tf.nn.softmax)(y)
model = keras.Model(inputs=inputs, outputs=y)
When using model.fit() to train the model, I received this:
ValueError: Error when checking input: expected input_4 to have 3 dimensions, but got array with shape (4000, 2048)
The whole idea comes from the paper arXiv:1802.02212. Here is the figure of the neural network I tried to reproduce.
Following Mahsa Hassankashi's advice, I reshaped the input to (4000, 2048, 1):
x = tf.convert_to_tensor(np.load(npy).reshape(4000,2048,1), dtype=tf.float32)
and modified this part to fix an error according to GitHub issues:
train_set = tfDatasetGen(
    datagen=trainDatagen,
    output_types=(tf.float32, tf.float32),
    output_shapes=(tf.TensorShape((None, None, None)), tf.TensorShape((2,))),  # newly added
    is_training=True,
    batch_size=16)
But I got this:
InvalidArgumentError: input and filter must have the same depth: 1 vs 2048
Finally, I tried to reshape the input to (1, 4000, 2048); this time a different kind of error appeared:
InvalidArgumentError: Expected size[0] in [0, 1], but got 2
Please look at train_list: if the data is two-dimensional and needs a 2-D convolution, use Conv2D instead of Conv1D. These references explain the difference:
convolution2d
Difference between 1D, 2D, and 3D convolutions in convolutional neural networks (in deep learning)
CNN Dimensional
Otherwise, for the last error:
InvalidArgumentError: Expected size[0] in [0, 1], but got 2
When the generated element is already a Tensor, from_generator flattens it against output_types, and this conversion does not work. The solution is to use from_tensors or from_tensor_slices instead of from_generator when the generator yields tensors.
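A minimal sketch of that approach, assuming the train_list and gbmlist from the question and that all arrays fit in memory (the names x_all/y_all are illustrative):

import numpy as np
import tensorflow as tf
from tensorflow.keras.utils import to_categorical

# Stack every slide into one array instead of yielding tensors one by one.
x_all = np.stack([np.load(npy) for npy in train_list])               # (n, 4000, 2048)
y_all = np.stack([to_categorical(0 if npy in gbmlist else 1, num_classes=2)
                  for npy in train_list])                            # (n, 2)

train_set = tf.data.Dataset.from_tensor_slices((x_all, y_all))
train_set = train_set.shuffle(100).repeat().batch(16)
train_set = train_set.prefetch(tf.data.experimental.AUTOTUNE)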
Please test the solutions below:
1. TensorFlow GPU: recreate the environment and reinstall TensorFlow with GPU support.
conda create --name tensorflow
activate tensorflow
pip install tensorflow
pip install tensorflow-gpu
2. Timesteps. According to this, Conv1D needs 3-dimensional input and Conv2D needs 4-dimensional input:
input_shape = (timesteps, input_dim)
timesteps = 1
Then reshape X_train and X_test as:
X1_Train = X1_Train.reshape((4000, 2048, 1))
# call model.fit()
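To make the shape requirements concrete, here is a small sketch (the array sizes are placeholders chosen only to demonstrate the dimensionality):

import numpy as np
import tensorflow as tf

# Conv1D expects (batch, steps, channels) -- 3-D input.
x3 = np.zeros((2, 10, 8), dtype=np.float32)
print(tf.keras.layers.Conv1D(4, 3, padding='same')(x3).shape)   # (2, 10, 4)

# Conv2D expects (batch, rows, cols, channels) -- 4-D input.
x4 = np.zeros((2, 10, 10, 1), dtype=np.float32)
print(tf.keras.layers.Conv2D(4, 3, padding='same')(x4).shape)   # (2, 10, 10, 4)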
3. Use model.fit_generator() (note that in TensorFlow 2.x, model.fit() itself accepts generators and datasets, and fit_generator() is deprecated).
4. Add a Flatten layer before the last Dense layer:
model.add(Flatten())
See the Keras convolution layers documentation.
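For illustration, in a Sequential model that suggestion looks like this (a sketch; the filter count and kernel size are placeholders, not taken from the question):

from tensorflow.keras import Sequential, layers

model = Sequential([
    layers.Conv1D(8, 3, padding='same', input_shape=(4000, 2048)),
    layers.Flatten(),                       # flatten before the dense head
    layers.Dense(2, activation='softmax'),
])
model.summary()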