from random import sample

# Randomly hold out 80% of the records for training; a set makes the
# membership tests below O(1) instead of O(n).
index = set(sample(range(0, len(result)), len(result) // 5 * 4))
description_train = [child[0] for i, child in enumerate(result) if i in index]
ipc_train = [child[1] for i, child in enumerate(result) if i in index]
description_test = [child[0] for i, child in enumerate(result) if i not in index]
ipc_test = [child[1] for i, child in enumerate(result) if i not in index]
import numpy as np

def to_onehot(li):
    # Multi-hot encode the eight top-level IPC sections (A-H).
    result = np.zeros(8)
    for i, section in enumerate('ABCDEFGH'):
        if section in li:
            result[i] = 1
    return result
from tensorflow.keras.preprocessing.text import Tokenizer

max_words = 100000
num_classes = 8

# Binary bag-of-words features over the max_words most frequent tokens.
t = Tokenizer(num_words=max_words)
t.fit_on_texts(description_train)
X_train = t.texts_to_matrix(description_train, mode='binary')
X_test = t.texts_to_matrix(description_test, mode='binary')
Y_train = np.array([to_onehot(child) for child in ipc_train], dtype=np.int32)
Y_test = np.array([to_onehot(child) for child in ipc_test], dtype=np.int32)
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Sigmoid outputs with binary cross-entropy: multi-label classification
# over the eight IPC sections.
model = Sequential()
model.add(Dense(1024, input_shape=(max_words,), activation='relu'))
model.add(Dense(128, activation='relu'))
model.add(Dense(num_classes, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(X_train, Y_train, batch_size=128, epochs=5, validation_split=0.1)
The last line (model.fit) results in the following error:
InternalError: Failed copying input tensor from /job:localhost/replica:0/task:0/device:CPU:0 to /job:localhost/replica:0/task:0/device:GPU:0 in order to run GatherV2: Dst tensor is not initialized. [Op:GatherV2]
How can I fix it? Thank you in advance.
I had this error very often, even with high-RAM EC2 instances. The only solution for me was to use generators:
from tensorflow.keras.utils import Sequence
import numpy as np

class DataGenerator(Sequence):
    def __init__(self, x_set, y_set, batch_size):
        self.x, self.y = x_set, y_set
        self.batch_size = batch_size

    def __len__(self):
        # Number of batches per epoch.
        return int(np.ceil(len(self.x) / float(self.batch_size)))

    def __getitem__(self, idx):
        # Slice out a single batch; only this batch is materialised per step.
        batch_x = self.x[idx * self.batch_size:(idx + 1) * self.batch_size]
        batch_y = self.y[idx * self.batch_size:(idx + 1) * self.batch_size]
        return batch_x, batch_y
train_gen = DataGenerator(X_train, Y_train, 32)
test_gen = DataGenerator(X_test, Y_test, 32)

history = model.fit(train_gen,
                    epochs=6,
                    validation_data=test_gen)
In the above example, we assume that X and y are numpy arrays.
My guess as to what is happening: even though I was using a high-RAM instance, I suspect the real limit is GPU memory. When you pass plain arrays instead of a generator, TensorFlow seems to try to stage the full arrays in GPU memory, even though training itself proceeds in batches.
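If you want to test that hypothesis, one option (my addition, using the standard tf.config API, not something from the original code) is to enable GPU memory growth so TensorFlow allocates GPU memory on demand instead of reserving it up front:

import tensorflow as tf

# Allocate GPU memory incrementally rather than all at once; this must run
# before any operation touches the GPU.
for gpu in tf.config.list_physical_devices('GPU'):
    tf.config.experimental.set_memory_growth(gpu, True)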
One memory-costly step is the internal conversion from (usually) NumPy arrays to tf.Tensor. If you are sure your GPU can handle the individual batches, convert the data to TensorFlow tensors in CPU RAM first, and only then pass it to your model (which can then run on the GPU):
import tensorflow as tf

with tf.device('/cpu:0'):
    # Build the tensors in host (CPU) memory rather than on the GPU.
    x = tf.convert_to_tensor(x, np.float32)
    y = tf.convert_to_tensor(y, np.float32)
and then, outside of the with statement:
model.fit(x=x, y=y)
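Along the same lines, a variant of my own (a sketch, not part of the answers above, assuming X_train and Y_train fit in CPU RAM) is to wrap the arrays in a tf.data.Dataset, which keeps the full arrays in host memory and ships only one batch at a time to the GPU:

import tensorflow as tf

# The dataset slices X_train/Y_train lazily; only each 128-sample batch is
# transferred to the GPU during training.
train_ds = tf.data.Dataset.from_tensor_slices((X_train, Y_train)).batch(128)
model.fit(train_ds, epochs=5)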