I am trying to run a DNNClassifier with TensorFlow on some log data that contains a mix of categorical and numeric data. I have created feature columns to specify and bucketize/hash the data for TensorFlow. When I run the code, I receive the 'Unable to get element as bytes' internal error. Note: I did not want to drop the NaN values as stated in this article, so I converted them to 0 using this code: train = train.fillna(0, axis=0)
so I am not sure why I am still getting this error. If I use dropna then it works, but I do not want to drop the NaNs, as I feel they are needed for the model to train.
def create_train_input_fn():
    """Build the input function used to train the estimator.

    Wraps the externally defined ``train`` features and ``train_label``
    labels in a pandas input function that shuffles the rows, serves them
    in batches of 32 and, because ``num_epochs`` is ``None``, does not stop
    after a fixed number of passes over the data.

    Returns:
        The callable produced by ``tf.estimator.inputs.pandas_input_fn``.
    """
    # NOTE(review): the reported "Unable to get element as bytes" error is
    # raised while this input pipeline enqueues data; presumably a feature
    # or label column holds mixed or unexpected value types -- confirm the
    # dtypes of ``train`` and ``train_label``.
    input_options = dict(
        x=train,
        y=train_label,
        batch_size=32,
        num_epochs=None,
        shuffle=True,
    )
    return tf.estimator.inputs.pandas_input_fn(**input_options)
def create_test_input_fn():
    """Build the input function used to evaluate the estimator.

    Wraps the externally defined ``valid`` features and ``valid_label``
    labels in a pandas input function that makes a single pass over the
    data (``num_epochs=1``) without shuffling. No batch size is passed, so
    the library default applies.

    Returns:
        The callable produced by ``tf.estimator.inputs.pandas_input_fn``.
    """
    make_input_fn = tf.estimator.inputs.pandas_input_fn
    return make_input_fn(
        x=valid,
        y=valid_label,
        num_epochs=1,
        shuffle=False,
    )
def _hashed_embedding(key):
    """Return a 10-dimensional embedding of `key` hashed into 1000 buckets."""
    hashed = tf.feature_column.categorical_column_with_hash_bucket(
        key, hash_bucket_size=1000)
    return tf.feature_column.embedding_column(hashed, dimension=10)


def _bucketized(key, boundaries):
    """Return the numeric column `key` bucketized at `boundaries`."""
    numeric = tf.feature_column.numeric_column(key)
    return tf.feature_column.bucketized_column(numeric, boundaries=boundaries)


# Categorical log fields: hashed, then embedded.
end_time = _hashed_embedding('end_time')
device = _hashed_embedding('device')
device_os = _hashed_embedding('device_os')
device_os_version = _hashed_embedding('device_os_version')

# Numeric log fields: bucketized at fixed boundaries.
Latency = _bucketized(
    'Latency',
    [.000000, .000010, .000100, .001000, .010000, .100000])
Megacycles = _bucketized('Megacycles', [0, 50, 100, 200, 300])
Cost = _bucketized(
    'Cost',
    [0.000001e-08, 1.000000e-08, 5.000000e-08, 10.000000e-08, 15.000000e-08])

# Remaining categorical log fields.
device_brand = _hashed_embedding('device_brand')
device_family = _hashed_embedding('device_family')
browser_version = _hashed_embedding('browser_version')
app = _hashed_embedding('app')
ua_parse = _hashed_embedding('ua_parse')

# Every column handed to the classifier, in the order they were declared.
feature_columns = [
    end_time,
    device,
    device_os,
    device_os_version,
    Latency,
    Megacycles,
    Cost,
    device_brand,
    device_family,
    browser_version,
    app,
    ua_parse,
]
# Two-class DNN classifier with hidden layers of 256, 128 and 64 units;
# 'graphs/dnn' is passed as the estimator's model directory.
estimator = tf.estimator.DNNClassifier(
    feature_columns=feature_columns,
    hidden_units=[256, 128, 64],
    n_classes=2,
    model_dir='graphs/dnn',
)

# Train for 2000 steps on the batches produced by the training input function.
train_input_fn = create_train_input_fn()
estimator.train(input_fn=train_input_fn, steps=2000)
Then I receive this error:
InternalError                             Traceback (most recent call last)
<ipython-input-67-6abd6f1afc3a> in <module>()
1 train_input_fn = create_train_input_fn()
----> 2 estimator.train(train_input_fn, steps=2000)
/usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/estimator/estimator.pyc in train(self, input_fn, hooks, steps, max_steps, saving_listeners)
312
313 saving_listeners = _check_listeners_type(saving_listeners)
--> 314 loss = self._train_model(input_fn, hooks, saving_listeners)
315 logging.info('Loss for final step: %s.', loss)
316 return self
/usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/estimator/estimator.pyc in _train_model(self, input_fn, hooks, saving_listeners)
813 loss = None
814 while not mon_sess.should_stop():
--> 815 _, loss = mon_sess.run([estimator_spec.train_op, estimator_spec.loss])
816 return loss
817
/usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/training/monitored_session.pyc in __exit__(self, exception_type, exception_value, traceback)
649 if exception_type in [errors.OutOfRangeError, StopIteration]:
650 exception_type = None
--> 651 self._close_internal(exception_type)
652 # __exit__ should return True to suppress an exception.
653 return exception_type is None
/usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/training/monitored_session.pyc in _close_internal(self, exception_type)
686 if self._sess is None:
687 raise RuntimeError('Session is already closed.')
--> 688 self._sess.close()
689 finally:
690 self._sess = None
/usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/training/monitored_session.pyc in close(self)
932 if self._sess:
933 try:
--> 934 self._sess.close()
935 except _PREEMPTION_ERRORS:
936 pass
/usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/training/monitored_session.pyc in close(self)
1076 self._coord.join(
1077 stop_grace_period_secs=self._stop_grace_period_secs,
-> 1078 ignore_live_threads=True)
1079 finally:
1080 try:
/usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/training/coordinator.pyc in join(self, threads, stop_grace_period_secs, ignore_live_threads)
385 self._registered_threads = set()
386 if self._exc_info_to_raise:
--> 387 six.reraise(*self._exc_info_to_raise)
388 elif stragglers:
389 if ignore_live_threads:
/usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/estimator/inputs/queues/feeding_queue_runner.pyc in _run(self, sess, enqueue_op, feed_fn, coord)
92 try:
93 feed_dict = None if feed_fn is None else feed_fn()
---> 94 sess.run(enqueue_op, feed_dict=feed_dict)
95 except (errors.OutOfRangeError, errors.CancelledError):
96 # This exception indicates that a queue was closed.
/usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/client/session.pyc in run(self, fetches, feed_dict, options, run_metadata)
893 try:
894 result = self._run(None, fetches, feed_dict, options_ptr,
--> 895 run_metadata_ptr)
896 if run_metadata:
897 proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)
/usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/client/session.pyc in _run(self, handle, fetches, feed_dict, options, run_metadata)
1126 if final_fetches or final_targets or (handle and feed_dict_tensor):
1127 results = self._do_run(handle, final_targets, final_fetches,
-> 1128 feed_dict_tensor, options, run_metadata)
1129 else:
1130 results = []
/usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/client/session.pyc in _do_run(self, handle, target_list, fetch_list, feed_dict, options, run_metadata)
1342 if handle is None:
1343 return self._do_call(_run_fn, self._session, feeds, fetches, targets,
-> 1344 options, run_metadata)
1345 else:
1346 return self._do_call(_prun_fn, self._session, handle, feeds, fetches)
/usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/client/session.pyc in _do_call(self, fn, *args)
1361 except KeyError:
1362 pass
-> 1363 raise type(e)(node_def, op, message)
1364
1365 def _extend_graph(self):
InternalError: Unable to get element as bytes.
I agree with Thomas Decaux. I experienced exactly the same issue. I checked and found that my labels were represented as strings ("yes" and "no") rather than integers (1, 0). After converting the labels to int64, no such errors appeared.
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With