I am trying to predict the Global Sales from the values 'Name', 'Platform', 'Genre', 'Publisher' and 'Year' from this dataset here: https://www.kaggle.com/gregorut/videogamesales
This is my code for training the model:
from __future__ import absolute_import, division, print_function, unicode_literals
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from IPython.display import clear_output
from six.moves import urllib
import tensorflow as tf
# Load the two CSV splits into DataFrames.
# NOTE(review): the training frame is read from the file named "eval" and the
# evaluation frame from the file named "train" -- confirm this is intended.
dftrain = pd.read_csv('./vgsales_eval.csv')
dfeval = pd.read_csv('./vgsales_train.csv')

# Show every training row that has at least one missing value.
print(dftrain[dftrain.isnull().any(axis=1)])

# Separate the label column from the feature columns.
y_train = dftrain.pop('Global_Sales')
y_eval = dfeval.pop('Global_Sales')

CATEGORICAL_COLUMNS = ['Name', 'Platform', 'Genre', 'Publisher']
NUMERIC_COLUMNS = ['Year']

feature_columns = []
for feature_name in CATEGORICAL_COLUMNS:
    # All unique values of this column in the training frame form its vocabulary.
    vocabulary = dftrain[feature_name].unique()
    feature_columns.append(
        tf.feature_column.categorical_column_with_vocabulary_list(feature_name, vocabulary))
for feature_name in NUMERIC_COLUMNS:
    feature_columns.append(tf.feature_column.numeric_column(feature_name, dtype=tf.int64))
print(feature_columns)
def make_input_fn(data_df, label_df, num_epochs=10, shuffle=True, batch_size=32):
    """Build an input function that feeds a DataFrame to a tf.estimator.

    Args:
        data_df: Feature table; it is passed through ``dict(data_df)`` so each
            column becomes one named entry of the features mapping.
        label_df: Labels, sliced together with the features.
        num_epochs: How many times the batched dataset is repeated.
        shuffle: When true, shuffle with a 1000-element buffer before batching.
        batch_size: Number of rows per batch.

    Returns:
        A zero-argument callable that builds and returns the ``tf.data.Dataset``
        each time it is invoked.
    """
    def input_function():
        ds = tf.data.Dataset.from_tensor_slices((dict(data_df), label_df))
        if shuffle:
            ds = ds.shuffle(1000)
        ds = ds.batch(batch_size).repeat(num_epochs)
        return ds

    return input_function
# Training input: uses the make_input_fn defaults (10 epochs, shuffled, batches of 32).
train_input_fn = make_input_fn(dftrain, y_train)
# Evaluation input: a single, unshuffled pass over the evaluation frame.
eval_input_fn = make_input_fn(dfeval, y_eval, num_epochs=1, shuffle=False)
# NOTE(review): Global_Sales looks like a continuous target, so a classifier may
# not be the right estimator here -- confirm (tf.estimator.LinearRegressor is the
# regression counterpart).
linear_est = tf.estimator.LinearClassifier(feature_columns=feature_columns)
linear_est.train(train_input_fn)
I get the following error:
Traceback (most recent call last):
File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\data\util\structure.py", line 93, in normalize_element
spec = type_spec_from_value(t, use_fallback=False)
File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\data\util\structure.py", line 466, in type_spec_from_value
(element, type(element).__name__))
TypeError: Could not build a TypeSpec for 0 Tecmo Koei
1 Nippon Ichi Software
2 Ubisoft
3 Activision
4 Atari
...
6594 Kemco
6595 Infogrames
6596 Activision
6597 7G//AMES
6598 Wanadoo
Name: Publisher, Length: 6599, dtype: object with type Series
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "c:\Users\kuhn-\Documents\Github\Tensorflow_Test\VideoGameSales_Test\main.py", line 45, in <module>
linear_est.train(train_input_fn)
File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow_estimator\python\estimator\estimator.py", line 349, in train
loss = self._train_model(input_fn, hooks, saving_listeners)
File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow_estimator\python\estimator\estimator.py", line 1175, in _train_model
return self._train_model_default(input_fn, hooks, saving_listeners)
File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow_estimator\python\estimator\estimator.py", line 1201, in _train_model_default
self._get_features_and_labels_from_input_fn(input_fn, ModeKeys.TRAIN))
File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow_estimator\python\estimator\estimator.py", line 1037, in _get_features_and_labels_from_input_fn
self._call_input_fn(input_fn, mode))
File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow_estimator\python\estimator\estimator.py", line 1130, in _call_input_fn
return input_fn(**kwargs)
File "c:\Users\kuhn-\Documents\Github\Tensorflow_Test\VideoGameSales_Test\main.py", line 34, in input_function
ds = tf.data.Dataset.from_tensor_slices((dict(data_df), label_df))
File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\data\ops\dataset_ops.py", line 682, in from_tensor_slices
return TensorSliceDataset(tensors)
File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\data\ops\dataset_ops.py", line 3001, in __init__
element = structure.normalize_element(element)
File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\data\util\structure.py", line 98, in normalize_element
ops.convert_to_tensor(t, name="component_%d" % i))
File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\framework\ops.py", line 1499, in convert_to_tensor
ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\framework\constant_op.py", line 338, in _constant_tensor_conversion_function
return constant(v, dtype=dtype, name=name)
File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\framework\constant_op.py", line 264, in constant
allow_broadcast=True)
File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\framework\constant_op.py", line 282, in _constant_impl
allow_broadcast=allow_broadcast))
File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\framework\tensor_util.py", line 563, in make_tensor_proto
append_fn(tensor_proto, proto_values)
File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\framework\tensor_util.py", line 155, in SlowAppendObjectArrayToTensorProto
tensor_proto.string_val.extend([compat.as_bytes(x) for x in proto_values])
File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\framework\tensor_util.py", line 155, in <listcomp>
tensor_proto.string_val.extend([compat.as_bytes(x) for x in proto_values])
File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\util\compat.py", line 87, in as_bytes
(bytes_or_text,))
TypeError: Expected binary or unicode string, got nan
What am I doing wrong here? Is it a problem with the dataset or do I have to read the values differently?
This error is caused by the null (NaN) values present in the data you are using; you need to handle them when you load the data.
I have made a couple of changes:
1. Dropped the rows that contain null values. Alternatively, you can use `df.fillna`, choosing the fill value for each column according to its data type.
2. Changed the `Year` column's datatype from float to int, since leaving it as float would lead to another problem in `from_tensor_slices`.
Below is the modified code with the same data you have taken.
df = pd.read_csv('/content/vgsales.csv')

# Show the rows that contain at least one missing value before removing them.
print(df[df.isnull().any(axis=1)])

# Drop incomplete rows: a NaN in a string column cannot be converted to a tensor
# ("Expected binary or unicode string, got nan").
# Alternative: df.fillna(...) with a fill value chosen per column and data type.
df = df.dropna(how="any")

# Year is read as float while NaNs are present; cast it to an explicit 64-bit
# integer so it matches the tf.int64 numeric column declared below.
df['Year'] = df['Year'].astype('int64')

CATEGORICAL_COLUMNS = ['Name', 'Platform', 'Genre', 'Publisher']
NUMERIC_COLUMNS = ['Year']

feature_columns = []
for feature_name in CATEGORICAL_COLUMNS:
    # All unique values of this column form its vocabulary.
    vocabulary = df[feature_name].unique()
    feature_columns.append(
        tf.feature_column.categorical_column_with_vocabulary_list(feature_name, vocabulary))
for feature_name in NUMERIC_COLUMNS:
    feature_columns.append(tf.feature_column.numeric_column(feature_name, dtype=tf.int64))
print(feature_columns)
def make_input_fn(data_df, label_df, num_epochs=10, shuffle=True, batch_size=32):
    """Return an input function for a tf.estimator built from a DataFrame.

    Args:
        data_df: Feature table; converted with ``dict(data_df)`` so every column
            becomes one named entry of the features mapping.
        label_df: Labels, sliced together with the features.
        num_epochs: Number of times the batched dataset is repeated.
        shuffle: If true, shuffle with a 1000-element buffer before batching.
        batch_size: Number of rows per batch.

    Returns:
        A zero-argument callable; calling it builds and returns the
        ``tf.data.Dataset``.
    """
    def input_function():
        ds = tf.data.Dataset.from_tensor_slices((dict(data_df), label_df))
        if shuffle:
            ds = ds.shuffle(1000)
        ds = ds.batch(batch_size).repeat(num_epochs)
        return ds

    return input_function
# Split the regression target off the cleaned frame; the labels must come from
# the same rows as the features passed to make_input_fn.
y_train = df.pop('Global_Sales')
train_input_fn = make_input_fn(df, y_train)
# Global_Sales is a continuous value, so use a regressor; LinearClassifier is
# meant for discrete class labels.
linear_est = tf.estimator.LinearRegressor(feature_columns=feature_columns)
If you love us, you can donate to us via PayPal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With