TypeError: Could not build a TypeSpec for a column

Question

I am trying to predict the Global Sales from the values 'Name', 'Platform', 'Genre', 'Publisher' and 'Year' from this dataset here: https://www.kaggle.com/gregorut/videogamesales

This is my code for training the model:

from __future__ import absolute_import, division, print_function, unicode_literals

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from IPython.display import clear_output
from six.moves import urllib

import tensorflow as tf

# NOTE(review): the variable names and file names look crossed here -- dftrain is
# read from vgsales_eval.csv and dfeval from vgsales_train.csv. Confirm this is intended.
dftrain = pd.read_csv('./vgsales_eval.csv')
dfeval = pd.read_csv('./vgsales_train.csv')

# Print every row of dftrain that has at least one missing (null) value.
# The rows are only printed here; nothing is removed or filled in.
print(dftrain[dftrain.isnull().any(axis=1)])

# Remove the Global_Sales column from each frame and keep it as the label series.
y_train = dftrain.pop('Global_Sales')
y_eval = dfeval.pop('Global_Sales')

CATEGORICAL_COLUMNS = ['Name', 'Platform', 'Genre', 'Publisher']
NUMERIC_COLUMNS = ['Year']

# Build one vocabulary-list categorical column per name in CATEGORICAL_COLUMNS,
# then one int64 numeric column per name in NUMERIC_COLUMNS.
feature_columns = []
for feature_name in CATEGORICAL_COLUMNS:
  vocabulary = dftrain[feature_name].unique()  # gets a list of all unique values from given feature column
  feature_columns.append(tf.feature_column.categorical_column_with_vocabulary_list(feature_name, vocabulary))

for feature_name in NUMERIC_COLUMNS:
  feature_columns.append(tf.feature_column.numeric_column(feature_name, dtype=tf.int64))

print(feature_columns)

def make_input_fn(data_df, label_df, num_epochs=10, shuffle=True, batch_size=32):
  """Return a zero-argument function that builds a tf.data.Dataset.

  The returned function slices (dict(data_df), label_df) into a dataset,
  optionally shuffles it, batches it by batch_size and repeats it
  num_epochs times.
  """
  def input_function():  
    # Convert the frame to a dict of columns and slice it together with the labels.
    ds = tf.data.Dataset.from_tensor_slices((dict(data_df), label_df))  
    if shuffle:
      ds = ds.shuffle(1000)  # shuffle with a buffer of 1000 elements
    # Batch first, then repeat the batched dataset num_epochs times.
    ds = ds.batch(batch_size).repeat(num_epochs)  
    return ds
  return input_function  

# Training input uses the defaults (10 epochs, shuffled); the evaluation input
# makes a single, unshuffled pass.
train_input_fn = make_input_fn(dftrain, y_train)  
eval_input_fn = make_input_fn(dfeval, y_eval, num_epochs=1, shuffle=False)

# NOTE(review): LinearClassifier is a classification estimator, while the stated
# goal is to predict Global_Sales -- presumably a continuous value; confirm
# whether tf.estimator.LinearRegressor was intended.
linear_est = tf.estimator.LinearClassifier(feature_columns=feature_columns)
linear_est.train(train_input_fn)

I get the following error:

Traceback (most recent call last):
  File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\data\util\structure.py", line 93, in normalize_element
    spec = type_spec_from_value(t, use_fallback=False)
  File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\data\util\structure.py", line 466, in type_spec_from_value
    (element, type(element).__name__))
TypeError: Could not build a TypeSpec for 0                 Tecmo Koei
1       Nippon Ichi Software
2                    Ubisoft
3                 Activision
4                      Atari
                ...
6594                   Kemco
6595              Infogrames
6596              Activision
6597                7G//AMES
6598                 Wanadoo
Name: Publisher, Length: 6599, dtype: object with type Series

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "c:\Users\kuhn-\Documents\Github\Tensorflow_Test\VideoGameSales_Test\main.py", line 45, in <module>
    linear_est.train(train_input_fn)
  File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow_estimator\python\estimator\estimator.py", line 349, in train
    loss = self._train_model(input_fn, hooks, saving_listeners)
  File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow_estimator\python\estimator\estimator.py", line 1175, in _train_model
    return self._train_model_default(input_fn, hooks, saving_listeners)
  File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow_estimator\python\estimator\estimator.py", line 1201, in _train_model_default
    self._get_features_and_labels_from_input_fn(input_fn, ModeKeys.TRAIN))
  File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow_estimator\python\estimator\estimator.py", line 1037, in _get_features_and_labels_from_input_fn
    self._call_input_fn(input_fn, mode))
  File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow_estimator\python\estimator\estimator.py", line 1130, in _call_input_fn
    return input_fn(**kwargs)
  File "c:\Users\kuhn-\Documents\Github\Tensorflow_Test\VideoGameSales_Test\main.py", line 34, in input_function
    ds = tf.data.Dataset.from_tensor_slices((dict(data_df), label_df))
  File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\data\ops\dataset_ops.py", line 682, in from_tensor_slices
    return TensorSliceDataset(tensors)
  File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\data\ops\dataset_ops.py", line 3001, in __init__
    element = structure.normalize_element(element)
  File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\data\util\structure.py", line 98, in normalize_element
    ops.convert_to_tensor(t, name="component_%d" % i))
  File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\framework\ops.py", line 1499, in convert_to_tensor
    ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
  File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\framework\constant_op.py", line 338, in _constant_tensor_conversion_function
    return constant(v, dtype=dtype, name=name)
  File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\framework\constant_op.py", line 264, in constant
    allow_broadcast=True)
  File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\framework\constant_op.py", line 282, in _constant_impl
    allow_broadcast=allow_broadcast))
  File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\framework\tensor_util.py", line 563, in make_tensor_proto
    append_fn(tensor_proto, proto_values)
  File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\framework\tensor_util.py", line 155, in SlowAppendObjectArrayToTensorProto
    tensor_proto.string_val.extend([compat.as_bytes(x) for x in proto_values])
  File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\framework\tensor_util.py", line 155, in <listcomp>
    tensor_proto.string_val.extend([compat.as_bytes(x) for x in proto_values])
  File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\util\compat.py", line 87, in as_bytes
    (bytes_or_text,))
TypeError: Expected binary or unicode string, got nan

What am I doing wrong here? Is it a problem with the dataset or do I have to read the values differently?

Tfer3 · Accepted Answer

The error is caused by the null (NaN) values present in your data; you need to handle them when you load the data.

I have made a couple of changes:

I drop the records that contain null values. Alternatively, you can use df.fillna per column, choosing fill values that match each column's data type.
I have changed the datatype of the Year column from float to int, since it would otherwise lead to another problem with from_tensor_slices.

Below is the modified code with the same data you have taken.

df = pd.read_csv('/content/vgsales.csv')

# Show the rows that contain at least one null value before they are removed.
rows_with_nulls = df[df.isnull().any(axis=1)]
print(rows_with_nulls)

# Drop every row containing a null value. The alternative is a per-column
# df.fillna(...) with a fill value that matches the column's data type.
df.dropna(how="any", inplace=True)
# Year is cast to int so that it matches the int64 numeric feature column below.
df.Year = df.Year.astype(int)

CATEGORICAL_COLUMNS = ['Name', 'Platform', 'Genre', 'Publisher']
NUMERIC_COLUMNS = ['Year']

# Categorical features: the vocabulary is the set of unique values in each column.
categorical_features = [
  tf.feature_column.categorical_column_with_vocabulary_list(column_name, df[column_name].unique())
  for column_name in CATEGORICAL_COLUMNS
]
# Numeric features are declared as int64.
numeric_features = [
  tf.feature_column.numeric_column(column_name, dtype=tf.int64)
  for column_name in NUMERIC_COLUMNS
]
feature_columns = categorical_features + numeric_features

print(feature_columns)

def make_input_fn(data_df, label_df, num_epochs=10, shuffle=True, batch_size=32):
  """Create an input function for a tf.estimator.

  Returns a zero-argument callable. When called, it slices
  (dict(data_df), label_df) into a tf.data.Dataset, shuffles it with a
  buffer of 1000 if `shuffle` is true, batches it by `batch_size` and
  repeats the batched dataset `num_epochs` times.
  """
  def input_function():
    features = dict(data_df)
    dataset = tf.data.Dataset.from_tensor_slices((features, label_df))
    if shuffle:
      dataset = dataset.shuffle(1000)
    batched = dataset.batch(batch_size)
    return batched.repeat(num_epochs)
  return input_function

# Split the label column off the cleaned frame. The snippet never defined
# y_train, so the make_input_fn call below would raise NameError without this.
y_train = df.pop('Global_Sales')

train_input_fn = make_input_fn(df, y_train)
# The goal is to predict the Global_Sales value (a regression target), so a
# regressor is used instead of LinearClassifier, which expects class labels.
linear_est = tf.estimator.LinearRegressor(feature_columns=feature_columns)

TypeError: Could not build a TypeSpec for a column

Tags:

python

python-3.x

machine-learning

tensorflow

Lukas Kuhn

1 Answers

Tfer3

Recent Activity

Donate For Us

TypeError: Could not build a TypeSpec for a column

Tags:

python

python-3.x

machine-learning

tensorflow

Lukas Kuhn

1 Answers

Tfer3

Related questions

Recent Activity

Donate For Us