Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

TypeError: Could not build a TypeSpec for a column

I am trying to predict the Global Sales from the values 'Name', 'Platform', 'Genre', 'Publisher' and 'Year' from this dataset here: https://www.kaggle.com/gregorut/videogamesales

This is my code for training the model:

from __future__ import absolute_import, division, print_function, unicode_literals

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from IPython.display import clear_output
from six.moves import urllib

import tensorflow as tf

# Load the two CSV files into a training frame and an evaluation frame.
# NOTE(review): the file names look swapped -- the *training* frame is read from
# 'vgsales_eval.csv' and the *evaluation* frame from 'vgsales_train.csv'.
# Confirm which file is meant for which split.
dftrain = pd.read_csv('./vgsales_eval.csv')
dfeval = pd.read_csv('./vgsales_train.csv')

# Print every training row that has a null value in at least one column.
print(dftrain[dftrain.isnull().any(axis=1)])

# Remove the 'Global_Sales' column from each frame and keep it as the label series.
y_train = dftrain.pop('Global_Sales')
y_eval = dfeval.pop('Global_Sales')

# Names of the columns used as features, split into categorical and numeric ones.
CATEGORICAL_COLUMNS = ['Name', 'Platform', 'Genre', 'Publisher']
NUMERIC_COLUMNS = ['Year']

# Build one TensorFlow feature column per input feature.
feature_columns = []
for feature_name in CATEGORICAL_COLUMNS:
  # The vocabulary is taken from the training frame only.
  vocabulary = dftrain[feature_name].unique()  # gets a list of all unique values from given feature column
  feature_columns.append(tf.feature_column.categorical_column_with_vocabulary_list(feature_name, vocabulary))

for feature_name in NUMERIC_COLUMNS:
  # Numeric features are declared with a 64-bit integer dtype.
  feature_columns.append(tf.feature_column.numeric_column(feature_name, dtype=tf.int64))

# Show the resulting feature-column definitions.
print(feature_columns)

def make_input_fn(data_df, label_df, num_epochs=10, shuffle=True, batch_size=32):
  """Return a zero-argument function that builds the tf.data.Dataset for an estimator.

  Args:
    data_df: feature data; converted with dict(data_df) into a column-name -> values mapping.
    label_df: labels, sliced alongside the features.
    num_epochs: how many times the batched dataset is repeated.
    shuffle: when true, the elements are shuffled with a buffer of 1000.
    batch_size: number of elements per batch.

  Returns:
    The inner ``input_function``; nothing is built until it is called.
  """
  def input_function():  
    # One dataset element per row: (features dict, label).
    ds = tf.data.Dataset.from_tensor_slices((dict(data_df), label_df))  
    if shuffle:
      ds = ds.shuffle(1000)  
    # Batch first, then repeat the batched dataset num_epochs times.
    ds = ds.batch(batch_size).repeat(num_epochs)  
    return ds
  return input_function  

# Training input uses the defaults: 10 epochs, shuffled, batches of 32.
train_input_fn = make_input_fn(dftrain, y_train)  
# Evaluation input is a single, unshuffled pass over the evaluation frame.
eval_input_fn = make_input_fn(dfeval, y_eval, num_epochs=1, shuffle=False)

# NOTE(review): 'Global_Sales' looks like a continuous quantity, yet a classifier
# is created here -- confirm whether tf.estimator.LinearRegressor was intended.
linear_est = tf.estimator.LinearClassifier(feature_columns=feature_columns)
# Train on the training input function; this is the call that raises the error quoted below.
linear_est.train(train_input_fn)

I get the following error:

Traceback (most recent call last):
  File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\data\util\structure.py", line 93, in normalize_element
    spec = type_spec_from_value(t, use_fallback=False)
  File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\data\util\structure.py", line 466, in type_spec_from_value
    (element, type(element).__name__))
TypeError: Could not build a TypeSpec for 0                 Tecmo Koei
1       Nippon Ichi Software
2                    Ubisoft
3                 Activision
4                      Atari
                ...
6594                   Kemco
6595              Infogrames
6596              Activision
6597                7G//AMES
6598                 Wanadoo
Name: Publisher, Length: 6599, dtype: object with type Series

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "c:\Users\kuhn-\Documents\Github\Tensorflow_Test\VideoGameSales_Test\main.py", line 45, in <module>
    linear_est.train(train_input_fn)
  File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow_estimator\python\estimator\estimator.py", line 349, in train
    loss = self._train_model(input_fn, hooks, saving_listeners)
  File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow_estimator\python\estimator\estimator.py", line 1175, in _train_model
    return self._train_model_default(input_fn, hooks, saving_listeners)
  File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow_estimator\python\estimator\estimator.py", line 1201, in _train_model_default
    self._get_features_and_labels_from_input_fn(input_fn, ModeKeys.TRAIN))
  File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow_estimator\python\estimator\estimator.py", line 1037, in _get_features_and_labels_from_input_fn
    self._call_input_fn(input_fn, mode))
  File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow_estimator\python\estimator\estimator.py", line 1130, in _call_input_fn
    return input_fn(**kwargs)
  File "c:\Users\kuhn-\Documents\Github\Tensorflow_Test\VideoGameSales_Test\main.py", line 34, in input_function
    ds = tf.data.Dataset.from_tensor_slices((dict(data_df), label_df))
  File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\data\ops\dataset_ops.py", line 682, in from_tensor_slices
    return TensorSliceDataset(tensors)
  File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\data\ops\dataset_ops.py", line 3001, in __init__
    element = structure.normalize_element(element)
  File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\data\util\structure.py", line 98, in normalize_element
    ops.convert_to_tensor(t, name="component_%d" % i))
  File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\framework\ops.py", line 1499, in convert_to_tensor
    ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
  File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\framework\constant_op.py", line 338, in _constant_tensor_conversion_function
    return constant(v, dtype=dtype, name=name)
  File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\framework\constant_op.py", line 264, in constant
    allow_broadcast=True)
  File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\framework\constant_op.py", line 282, in _constant_impl
    allow_broadcast=allow_broadcast))
  File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\framework\tensor_util.py", line 563, in make_tensor_proto
    append_fn(tensor_proto, proto_values)
  File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\framework\tensor_util.py", line 155, in SlowAppendObjectArrayToTensorProto
    tensor_proto.string_val.extend([compat.as_bytes(x) for x in proto_values])
  File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\framework\tensor_util.py", line 155, in <listcomp>
    tensor_proto.string_val.extend([compat.as_bytes(x) for x in proto_values])
  File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\util\compat.py", line 87, in as_bytes
    (bytes_or_text,))
TypeError: Expected binary or unicode string, got nan

What am I doing wrong here? Is it a problem with the dataset or do I have to read the values differently?

like image 311
Lukas Kuhn Avatar asked Jul 28 '20 15:07

Lukas Kuhn


1 Answers

The error is caused by the null (NaN) values present in your data; you need to handle them when you load the data.

I have made a couple of changes.

  1. I dropped the records that contain null values. Alternatively, you can use df.fillna to fill each column with a value that suits its data type.
  2. I changed the data type of the Year column from float to int, since leaving it as float would lead to another problem with from_tensor_slices.

Below is the modified code with the same data you have taken.

# Load the full dataset and print the rows that have a null value in any column.
df = pd.read_csv('/content/vgsales.csv')
rows_with_nulls = df.isnull().any(axis=1)
print(df[rows_with_nulls])

# Discard every incomplete row (filling them with df.fillna(...) is the
# alternative), then store Year as an integer column.
df = df.dropna(how="any")
df["Year"] = df["Year"].astype(int)

# Feature names, split by how they are encoded.
CATEGORICAL_COLUMNS = ['Name', 'Platform', 'Genre', 'Publisher']
NUMERIC_COLUMNS = ['Year']

# Categorical features: the vocabulary is the set of distinct values found in df.
categorical_features = [
  tf.feature_column.categorical_column_with_vocabulary_list(column, df[column].unique())
  for column in CATEGORICAL_COLUMNS
]

# Numeric features are declared with a 64-bit integer dtype.
numeric_features = [
  tf.feature_column.numeric_column(column, dtype=tf.int64)
  for column in NUMERIC_COLUMNS
]

# Categorical columns first, numeric columns after them.
feature_columns = categorical_features + numeric_features

print(feature_columns)

def make_input_fn(data_df, label_df, num_epochs=10, shuffle=True, batch_size=32):
  """Create the input function an estimator calls to obtain its dataset.

  Args:
    data_df: feature data; passed through dict() to get a name -> values mapping.
    label_df: labels, sliced row by row together with the features.
    num_epochs: number of times the batched dataset is repeated.
    shuffle: shuffle the elements (buffer of 1000) before batching.
    batch_size: number of elements per batch.

  Returns:
    A zero-argument callable that builds and returns the tf.data.Dataset.
  """
  def input_function():
    # Each dataset element is a (features dict, label) pair for one row.
    dataset = tf.data.Dataset.from_tensor_slices((dict(data_df), label_df))
    dataset = dataset.shuffle(1000) if shuffle else dataset
    # Batch before repeating, so it is the batched dataset that gets repeated.
    batched = dataset.batch(batch_size)
    return batched.repeat(num_epochs)

  return input_function

# Take the target column from the *cleaned* frame so that features and labels
# cover exactly the same rows; the labels were never defined in this script.
y_train = df.pop('Global_Sales')

train_input_fn = make_input_fn(df, y_train)
# Global_Sales is a continuous value, so this is a regression problem: use a
# linear regressor rather than a classifier.
linear_est = tf.estimator.LinearRegressor(feature_columns=feature_columns)
like image 50
Tfer3 Avatar answered Nov 03 '22 13:11

Tfer3