tensorflow lite model gives very different accuracy value compared to python model

I am using TensorFlow 1.10 with Python 3.6.

My code is based on the premade iris classification model provided by TensorFlow. That is, I am using a premade TensorFlow DNN classifier, with the following differences:

  • 10 features instead of 4.
  • 5 classes instead of 3.

The test and training files can be downloaded from the following link: https://www.dropbox.com/sh/nmu8i2i8xe6hvfq/AADQEOIHH8e-kUHQf8zmmDMDa?dl=0

I have written code to export this classifier to the tflite format. However, the accuracy of the Python model is higher than 75%, but after exporting, the accuracy decreases to approximately 45%; that is, roughly 30 percentage points of accuracy are lost, which is far too much. I have tried the code with different data sets, and in all of them the accuracy drops considerably after exporting. This makes me think that something is going wrong in the TocoConverter function, or that I am exporting to tflite incorrectly (missing a parameter or something similar).

This is the way I generate the model:

classifier = tf.estimator.DNNClassifier(
        hidden_units=[100, 500],

And this is the function I am using to convert to tflite:

converter = tf.contrib.lite.TocoConverter.from_frozen_graph(final_model_path, input_arrays, output_arrays, input_shapes={"dnn/input_from_feature_columns/input_layer/concat": [1, 10]})
        tflite_model = converter.convert()

Below is the complete code, in which I also calculate the accuracy of the resulting .tflite file.

import argparse
import tensorflow as tf

import pandas as pd
import csv

from tensorflow.python.tools import freeze_graph
from tensorflow.python.tools import optimize_for_inference_lib
import numpy as np

parser = argparse.ArgumentParser()
parser.add_argument('--batch_size', default=100, type=int, help='batch size')
parser.add_argument('--train_steps', default=1000, type=int,
                    help='number of training steps')

features_global = None
feature_spec = None

MODEL_NAME = 'myModel'

def load_data(train_path, test_path):
    """Load the training and test CSV files and split off the label column.

    The column names are taken from the header row of ``train_path`` and are
    applied to both files; the last column is treated as the label.

    Returns:
        ``(train_x, train_y), (test_x, test_y)`` where each ``*_x`` is the
        DataFrame of feature columns and each ``*_y`` is the label Series.
    """
    # Only the header row of the training file is needed to name the columns.
    with open(train_path, newline='') as header_file:
        column_names = next(csv.reader(header_file))

    label_column = column_names[-1]

    def _read_split(path):
        # header=0 discards the file's own header row in favour of `names`.
        frame = pd.read_csv(path, names=column_names, header=0)
        labels = frame.pop(label_column)
        return frame, labels

    train_split = _read_split(train_path)
    test_split = _read_split(test_path)
    return train_split, test_split

def train_input_fn(features, labels, batch_size):
    """Build the training dataset: shuffled, repeated and batched.

    ``features`` is converted with ``dict(...)`` and sliced together with
    ``labels`` into ``(features, label)`` examples. The result is shuffled
    with a buffer of 1000, repeated without a count, and grouped into batches
    of ``batch_size``.
    """
    feature_dict = dict(features)
    examples = tf.data.Dataset.from_tensor_slices((feature_dict, labels))

    shuffled = examples.shuffle(1000)
    repeated = shuffled.repeat()
    return repeated.batch(batch_size)

def eval_input_fn(features, labels, batch_size):
    """An input function for evaluation or prediction.

    Args:
        features: Mapping-like object (e.g. a pandas DataFrame) from feature
            name to that feature's values.
        labels: Label values aligned with ``features``, or ``None`` when only
            features should be fed (prediction).
        batch_size: Number of examples per batch; must not be ``None``.

    Returns:
        A batched ``tf.data.Dataset`` built from ``features`` alone when
        ``labels`` is ``None``, otherwise from ``(features, labels)``.

    Raises:
        AssertionError: If ``batch_size`` is ``None``.
    """
    # Same conversion train_input_fn applies, so both input functions hand
    # the estimator a name -> values mapping.
    features = dict(features)

    if labels is None:
        # No labels, use only features.
        inputs = features
    else:
        # Without this branch `inputs` was unbound whenever labels were given
        # and became (features, None) whenever they were not.
        inputs = (features, labels)

    # Convert the inputs to a Dataset.
    dataset = tf.data.Dataset.from_tensor_slices(inputs)

    # Batch the examples
    assert batch_size is not None, "batch_size must not be None"
    dataset = dataset.batch(batch_size)

    # Return the dataset.
    return dataset

def main(argv):
    """Train the DNN classifier, convert it to TFLite and score the .tflite file.

    NOTE(review): several statements in this listing are cut off (unclosed
    calls, `for`/`with` headers without bodies), so the function does not
    parse as shown. The comments below describe only what is visible and flag
    the gaps; they do not attempt to reconstruct the missing code.

    Args:
        argv: Command-line arguments; the first element is skipped before
            parsing.
    """
    args = parser.parse_args(argv[1:])

    train_path = "trainData.csv"
    test_path = "testData.csv"

    # Fetch the data
    (train_x, train_y), (test_x, test_y) = load_data(train_path, test_path)

    # Load labels
    num_labels = 5

    # Feature columns describe how to use the input.
    my_feature_columns = []
    for key in train_x.keys():
    # NOTE(review): the loop body is missing here — presumably it appends one
    # feature column per key to my_feature_columns; confirm against the
    # original script.

    # Build 2 hidden layer DNN
    classifier = tf.estimator.DNNClassifier(
        hidden_units=[100, 500],
        # The model must choose between 'num_labels' classes.
        # NOTE(review): the remaining arguments and the closing parenthesis of
        # this call are not visible.

    # Train the Model
        # NOTE(review): the call that owns this input_fn argument (and the
        # rest of its arguments) is not visible.
        input_fn=lambda:train_input_fn(train_x, train_y,

    # Evaluate the model.
    eval_result = classifier.evaluate(
        input_fn=lambda:eval_input_fn(test_x, test_y,
        # NOTE(review): the batch-size argument and closing parentheses are
        # not visible.

    print('\nTest set accuracy: {accuracy:0.3f}\n'.format(**eval_result))

    # Export model
    feature_spec = tf.feature_column.make_parse_example_spec(my_feature_columns)
    serve_input_fun = tf.estimator.export.build_parsing_serving_input_receiver_fn(feature_spec)
    saved_model_path = classifier.export_savedmodel(
    # NOTE(review): the arguments of export_savedmodel are not visible, and
    # `var` below is not read anywhere in the visible code.
    var = tf.Variable(0)
    with tf.Session() as sess:
        # First let's load meta graph and restore weights
        latest_checkpoint_path = classifier.latest_checkpoint()
        saver = tf.train.import_meta_graph(latest_checkpoint_path + '.meta')
        saver.restore(sess, latest_checkpoint_path)

        # Graph node names used as the converted model's input and output.
        # The output is the logits node, so the scores compared further down
        # are raw logits.
        input_arrays = ["dnn/input_from_feature_columns/input_layer/concat"]
        output_arrays = ["dnn/logits/BiasAdd"]

        # NOTE(review): the output-node argument and closing parenthesis of
        # this call are not visible.
        frozen_graph_def = tf.graph_util.convert_variables_to_constants(
            sess, sess.graph_def,

        frozen_graph = "out/frozen_graph.pb"

        # NOTE(review): the bodies of the three `with` blocks below (the
        # actual writes) are not visible.
        with tf.gfile.FastGFile(frozen_graph, "wb") as f:

        # save original graphdef to text file
        with open("estimator_graph.pbtxt", "w") as fp:
        # save frozen graph def to text file
        with open("estimator_frozen_graph.pbtxt", "w") as fp:

        input_node_names = input_arrays
        output_node_name = output_arrays
        # NOTE(review): the remaining argument(s) of optimize_for_inference
        # are not visible.
        output_graph_def = optimize_for_inference_lib.optimize_for_inference(
                frozen_graph_def, input_node_names, output_node_name,

        final_model_path = 'out/opt_' + MODEL_NAME + '.pb'
        # NOTE(review): the body that writes the optimized graph is not
        # visible.
        with tf.gfile.FastGFile(final_model_path, "wb") as f:

        tflite_file = "out/iris.tflite"

        # Convert the optimized frozen graph to a TFLite flatbuffer with a
        # fixed [1, 10] input shape (one example, ten features).
        converter = tf.contrib.lite.TocoConverter.from_frozen_graph(final_model_path, input_arrays, output_arrays, input_shapes={"dnn/input_from_feature_columns/input_layer/concat": [1, 10]})
        tflite_model = converter.convert()
        open(tflite_file, "wb").write(tflite_model)

        # NOTE(review): no interpreter.allocate_tensors() call is visible
        # before the tensors are set below — confirm it exists in the real
        # script.
        interpreter = tf.contrib.lite.Interpreter(model_path=tflite_file)

        # Get input and output tensors.
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()

        # Test model on random input data.
        input_shape = input_details[0]['shape']
        # change the following line to feed into your own data.
        # NOTE(review): input_data is not used in the visible code; the loop
        # below feeds rows read from the test CSV instead.
        input_data = np.array(np.random.random_sample(input_shape), dtype=np.float32)
        resultlist = list()
        df = pd.read_csv(test_path)
        # Expected labels: the last column of the test file.
        expected = df.iloc[:, -1].values.tolist()
        with open(test_path, newline='') as f:
            reader = csv.reader(f)
            # Consume the header row so the loop starts at the first data row.
            column_names = next(reader)
            for x in range(0, len(expected)):
                linea = next(reader)
                # Drop the trailing label column; the rest are the features.
                linea = linea[:len(linea) - 1]
                input_data2 = np.array(linea, dtype=np.float32)
                interpreter.set_tensor(input_details[0]['index'], [input_data2])
                # NOTE(review): no interpreter.invoke() is visible between
                # set_tensor and get_tensor; without it the output would not
                # reflect the row just set — confirm against the real script.
                output_data = interpreter.get_tensor(output_details[0]['index'])
                # `max` (shadows the builtin) tracks the index of the largest
                # output score, i.e. a manual argmax.
                max = 0;
                longitud = len(output_data[0])

                for k in range(0, longitud):
                    if (output_data[0][k] > output_data[0][max]):
                        max = k
                # NOTE(review): nothing visible appends `max` to resultlist,
                # so as shown the comparison loop below sees an empty list.

        # Count predictions that equal the expected label.
        coincidences = 0
        for pred_dict, expec in zip(resultlist, expected):
            if pred_dict == expec:
                coincidences = coincidences + 1

        print("tflite Accuracy: " + str(coincidences / len(expected)))

if __name__ == '__main__':

I hope some of you can identify the error or suggest a possible solution.

2 Answers

This question, which is answered here, might help.

As mentioned in the answer shared there, doing some pre-processing on the image before it is fed into "interpreter.invoke()" solves the issue, if that was the problem in the first place.

To elaborate on that here is a block quote from the shared link:

The below code you see is what I meant by pre-processing:

test_image = cv2.imread(file_name)

test_image = cv2.resize(test_image,(299,299),cv2.INTER_AREA)

test_image = np.expand_dims((test_image)/255,axis=0).astype(np.float32)

interpreter.set_tensor(input_tensor_index, test_image)


digit = np.argmax(output()[0])


prediction = result[digit]

As you can see there are two crucial commands/pre-processing done on the image once it is read using "imread()":

i) The image should be resized to the size that is the "input_height" and "input_width" values of the input image/tensor that was used during the training. In my case (inception-v3) this was 299 for both "input_height" and "input_width". (Read the documentation of the model for this value or look for this variable in the file that you used to train or retrain the model)

ii) The next command in the above code is:

test_image = np.expand_dims((test_image)/255,axis=0).astype(np.float32)

I got this from the "formulae"/model code:

test_image = np.expand_dims((test_image - input_mean)/input_std, axis=0).astype(np.float32)

Reading the documentation revealed that for my architecture input_mean = 0 and input_std = 255.

Hope this helps.

I have met the same problem. It seems to me that the accuracy problem is mainly caused by failure to detect overlapping objects. I couldn't figure out what part of the code is wrong though.

