Here is a wrapper for a TensorFlow .pb frozen model (ImageNet classification):
import tensorflow as tf
import numpy as np
import cv2
from numba import cuda


class ModelWrapper():
    def __init__(self, model_filepath):
        self.graph_def = self.load_graph_def(model_filepath)
        self.graph = self.load_graph(self.graph_def)
        self.set_inputs_and_outputs()
        self.sess = tf.Session(graph=self.graph)
        print(self.__class__.__name__, 'call __init__')  #

    def load_graph_def(self, model_filepath):
        # Expects frozen graph in .pb format
        with tf.gfile.GFile(model_filepath, "rb") as f:
            graph_def = tf.GraphDef()
            graph_def.ParseFromString(f.read())
        return graph_def

    def load_graph(self, graph_def):
        with tf.Graph().as_default() as graph:
            tf.import_graph_def(graph_def, name="")
        return graph

    def set_inputs_and_outputs(self):
        input_list = []
        for op in self.graph.get_operations():  # tensorflow.python.framework.ops.Operation
            if op.type == "Placeholder":
                input_list.append(op.name)
        print('Inputs:', input_list)

        all_name_list = []
        input_name_list = []
        for node in self.graph_def.node:  # tensorflow.core.framework.node_def_pb2.NodeDef
            all_name_list.append(node.name)
            input_name_list.extend(node.input)
        output_list = list(set(all_name_list) - set(input_name_list))
        print('Outputs:', output_list)

        self.inputs = []
        self.input_tensor_names = [name + ":0" for name in input_list]
        for input_tensor_name in self.input_tensor_names:
            self.inputs.append(self.graph.get_tensor_by_name(input_tensor_name))
        self.outputs = []
        self.output_tensor_names = [name + ":0" for name in output_list]
        for output_tensor_name in self.output_tensor_names:
            self.outputs.append(self.graph.get_tensor_by_name(output_tensor_name))

        input_dim_list = []
        for op in self.graph.get_operations():  # tensorflow.python.framework.ops.Operation
            if op.type == "Placeholder":
                bs = op.get_attr('shape').dim[0].size
                h = op.get_attr('shape').dim[1].size
                w = op.get_attr('shape').dim[2].size
                c = op.get_attr('shape').dim[3].size
                input_dim_list.append([bs, h, w, c])
        assert len(input_dim_list) == 1
        _, self.input_img_h, self.input_img_w, _ = input_dim_list[0]

    def predict(self, img):
        h, w, c = img.shape
        if h != self.input_img_h or w != self.input_img_w:
            img = cv2.resize(img, (self.input_img_w, self.input_img_h))
        batch = img[np.newaxis, ...]
        feed_dict = {self.inputs[0]: batch}
        outputs = self.sess.run(self.outputs, feed_dict=feed_dict)  # (1, 1001)
        output = outputs[0]
        return output

    def __del__(self):
        print(self.__class__.__name__, 'call __del__')  #
        import time  #
        time.sleep(3)  #
        cuda.close()
What I'm trying to do is to clean up GPU memory once I no longer need a model. In this example I just create and delete the model in a loop, but in real life it could be several different models.
wget https://storage.googleapis.com/download.tensorflow.org/models/inception_v3_2016_08_28_frozen.pb.tar.gz
tar -xvzf inception_v3_2016_08_28_frozen.pb.tar.gz
rm -f imagenet_slim_labels.txt
rm -f inception_v3_2016_08_28_frozen.pb.tar.gz
import os
import time

import tensorflow as tf
import numpy as np

from model_wrapper import ModelWrapper

MODEL_FILEPATH = './inception_v3_2016_08_28_frozen.pb'

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)


def create_and_delete_in_loop():
    for i in range(10):
        print('-' * 60)
        print('i:', i)
        model = ModelWrapper(MODEL_FILEPATH)
        input_batch = np.zeros((model.input_img_h, model.input_img_w, 3), np.uint8)
        y_pred = model.predict(input_batch)
        print('y_pred.shape', y_pred.shape)
        print('np.argmax(y_pred)', np.argmax(y_pred))
        del model


if __name__ == "__main__":
    create_and_delete_in_loop()

    print('START WAITING')
    time.sleep(10)
    print('END OF THE PROGRAM!')
Output:
------------------------------------------------------------
i: 0
Inputs: ['input']
Outputs: ['InceptionV3/Predictions/Reshape_1']
ModelWrapper call __init__
y_pred.shape (1, 1001)
np.argmax(y_pred) 112
ModelWrapper call __del__
------------------------------------------------------------
i: 1
Inputs: ['input']
Outputs: ['InceptionV3/Predictions/Reshape_1']
ModelWrapper call __init__
Segmentation fault (core dumped)
What is the proper way of releasing GPU memory?
TL;DR Run your function as a new process+.

tf.reset_default_graph() is not guaranteed to release memory#. When a process dies, all the memory it was given (including your GPU memory) will be released. Not only does this help keep things neatly organized, but you can also analyze how much CPU, GPU, RAM, and GPU memory each process consumes.
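For example, to confirm that the GPU memory really is freed after each child process exits, you can query nvidia-smi from the parent between runs. This is a minimal sketch of my own (the helper gpu_memory_used_mb is not part of the original code), assuming nvidia-smi is on the PATH:

import subprocess

def gpu_memory_used_mb():
    """Return the GPU memory currently in use (MiB) as reported by nvidia-smi."""
    out = subprocess.check_output(
        ['nvidia-smi', '--query-gpu=memory.used', '--format=csv,noheader,nounits'])
    # One line per GPU; this sketch only looks at the first GPU.
    return int(out.decode().splitlines()[0])

# Usage: call it before starting a child process and again after join() to compare.
print('GPU memory used (MiB):', gpu_memory_used_mb())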
For example, if you had these functions,
def train_model(x, y, params):
    model = ModelWrapper(params.filepath)
    model.fit(x, y, epochs=params.epochs)

def predict_model(x, params):
    model = ModelWrapper(params.filepath)
    y_pred = model.predict(x)
    print(y_pred.shape)
You can use it like,
import multiprocessing

for i in range(8):
    print(f"Training Model {i} from {params.filepath}")
    process_train = multiprocessing.Process(target=train_model, args=(x_train, y_train, params))
    process_train.start()
    process_train.join()

print("Predicting")
process_predict = multiprocessing.Process(target=predict_model, args=(x_train, params))
process_predict.start()
process_predict.join()
This way Python fires up a new process for each task, and each process runs with its own memory.
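One caveat: predict_model above only prints the result inside the child process. If the parent needs the prediction back, one option (my own sketch, not part of the original answer) is to pass a multiprocessing.Queue, reusing ModelWrapper, x_train, and params from the example above:

import multiprocessing

def predict_model_to_queue(x, params, result_queue):
    # Runs in the child process; put the prediction on the queue for the parent.
    model = ModelWrapper(params.filepath)
    result_queue.put(model.predict(x))

result_queue = multiprocessing.Queue()
p = multiprocessing.Process(target=predict_model_to_queue,
                            args=(x_train, params, result_queue))
p.start()
y_pred = result_queue.get()   # read before join() so the child is not blocked on a full queue
p.join()
print(y_pred.shape)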
Bonus Tip: You can also choose to run the processes in parallel if you have many CPUs and GPUs available; in that case you just need to call process_train.join() after the loop instead of inside it. If you had eight GPUs, you could use this parent script to serve parameters, while each of the individual processes runs on a different GPU, as sketched below.
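A minimal sketch of that parallel variant, reusing train_model, x_train, y_train, and params from the example above. Pinning each child to one GPU via CUDA_VISIBLE_DEVICES is my addition, not something the original answer shows:

import os
import multiprocessing

def train_model_on_gpu(gpu_id, x, y, params):
    # Restrict this child process to a single GPU before TensorFlow initializes.
    os.environ['CUDA_VISIBLE_DEVICES'] = str(gpu_id)
    train_model(x, y, params)

processes = []
for gpu_id in range(8):                       # one process per GPU
    p = multiprocessing.Process(target=train_model_on_gpu,
                                args=(gpu_id, x_train, y_train, params))
    p.start()
    processes.append(p)

for p in processes:                           # join after the loop, as in the Bonus Tip
    p.join()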
# I tried a variety of things, separately and together, before I started using processes,
tf.reset_default_graph()
K.clear_session()
cuda.select_device(0); cuda.close()
model = get_new_model() # overwrite
model = None
del model
gc.collect()
+ I also considered using threads and subprocess.Popen, but I was satisfied with multiprocessing since it offered full decoupling, which made it a lot easier to manage and allocate resources.
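For completeness, the subprocess.Popen alternative would look roughly like the sketch below. The worker script predict_worker.py is hypothetical; the idea is the same, since all GPU memory is released when the separate interpreter exits:

import subprocess
import sys

# Launch a separate Python interpreter for one inference job.
proc = subprocess.Popen([sys.executable, 'predict_worker.py',
                         './inception_v3_2016_08_28_frozen.pb'])
proc.wait()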