
How to apply dropout in TensorFlow to improve the accuracy of a neural network?

Dropout is a regularization technique. I want to apply it to the notMNIST data to reduce over-fitting and finish my Udacity Deep Learning course assignment. I have read the TensorFlow docs on how to call tf.nn.dropout, and here is my code:

# before proceeding further.
from __future__ import print_function
import numpy as np
import tensorflow as tf
from six.moves import cPickle as pickle

pickle_file = 'notMNIST.pickle'

with open(pickle_file, 'rb') as f:
    save = pickle.load(f)
    train_dataset = save['train_dataset']
    train_labels = save['train_labels']
    valid_dataset = save['valid_dataset']
    valid_labels = save['valid_labels']
    test_dataset = save['test_dataset']
    test_labels = save['test_labels']
    del save  # hint to help gc free up memory
    print('Training set', train_dataset.shape, train_labels.shape)
    print('Validation set', valid_dataset.shape, valid_labels.shape)
    print('Test set', test_dataset.shape, test_labels.shape)

image_size = 28
num_labels = 10

def reformat(dataset, labels):
    dataset = dataset.reshape((-1, image_size * image_size)).astype(np.float32)
    # Map 1 to [0.0, 1.0, 0.0 ...], 2 to [0.0, 0.0, 1.0 ...]
    labels = (np.arange(num_labels) == labels[:, None]).astype(np.float32)
    return dataset, labels

train_dataset, train_labels = reformat(train_dataset, train_labels)
valid_dataset, valid_labels = reformat(valid_dataset, valid_labels)
test_dataset, test_labels = reformat(test_dataset, test_labels)
print('Training set', train_dataset.shape, train_labels.shape)
print('Validation set', valid_dataset.shape, valid_labels.shape)
print('Test set', test_dataset.shape, test_labels.shape)

def accuracy(predictions, labels):
    return (100.0 * np.sum(np.argmax(predictions, 1) == np.argmax(labels, 1))
            / predictions.shape[0])

# ReLU neuron
# param
training_epochs = 30
batch_size = 521
display_step = 1
n_input = 784    # img shape: 28*28
n_classes = 10   # MNIST total classes (0-9 digits)

# hyper-parameter
n_hidden_1 = 256
learning_rate = 0.05
lambda_term = 0.01

graph = tf.Graph()
with graph.as_default():
    # init weights
    weights_hiden = tf.Variable(tf.random_normal([n_input, n_hidden_1], stddev=np.sqrt(n_input)))
    weights_out = tf.Variable(tf.random_normal([n_hidden_1, n_classes], stddev=np.sqrt(n_hidden_1)))

    biases_hidden = tf.Variable(tf.random_normal([n_hidden_1]))
    biases_out = tf.Variable(tf.random_normal([n_classes]))

    x = tf.placeholder("float", [None, n_input])
    y = tf.placeholder("float", [None, n_classes])

    def model(x, weights_hiden, weights_out, biases_hidden, biases_out):
        # hidden layer with RELU activation
        layer_1 = tf.nn.relu(tf.add(tf.matmul(x, weights_hiden), biases_hidden))
        # apply DropOut to hidden layer
        keep_prob = tf.placeholder(tf.float32)  # DROP-OUT here
        drop_out = tf.nn.dropout(layer_1, keep_prob)  # DROP-OUT here
        # output layer with linear activation
        out_layer = tf.matmul(layer_1, weights_out) + biases_out
        return out_layer

    # Construct model
    pred = model(x, weights_hiden, weights_out, biases_hidden, biases_out)

    # Define loss and optimizer
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(pred, y) +
                          lambda_term * tf.nn.l2_loss(weights_hiden) +
                          lambda_term * tf.nn.l2_loss(weights_out) +
                          lambda_term * tf.nn.l2_loss(biases_hidden) +
                          lambda_term * tf.nn.l2_loss(biases_out))
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

# run the graph
with tf.Session(graph=graph) as sess:
    tf.initialize_all_variables().run()
    print('Initialized')
    # Training cycle
    for epoch in range(training_epochs):
        avg_cost = 0.
        total_batch = int(train_dataset.shape[0] / batch_size)
        # Loop over all batches
        for i in range(total_batch):
            batch_x = train_dataset[(i*batch_size):((i*batch_size) + batch_size), :]
            batch_y = train_labels[(i*batch_size):((i*batch_size) + batch_size), :]
            # Run optimization op (backprop) and cost op (to get loss value)
            _, c = sess.run([optimizer, cost], feed_dict={x: batch_x, y: batch_y})
            # Compute average loss
            avg_cost += c / total_batch
        # Display logs per epoch step
        if epoch % display_step == 0:
            print("Epoch:", '%04d' % (epoch+1), "cost=", "{:.9f}".format(avg_cost))
    print("Optimization Finished!")

    # Test model
    correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
    # Calculate accuracy
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    print("Test data accuracy:", accuracy.eval({x: test_dataset, y: test_labels}))
    print("Valid data accuracy:", accuracy.eval({x: valid_dataset, y: valid_labels}))

tf.nn.dropout is called in the model() function, but after I applied dropout to the neural network, the accuracy didn't seem to change. Here is the result:

Epoch: 0001 cost= 579980.086977807
Epoch: 0002 cost= 238859.802382506
Epoch: 0003 cost= 90672.733752856
Epoch: 0004 cost= 32649.040985028
Epoch: 0005 cost= 11325.878361874
Epoch: 0006 cost= 3866.805511076
Epoch: 0007 cost= 1357.785540469
Epoch: 0008 cost= 519.381747333
Epoch: 0009 cost= 225.359804119
Epoch: 0010 cost= 110.099476707
Epoch: 0011 cost= 55.212384386
Epoch: 0012 cost= 28.469241683
Epoch: 0013 cost= 14.511494627
Epoch: 0014 cost= 6.567228943
Epoch: 0015 cost= 3.186372240
Epoch: 0016 cost= 1.701917576
Epoch: 0017 cost= 1.041632473
Epoch: 0018 cost= 0.843376874
Epoch: 0019 cost= 0.786183911
Epoch: 0020 cost= 0.775412846
Epoch: 0021 cost= 0.782965020
Epoch: 0022 cost= 0.796788171
Epoch: 0023 cost= 0.814522117
Epoch: 0024 cost= 0.832090579
Epoch: 0025 cost= 0.849197715
Epoch: 0026 cost= 0.867473578
Epoch: 0027 cost= 0.889561496
Epoch: 0028 cost= 0.921837020
Epoch: 0029 cost= 16.655304543
Epoch: 0030 cost= 1.421570476
Optimization Finished!
Test data accuracy: 0.8775
Valid data accuracy: 0.8069

How can I apply dropout with TensorFlow to improve the accuracy of the network? Thank you!

asked Nov 30 '16 by GoingMyWay

People also ask

In what way does dropout help to better train a neural network?

Because the outputs of a layer under dropout are randomly subsampled, dropout has the effect of reducing the capacity, or thinning, of the network during training. As such, a wider network, e.g. one with more nodes, may be required when using dropout (see "Dropout: A Simple Way to Prevent Neural Networks from Overfitting", 2014).
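For example, a rough back-of-the-envelope sketch of that sizing consideration (not from the original post; the keep probability value and the 1/keep_prob widening rule follow the dropout paper's recommendation):

keep_prob_value = 0.5                 # fraction of units kept during training (assumed value)
n_hidden_1 = 256                      # hidden-layer width that works well without dropout
# heuristic from the paper: widen the layer by roughly 1/keep_prob when dropout is enabled
n_hidden_1_with_dropout = int(n_hidden_1 / keep_prob_value)   # -> 512 units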

How does dropout work in TensorFlow?

The Dropout layer randomly sets input units to 0 with a frequency of rate at each step during training time, which helps prevent overfitting. Inputs not set to 0 are scaled up by 1/(1 - rate) such that the sum over all inputs is unchanged.
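A short illustration of that scaling behavior (a hedged sketch assuming TensorFlow 2.x, where tf.keras.layers.Dropout is available; the original question uses the older tf.nn.dropout API instead):

import tensorflow as tf  # assumes TensorFlow 2.x

layer = tf.keras.layers.Dropout(rate=0.5)
x = tf.ones((1, 10))

# During training roughly half the entries are zeroed and the survivors are
# scaled up by 1 / (1 - 0.5) = 2.0, so the expected sum stays the same.
print(layer(x, training=True))

# At inference time the layer passes the input through unchanged.
print(layer(x, training=False))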


2 Answers

In the graph, I'd suggest moving keep_prob = tf.placeholder(tf.float32) outside of the model function so that it is defined once at graph level and can be fed from the session:

with graph.as_default():
    ...
    x = tf.placeholder("float", [None, n_input])
    y = tf.placeholder("float", [None, n_classes])
    keep_prob = tf.placeholder(tf.float32)

    def model(x, weights_hiden, weights_out, biases_hidden, biases_out, keep_prob):
        # hidden layer with RELU activation
        layer_1 = tf.nn.relu(tf.add(tf.matmul(x, weights_hiden), biases_hidden))
        # apply DropOut to hidden layer
        drop_out = tf.nn.dropout(layer_1, keep_prob)  # DROP-OUT here
        # output layer with linear activation
        out_layer = tf.matmul(drop_out, weights_out) + biases_out
        return out_layer
    ...

When running the session, feed the desired keep_prob value during training, and feed 1.0 to keep_prob during inference (validation and/or testing):

# run the graph
with tf.Session(graph=graph) as sess:
    tf.initialize_all_variables().run()
    ...
    for epoch in range(training_epochs):
        ...
        for i in range(total_batch):
            batch_x = ...
            batch_y = ...
            # Run optimization op (backprop) and cost op (to get loss value)
            # Feed a value < 1.0 for keep_prob during training
            _, c = sess.run([optimizer, cost], feed_dict={x: batch_x, y: batch_y, keep_prob: 0.5})
    ...
    # Feed 1.0 for keep_prob during testing
    print("Test data accuracy:", accuracy.eval({x: test_dataset, y: test_labels, keep_prob: 1.0}))
    print("Valid data accuracy:", accuracy.eval({x: valid_dataset, y: valid_labels, keep_prob: 1.0}))
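As a side note on why feeding keep_prob = 1.0 at evaluation time is all that is needed: tf.nn.dropout implements inverted dropout, i.e. the activations that survive are already scaled up by 1/keep_prob during training, so no extra rescaling is required at test time. A minimal sketch of that behavior (assuming a TensorFlow 1.x environment, matching the question's session-style code; not part of the original answer):

import tensorflow as tf  # assumes TensorFlow 1.x, as in the question

keep_prob = tf.placeholder(tf.float32)
ones = tf.ones([4])
dropped = tf.nn.dropout(ones, keep_prob)

with tf.Session() as sess:
    # surviving entries are scaled to 1 / 0.5 = 2.0, the rest are zeroed
    print(sess.run(dropped, feed_dict={keep_prob: 0.5}))
    # with keep_prob = 1.0 the op passes the input through unchanged
    print(sess.run(dropped, feed_dict={keep_prob: 1.0}))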
answered by Zhongyu Kuang


The key point here is that:

layer_1 = tf.nn.relu(tf.add(tf.matmul(x, weights_hiden), biases_hidden))
# apply DropOut to hidden layer
keep_prob = tf.placeholder(tf.float32)  # DROP-OUT here
drop_out = tf.nn.dropout(layer_1, keep_prob)  # DROP-OUT here
# output layer with linear activation
out_layer = tf.matmul(layer_1, weights_out) + biases_out

Becomes:

layer_1 = tf.nn.relu(tf.add(tf.matmul(x, weights_hiden), biases_hidden))
# apply DropOut to hidden layer
drop_out = tf.nn.dropout(layer_1, keep_prob)  # DROP-OUT here
# output layer with linear activation
out_layer = tf.matmul(drop_out, weights_out) + biases_out

Note that drop_out is used in the final line instead of layer_1; otherwise the dropout op is simply ignored and has no effect on the output.

answered by James Shiztar