I have a custom neural network that I am training on data and seek to constrain the network's output values to always be between two arbitrary constants: [lower_bound,upper_bound]
. Are there any best practices for encoding this constraint in a loss function?
Below I have written a minimal working example whereby I construct and train a neural network on generated data. Additionally, I place the arbitrary constraint that the output should be between [lower_bound,upper_bound] = [-0.5,0.75]
in the loss function being optimized. But I attempt it using a relatively crude method of finding all instances where predicted values exceed the bounds, and then simply making the loss function for these terms a large value (and otherwise zero if the predicted value is within the given bounds):
lower_bound = -0.5 #a guessed a priori lower bound on the output
upper_bound = 0.75 #a guessed a priori upper bound on the output
cond_v1_1 = tf.greater(self.v1_pred[:,0], upper_bound*tf.ones(tf.shape(self.v1_pred[:,0])))
cond_v1_2 = tf.greater(-1.0*self.v1_pred[:,0], lower_bound*tf.ones(tf.shape(self.v1_pred[:,0])))
self.red_v1 = tf.where(cond_v1_1, 100000.0*tf.ones(tf.shape(self.v1_pred[:,0])), 0.0*tf.zeros(tf.shape(self.v1_pred[:,0])))
self.red_v1 = tf.where(cond_v1_2, 100000.0*tf.ones(tf.shape(self.v1_pred[:,0])), self.red_v1)
self.loss_cond = tf.reduce_sum(1.0*tf.square(self.red_v1))
But are there any methods or loss functions to better encode this constraint when training the neural network? Perhaps a smoother loss function easier for the optimizers to handle and/or revisions to my code itself? Any comments and further thoughts on best practices for penalizing/training the neural network in the code below given a bound on outputs would be greatly appreciated.
import numpy as np
import tensorflow as tf
end_it = 1000 #number of iterations
frac_train = 1.0 #randomly sampled fraction of data to create training set
frac_sample_train = 0.01 #randomly sampled fraction of data from training set to train in batches
layers = [2, 20, 20, 20, 1]
#Generate training data
len_data = 10000
x_x = np.array([np.linspace(0.,1.,len_data)])
x_y = np.array([np.linspace(0.,1.,len_data)])
y_true = np.array([np.linspace(-0.2,0.2,len_data)])
N_train = int(frac_train*len_data)
idx = np.random.choice(len_data, N_train, replace=False)
x_train = x_x.T[idx,:]
y_train = x_y.T[idx,:]
v1_train = y_true.T[idx,:]
sample_batch_size = int(frac_sample_train*N_train)
np.random.seed(1234)
tf.set_random_seed(1234)
import logging
logging.getLogger('tensorflow').setLevel(logging.ERROR)
tf.logging.set_verbosity(tf.logging.ERROR)
class NeuralNet:
def __init__(self, x, y, v1, layers):
X = np.concatenate([x, y], 1)
self.lb = X.min(0)
self.ub = X.max(0)
self.X = X
self.x = X[:,0:1]
self.y = X[:,1:2]
self.v1 = v1
self.layers = layers
self.weights_v1, self.biases_v1 = self.initialize_NN(layers)
self.sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=False,
log_device_placement=False))
self.x_tf = tf.placeholder(tf.float32, shape=[None, self.x.shape[1]])
self.y_tf = tf.placeholder(tf.float32, shape=[None, self.y.shape[1]])
self.v1_tf = tf.placeholder(tf.float32, shape=[None, self.v1.shape[1]])
self.v1_pred = self.net(self.x_tf, self.y_tf)
lower_bound = -0.5 #a guessed a priori lower bound on the output
upper_bound = 0.75 #a guessed a priori upper bound on the output
cond_v1_1 = tf.greater(self.v1_pred[:,0], upper_bound*tf.ones(tf.shape(self.v1_pred[:,0])))
cond_v1_2 = tf.greater(-1.0*self.v1_pred[:,0], lower_bound*tf.ones(tf.shape(self.v1_pred[:,0])))
self.red_v1 = tf.where(cond_v1_1, 100000.0*tf.ones(tf.shape(self.v1_pred[:,0])), 0.0*tf.zeros(tf.shape(self.v1_pred[:,0])))
self.red_v1 = tf.where(cond_v1_2, 100000.0*tf.ones(tf.shape(self.v1_pred[:,0])), self.red_v1)
self.loss_cond = tf.reduce_sum(1.0*tf.square(self.red_v1))
self.loss_data = tf.reduce_mean(tf.square(self.v1_tf - self.v1_pred))
self.loss = self.loss_cond + self.loss_data
self.optimizer = tf.contrib.opt.ScipyOptimizerInterface(self.loss,
var_list=self.weights_v1+self.biases_v1,
method = 'L-BFGS-B',
options = {'maxiter': 50,
'maxfun': 50000,
'maxcor': 50,
'maxls': 50,
'ftol' : 1.0 * np.finfo(float).eps})
self.optimizer_Adam = tf.train.AdamOptimizer()
self.train_op_Adam_v1 = self.optimizer_Adam.minimize(self.loss, var_list=self.weights_v1+self.biases_v1)
init = tf.global_variables_initializer()
self.sess.run(init)
def initialize_NN(self, layers):
weights = []
biases = []
num_layers = len(layers)
for l in range(0,num_layers-1):
W = self.xavier_init(size=[layers[l], layers[l+1]])
b = tf.Variable(tf.zeros([1,layers[l+1]], dtype=tf.float32), dtype=tf.float32)
weights.append(W)
biases.append(b)
return weights, biases
def xavier_init(self, size):
in_dim = size[0]
out_dim = size[1]
xavier_stddev = np.sqrt(2/(in_dim + out_dim))
return tf.Variable(tf.truncated_normal([in_dim, out_dim], stddev=xavier_stddev), dtype=tf.float32)
def neural_net(self, X, weights, biases):
num_layers = len(weights) + 1
H = 2.0*(X - self.lb)/(self.ub - self.lb) - 1.0
for l in range(0,num_layers-2):
W = weights[l]
b = biases[l]
H = tf.tanh(tf.add(tf.matmul(H, W), b))
W = weights[-1]
b = biases[-1]
Y = tf.add(tf.matmul(H, W), b)
return Y
def net(self, x, y):
v1_out = self.neural_net(tf.concat([x,y], 1), self.weights_v1, self.biases_v1)
v1 = v1_out[:,0:1]
return v1
def callback(self, loss):
global Nfeval
print(str(Nfeval)+' - Loss in loop: %.3e' % (loss))
Nfeval += 1
def fetch_minibatch(self, x_in, y_in, v1_in, N_train_sample):
idx_batch = np.random.choice(len(x_in), N_train_sample, replace=False)
x_batch = x_in[idx_batch,:]
y_batch = y_in[idx_batch,:]
v1_batch = v1_in[idx_batch,:]
return x_batch, y_batch, v1_batch
def train(self, end_it):
it = 0
while it < end_it:
x_res_batch, y_res_batch, v1_res_batch = self.fetch_minibatch(self.x, self.y, self.v1, sample_batch_size) # Fetch residual mini-batch
tf_dict = {self.x_tf: x_res_batch, self.y_tf: y_res_batch,
self.v1_tf: v1_res_batch}
self.sess.run(self.train_op_Adam_v1, tf_dict)
self.optimizer.minimize(self.sess,
feed_dict = tf_dict,
fetches = [self.loss],
loss_callback = self.callback)
it = it + 1
def predict(self, x_star, y_star):
tf_dict = {self.x_tf: x_star, self.y_tf: y_star}
v1_star = self.sess.run(self.v1_pred, tf_dict)
return v1_star
model = NeuralNet(x_train, y_train, v1_train, layers)
Nfeval = 1
model.train(end_it)
Make sure your normalization function lets you transform them back later. The sigmoid activation function always outputs between 0, 1. Just make sure your last layer has sigmoid activation to restrict your output into that range. Now you can take your outputs and transform them back into the range you wanted.
A neural network is array of decision making algorithm where combination of neuronal units are used to get a decision out of a series of input. A neuronal unit takes 2 or more input and gives a single output. Combination of units may yield to n number of decisions based on inputs they make.
There will be two outputs, one from each classifier (i.e. hidden neuron).
Multilayer Perceptron. Convolutional Neural Network. Radial Basis Functional Neural Network. Recurrent Neural Network.
The best way (IMHO) to do such a thing is to enforce it via the output activation function. We can use a tf.nn.sigmoid
as a basis, which is bounded between [0, 1], and slightly shift and scale it.
def bounded_output(x, lower, upper):
scale = upper - lower
return scale * tf.nn.sigmoid(x) + lower
In your case, call it with lower=-0.5
and upper=0.75
. This will shift the sigmoid so that the lowest output is -0.5, and the range is 0.75 + 0.5 = 1.25
, which puts the upper limit at 0.75. Adding this as an output activation at the final layer of your net means outputs cannot fall outside the range.
One problem: This can lead to bad gradients because the function saturates as it approaches the limits. So if your network is producing outputs close to those limits, gradients will be small and learning can be slow.
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With