tf.tape.gradient() returns None for certain losses

I am trying to figure out why tf.GradientTape().gradient sometimes returns None, so I used the three loss functions below (mmd0(), mmd1(), mmd2()). Although mmd0 and mmd1 have somewhat different forms, gradients are returned for both of them, but for mmd2 the gradients are None. I print the loss from each of the three functions; does anyone know why it behaves like this?

import numpy as np
import tensorflow as tf

def mmd0(x, y): # x and y are lists of arbitrary lengths
  return x

def mmd1(x1, x2): # x1 and x2 are lists of arbitrary lengths
  dis = sum([x**2 for x in x1])/len(x1) - sum([x**2 for x in x2])/len(x2)
  return dis**2

def mmd2(x, y):
  dis = x-y
  return [tf.convert_to_tensor(elem) for elem in dis]

def get_MMD_norm(errors, sigma=0.1): 
  x2 = np.random.normal(0, sigma, len(errors))
  loss0 = mmd0(errors, x2)
  loss1 = mmd1(errors, x2)
  loss2 = mmd2(errors, x2)
  print("loss0:", loss0)
  print("loss1:", loss1)
  print("loss2:", loss2)
  return tf.cast(loss2, tf.float32)

def loss(model, x, y, sigma=0.1):
  y_ = model(x) # y_.shape is (batch_size, 3) for Iris dataset
  losses = []
  loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
  for i in range(y.shape[0]):
    loss = loss_object(y_true=y[i], y_pred=y_[i])
    losses.append(loss) 
  batch_loss = get_MMD_norm(losses)
  single_losses_list = [loss.numpy() for loss in losses]
  return tf.convert_to_tensor(batch_loss, dtype=np.float32), single_losses_list

def grad(model, inputs, targets, sigma=0.1):
  with tf.GradientTape() as tape:
    tape.watch(model.trainable_variables)
    batch_loss, single_losses = loss(model, inputs, targets, sigma=0.1)
  return tape.gradient(batch_loss, model.trainable_variables), batch_loss, single_losses 

grads, batch_loss, single_losses = grad(model, features, labels)
print("grads:", grads)
print("batch_loss:", batch_loss)
##########################################################
loss0: [<tf.Tensor: id=39621, shape=(), dtype=float32, numpy=2.1656876>, <tf.Tensor: id=39659, shape=(), dtype=float32, numpy=2.057112>, <tf.Tensor: id=39697, shape=(), dtype=float32, numpy=2.2769136>, <tf.Tensor: id=39735, shape=(), dtype=float32, numpy=2.0263004>, <tf.Tensor: id=39773, shape=(), dtype=float32, numpy=2.1568372>, <tf.Tensor: id=39811, shape=(), dtype=float32, numpy=0.7392154>, <tf.Tensor: id=39849, shape=(), dtype=float32, numpy=0.7742219>, <tf.Tensor: id=39887, shape=(), dtype=float32, numpy=2.2176154>, <tf.Tensor: id=39925, shape=(), dtype=float32, numpy=1.0187237>, <tf.Tensor: id=39963, shape=(), dtype=float32, numpy=2.160415>, <tf.Tensor: id=40001, shape=(), dtype=float32, numpy=0.80997854>, <tf.Tensor: id=40039, shape=(), dtype=float32, numpy=0.70803094>, <tf.Tensor: id=40077, shape=(), dtype=float32, numpy=0.8207226>, <tf.Tensor: id=40115, shape=(), dtype=float32, numpy=0.82957774>, <tf.Tensor: id=40153, shape=(), dtype=float32, numpy=0.88732547>, <tf.Tensor: id=40191, shape=(), dtype=float32, numpy=0.90633464>, <tf.Tensor: id=40229, shape=(), dtype=float32, numpy=0.7932346>, <tf.Tensor: id=40267, shape=(), dtype=float32, numpy=2.1767666>, <tf.Tensor: id=40305, shape=(), dtype=float32, numpy=0.80166155>, <tf.Tensor: id=40343, shape=(), dtype=float32, numpy=0.7831647>, <tf.Tensor: id=40381, shape=(), dtype=float32, numpy=0.77431095>, <tf.Tensor: id=40419, shape=(), dtype=float32, numpy=0.82067406>, <tf.Tensor: id=40457, shape=(), dtype=float32, numpy=0.74510425>, <tf.Tensor: id=40495, shape=(), dtype=float32, numpy=2.1666338>, <tf.Tensor: id=40533, shape=(), dtype=float32, numpy=0.7922478>, <tf.Tensor: id=40571, shape=(), dtype=float32, numpy=0.73235756>, <tf.Tensor: id=40609, shape=(), dtype=float32, numpy=2.1792874>, <tf.Tensor: id=40647, shape=(), dtype=float32, numpy=0.919183>, <tf.Tensor: id=40685, shape=(), dtype=float32, numpy=0.761979>, <tf.Tensor: id=40723, shape=(), dtype=float32, numpy=2.1664479>, <tf.Tensor: id=40761, shape=(), dtype=float32, numpy=0.77892226>, <tf.Tensor: id=40799, shape=(), dtype=float32, numpy=0.99058735>]
loss1: tf.Tensor(4.158007, shape=(), dtype=float32)
loss2: [<tf.Tensor: id=40935, shape=(), dtype=float64, numpy=2.325676997771268>, <tf.Tensor: id=40936, shape=(), dtype=float64, numpy=1.9988182000798667>, <tf.Tensor: id=40937, shape=(), dtype=float64, numpy=2.303379813455908>, <tf.Tensor: id=40938, shape=(), dtype=float64, numpy=2.0615775258879356>, <tf.Tensor: id=40939, shape=(), dtype=float64, numpy=2.2949723624257774>, <tf.Tensor: id=40940, shape=(), dtype=float64, numpy=0.7019287657319235>, <tf.Tensor: id=40941, shape=(), dtype=float64, numpy=0.8522054859739794>, <tf.Tensor: id=40942, shape=(), dtype=float64, numpy=2.0819949907118125>, <tf.Tensor: id=40943, shape=(), dtype=float64, numpy=1.065878291073558>, <tf.Tensor: id=40944, shape=(), dtype=float64, numpy=2.1225998300026805>, <tf.Tensor: id=40945, shape=(), dtype=float64, numpy=0.9485520218242218>, <tf.Tensor: id=40946, shape=(), dtype=float64, numpy=0.7221746903906889>, <tf.Tensor: id=40947, shape=(), dtype=float64, numpy=0.9985009994522388>, <tf.Tensor: id=40948, shape=(), dtype=float64, numpy=0.9143119687525019>, <tf.Tensor: id=40949, shape=(), dtype=float64, numpy=0.9230117922853999>, <tf.Tensor: id=40950, shape=(), dtype=float64, numpy=1.0220225043292934>, <tf.Tensor: id=40951, shape=(), dtype=float64, numpy=0.8735972169951878>, <tf.Tensor: id=40952, shape=(), dtype=float64, numpy=2.1279260795512753>, <tf.Tensor: id=40953, shape=(), dtype=float64, numpy=0.9597649765787801>, <tf.Tensor: id=40954, shape=(), dtype=float64, numpy=0.8338326272407959>, <tf.Tensor: id=40955, shape=(), dtype=float64, numpy=0.6674084331022461>, <tf.Tensor: id=40956, shape=(), dtype=float64, numpy=0.8679296826013285>, <tf.Tensor: id=40957, shape=(), dtype=float64, numpy=0.8174893483228802>, <tf.Tensor: id=40958, shape=(), dtype=float64, numpy=2.212290299049252>, <tf.Tensor: id=40959, shape=(), dtype=float64, numpy=0.7304098620074719>, <tf.Tensor: id=40960, shape=(), dtype=float64, numpy=0.8463413221121661>, <tf.Tensor: id=40961, shape=(), dtype=float64, numpy=2.3081013094190443>, <tf.Tensor: id=40962, shape=(), dtype=float64, numpy=1.0314178020997722>, <tf.Tensor: id=40963, shape=(), dtype=float64, numpy=0.774951045805575>, <tf.Tensor: id=40964, shape=(), dtype=float64, numpy=2.127838465488091>, <tf.Tensor: id=40965, shape=(), dtype=float64, numpy=0.909498425717612>, <tf.Tensor: id=40966, shape=(), dtype=float64, numpy=1.0217239989370837>]
grads: [None, None, None, None, None, None]
batch_loss: tf.Tensor(
[2.325677   1.9988182  2.3033798  2.0615776  2.2949724  0.7019288
 0.8522055  2.081995   1.0658783  2.1225998  0.948552   0.7221747
 0.998501   0.91431195 0.9230118  1.0220225  0.8735972  2.127926
 0.95976496 0.8338326  0.6674084  0.8679297  0.8174893  2.2122903
 0.73040986 0.8463413  2.3081014  1.0314178  0.77495104 2.1278384
 0.90949845 1.021724  ], shape=(32,), dtype=float32)
asked Jul 02 '19 by moon
People also ask

How does TF gradient tape work?

TensorFlow "records" relevant operations executed inside the context of a tf.GradientTape onto a "tape". TensorFlow then uses that tape to compute the gradients of a "recorded" computation using reverse-mode differentiation.
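For reference, a minimal sketch of that recording behaviour (plain tf.GradientTape usage, not tied to the question's code):

import tensorflow as tf

x = tf.Variable(3.0)
with tf.GradientTape() as tape:
  y = x ** 2  # recorded because x is a watched tf.Variable

print(tape.gradient(y, x))  # dy/dx = 2 * x -> tf.Tensor(6.0, ...)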

Does TensorFlow have Autograd?

Behind the scenes, TensorFlow is a tensor library with automatic differentiation capability, so you can easily use it to solve a numerical optimization problem with gradient descent; its automatic differentiation engine works like autograd.
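A toy sketch of that idea (the variable, loss, and learning rate below are made up purely for illustration):

import tensorflow as tf

w = tf.Variable(0.0)
learning_rate = 0.1

# Minimize f(w) = (w - 5)^2 with plain gradient descent.
for step in range(100):
  with tf.GradientTape() as tape:
    loss = (w - 5.0) ** 2
  grad = tape.gradient(loss, w)
  w.assign_sub(learning_rate * grad)

print(w.numpy())  # converges to roughly 5.0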

How does TensorFlow compute derivatives?

TensorFlow calculates derivatives using automatic differentiation. This is different from symbolic differentiation and from numeric differentiation (aka finite differences). More than a smart math approach, it is a smart programming approach.
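To make that distinction concrete, a small sketch comparing the tape's derivative with a finite-difference approximation (the function f is arbitrary):

import tensorflow as tf

def f(x):
  return tf.sin(x) * x ** 2

x = tf.Variable(1.5)

# Automatic differentiation: exact up to floating-point error.
with tf.GradientTape() as tape:
  y = f(x)
auto_grad = tape.gradient(y, x)

# Numeric differentiation: central finite-difference approximation.
eps = 1e-4
numeric_grad = (f(x + eps) - f(x - eps)) / (2 * eps)

print(auto_grad.numpy(), numeric_grad.numpy())  # nearly identical values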


1 Answer

Have you seen this answer? I think I'm having a similar issue, and I believe your issue might be related to mine. It comes down to the loss being computed through a step where the tensor of interest gets "lost" somewhere between the start of the tape and the end. The referenced answer notes that the original poster had a spot where a numpy array was returned instead of a TensorFlow tensor, which caused the gradient tape to fail to compute the gradient.

I could be wrong, because I am nowhere near a TensorFlow expert, but that is something I keep seeing pop up while searching for a solution to my similar issue.
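As a minimal sketch of that failure mode (the general "drops out of TensorFlow into numpy" pattern; quite possibly what happens in mmd2, where the list-minus-array subtraction x - y goes through numpy, though I haven't verified that):

import numpy as np
import tensorflow as tf

x = tf.Variable(2.0)

# Broken: the value leaves TensorFlow, so the tape loses track of x.
with tf.GradientTape() as tape:
  y = x ** 2
  y_np = np.array(y)                 # now a plain numpy value
  loss = tf.convert_to_tensor(y_np)  # converting back does not reconnect it to x
print(tape.gradient(loss, x))  # None

# Working: stay in TensorFlow ops end to end.
with tf.GradientTape() as tape:
  loss = x ** 2
print(tape.gradient(loss, x))  # tf.Tensor(4.0, shape=(), dtype=float32)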

answered Sep 27 '22 by wandadars