
Keras NN loss not decreasing

I am using Keras 2.0.7 with Python 3.5 and TensorFlow 1.3.0 on Windows 10.

I am testing the architecture used in the paper Long-term Temporal Convolutions for Action Recognition, which I hope to use on my own data. I made test data that I thought would converge very quickly, but the learning rate stepped down to 1e-6 and the loss never moved off a high value of 4.5549672...

Can someone look at or try this code? Am I assuming wrong, coding wrong, or just impatient?

Thanks

## Attempt to implement based on ...
# Long-term Temporal Convolutions for Action Recognition
#
# Gül Varol, Ivan Laptev, and Cordelia Schmid, Fellow, IEEE
#
import time
import numpy as np,cv2
import sys
import os

import keras
import tensorflow as tf

from keras.models import Sequential, Model
from keras.layers import Dense, Dropout, Activation, Flatten,  Input
from keras.layers import Conv3D, MaxPooling3D
from keras.layers import ZeroPadding3D, AveragePooling3D, MaxPooling3D,LeakyReLU
from keras.optimizers import SGD, RMSprop, Adam, Adagrad, Nadam, Adadelta

from keras import regularizers


from keras import backend as K


K.clear_session()
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=.99, allocator_type = 'BFC') 
sess = tf.Session(config=tf.ConfigProto(log_device_placement=True, gpu_options=gpu_options))
config = tf.ConfigProto()


#path information
sample_root_path = "C:\\"

cur=os.chdir(sample_root_path )




batch_size = 15
num_classes = 1
epochs = 5000
my_lambda = 0.

nchan = 3

m_loss = 'binary_crossentropy'

m_opt = Nadam(lr=0.02, beta_1=0.9, beta_2=0.999, epsilon=1e-08, schedule_decay=0.004)
#
m_met = ['binary_accuracy'] 

#test data
nvideos = 300
nframes = 30
nrows = 60
ncols = 80
nchan = 3

x_flow = np.ones((nvideos, nframes,nrows,ncols,nchan),np.float32)
x_flow[150:] =-1.0
y_flow = np.ones((nvideos),np.float32)
y_flow[150:]= 0




t_inp =Input(shape=(nframes,nrows,ncols,nchan),name='t_inp')

t =  Conv3D(64, (3, 3,3), activation='relu',padding="same", name="conv1", strides=(1, 1, 1),
                 kernel_initializer = 'glorot_normal',
                 bias_initializer = 'glorot_normal', 
                 bias_regularizer = regularizers.l1(my_lambda), 
                 trainable=True) (t_inp)
t = LeakyReLU(alpha=0.3,trainable=True) (t)
t = MaxPooling3D(pool_size=(2,2,1), strides=(2, 2, 2),name='pool1') (t)


t =  Conv3D(128, (3, 3,3), activation='relu',padding="same", name="conv2", strides=(1, 1, 1), 
                  bias_regularizer = regularizers.l1(my_lambda),trainable=True) (t)
t = LeakyReLU(alpha=0.3,trainable=True) (t)

t = MaxPooling3D(pool_size=(2,2,2), strides=(2, 2, 2),name='pool2') (t)




t =  Conv3D(256, (3, 3,3), activation='relu',padding="same", name="conv3", strides=(1, 1, 1), 
                 bias_regularizer = regularizers.l1(my_lambda),trainable=True) (t)
t = LeakyReLU(alpha=0.3,trainable=True) (t)

t = MaxPooling3D(pool_size=(2,2,2), strides=(2, 2, 2),name='pool3') (t)



t =  Conv3D(256, (3, 3,3), activation='relu',padding="same", name="conv4", strides=(1, 1, 1), 
                 bias_regularizer = regularizers.l1(my_lambda), trainable=True) (t)
t = LeakyReLU(alpha=0.3,trainable=True) (t)

t = MaxPooling3D(pool_size=(2,2,2), strides=(2, 2, 2),name='pool4') (t)



t =  Conv3D(256, (3, 3,3), activation='relu',padding="same", name="conv5", strides=(1, 1, 1), 
                 bias_regularizer = regularizers.l1(my_lambda), trainable=True) (t)
t = LeakyReLU(alpha=0.3,trainable=True) (t)
#
t = MaxPooling3D(pool_size=(1,1,1), strides=(1, 1, 1),name='pool5') (t)
##*****************************

t = Flatten () (t)
t = Dense((2048),name='s_den00') (t)
t = Dropout(.5) (t)
t = Dense((2048),name='s_den0') (t)
t = Dropout(.5) (t)


t = Dense((num_classes),activation='softmax',name='s_den1') (t)


model = Model(inputs=t_inp,outputs=t)
print (model.summary())

model.compile(loss=m_loss, optimizer=m_opt, metrics=m_met)

print ('compiled model')
tb = keras.callbacks.TensorBoard(log_dir=sample_root_path+'logs', histogram_freq=0,
                          write_graph=True, write_grads=True, write_images=True)

reduce_lr = keras.callbacks.ReduceLROnPlateau(monitor='loss', factor=0.5,verbose=1, patience=3, min_lr=0.000001)

with tf.device('/gpu:0'):
  history = model.fit(x_flow, y_flow,
                      batch_size=batch_size,
                      callbacks=[reduce_lr,
                                 tb],
                      verbose=1,
                      validation_split=.3,
                      shuffle = True,
                      epochs=epochs)

print ('done')
asked Aug 22 '17 by DSP209


1 Answer

The problem is the output layer's activation function. You would only use softmax for a two-class structure if your output array looked like this:

[[1,0],
 [1,0],
 [0,1]]
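
If you want that two-column layout, one way to build the targets is keras.utils.to_categorical; a minimal sketch is below (the 2-unit head and categorical_crossentropy are my additions for this variant, not part of the original post):

from keras.utils import to_categorical

# Hypothetical two-column variant: one-hot targets instead of a single 0/1 column
y_onehot = to_categorical(y_flow, num_classes=2)   # shape (nvideos, 2)

# The final layer then needs 2 units so softmax has two classes to normalize over,
# and the matching loss is categorical_crossentropy:
# t = Dense(2, activation='softmax', name='s_den1')(t)
# model.compile(loss='categorical_crossentropy', optimizer=m_opt, metrics=['accuracy'])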

Softmax normalizes its outputs so they sum to 1, so with a single output unit every prediction is forced to exactly 1 regardless of the input, which is why the loss never decreases. You can set up your targets like the two-column array above, or you can keep them as a single binary column like this:

[[1],
 [1],
 [0]]

and use the sigmoid activation function, which maps the output to the range 0-1. So your last layer would be:

t = Dense((num_classes),activation='sigmoid',name='s_den1') (t)
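
For completeness, a minimal sketch of the sigmoid route end to end, reusing the names from the question (t, t_inp, m_opt, x_flow stay as defined there); the predict check at the end is just a suggested way to verify the fix:

t = Dense(1, activation='sigmoid', name='s_den1')(t)   # single unit, outputs a probability in (0, 1)

model = Model(inputs=t_inp, outputs=t)
model.compile(loss='binary_crossentropy', optimizer=m_opt, metrics=['binary_accuracy'])

# Sanity check: predictions should now vary with the input instead of all being exactly 1.0
print(model.predict(x_flow[:5]))

With a single sigmoid unit, binary_crossentropy (already used in the question) is the matching loss, so nothing else needs to change.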
answered Nov 17 '22 by DJK