I am using Keras 2.0.7 with Python 3.5 and TensorFlow 1.3.0 on Windows 10.
I am testing the architecture from the paper Long-term Temporal Convolutions for Action Recognition, which I hope to use on my own data. I used test data that I expected to converge very quickly, but the learning rate stepped down to 1e-6 and the loss never moved off a high value of 4.5549672...
Can someone look at or try this code? Am I assuming something wrong, coding something wrong, or just not being patient enough?
Thanks
## Attempt to implement based on ...
#Long-term Temporal Convolutions for Action Recognition
#
#Gül Varol, Ivan Laptev, and Cordelia Schmid, Fellow, IEEE
#
import time
import numpy as np
import cv2
import sys
import os
import keras
import tensorflow as tf
from keras.models import Sequential, Model
from keras.layers import Dense, Dropout, Activation, Flatten, Input
from keras.layers import Conv3D, MaxPooling3D
from keras.layers import ZeroPadding3D, AveragePooling3D, LeakyReLU
from keras.optimizers import SGD, RMSprop, Adam, Adagrad, Nadam, Adadelta
from keras import regularizers
from keras import backend as K
K.clear_session()
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=.99, allocator_type='BFC')
sess = tf.Session(config=tf.ConfigProto(log_device_placement=True, gpu_options=gpu_options))
K.set_session(sess)  # register the session with Keras so these GPU options are actually used
#path information
sample_root_path = "C:\\"
os.chdir(sample_root_path)
batch_size = 15
num_classes = 1
epochs = 5000
my_lambda = 0.
nchan = 3
m_loss = 'binary_crossentropy'
m_opt = Nadam(lr=0.02, beta_1=0.9, beta_2=0.999, epsilon=1e-08, schedule_decay=0.004)
#
m_met = ['binary_accuracy']
#test data
nvideos = 300
nframes = 30
nrows = 60
ncols = 80
nchan = 3
x_flow = np.ones((nvideos, nframes,nrows,ncols,nchan),np.float32)
x_flow[150:] =-1.0
y_flow = np.ones((nvideos),np.float32)
y_flow[150:]= 0
t_inp =Input(shape=(nframes,nrows,ncols,nchan),name='t_inp')
t = Conv3D(64, (3, 3, 3), activation='relu', padding="same", name="conv1", strides=(1, 1, 1),
           kernel_initializer='glorot_normal',
           bias_initializer='glorot_normal',
           bias_regularizer=regularizers.l1(my_lambda),
           trainable=True)(t_inp)
t = LeakyReLU(alpha=0.3,trainable=True) (t)
t = MaxPooling3D(pool_size=(2,2,1), strides=(2, 2, 2),name='pool1') (t)
t = Conv3D(128, (3, 3, 3), activation='relu', padding="same", name="conv2", strides=(1, 1, 1),
           bias_regularizer=regularizers.l1(my_lambda), trainable=True)(t)
t = LeakyReLU(alpha=0.3,trainable=True) (t)
t = MaxPooling3D(pool_size=(2,2,2), strides=(2, 2, 2),name='pool2') (t)
t = Conv3D(256, (3, 3, 3), activation='relu', padding="same", name="conv3", strides=(1, 1, 1),
           bias_regularizer=regularizers.l1(my_lambda), trainable=True)(t)
t = LeakyReLU(alpha=0.3,trainable=True) (t)
t = MaxPooling3D(pool_size=(2,2,2), strides=(2, 2, 2),name='pool3') (t)
t = Conv3D(256, (3, 3, 3), activation='relu', padding="same", name="conv4", strides=(1, 1, 1),
           bias_regularizer=regularizers.l1(my_lambda), trainable=True)(t)
t = LeakyReLU(alpha=0.3,trainable=True) (t)
t = MaxPooling3D(pool_size=(2,2,2), strides=(2, 2, 2),name='pool4') (t)
t = Conv3D(256, (3, 3, 3), activation='relu', padding="same", name="conv5", strides=(1, 1, 1),
           bias_regularizer=regularizers.l1(my_lambda), trainable=True)(t)
t = LeakyReLU(alpha=0.3,trainable=True) (t)
#
t = MaxPooling3D(pool_size=(1,1,1), strides=(1, 1, 1),name='pool5') (t)
##*****************************
t = Flatten () (t)
t = Dense((2048),name='s_den00') (t)
t = Dropout(.5) (t)
t = Dense((2048),name='s_den0') (t)
t = Dropout(.5) (t)
t = Dense((num_classes),activation='softmax',name='s_den1') (t)
model = Model(inputs=t_inp,outputs=t)
print (model.summary())
model.compile(loss=m_loss, optimizer=m_opt, metrics=m_met)
print ('compiled model')
tb = keras.callbacks.TensorBoard(log_dir=sample_root_path+'logs', histogram_freq=0,
                                 write_graph=True, write_grads=True, write_images=True)
reduce_lr = keras.callbacks.ReduceLROnPlateau(monitor='loss', factor=0.5,verbose=1, patience=3, min_lr=0.000001)
with tf.device('/gpu:0'):
    history = model.fit(x_flow, y_flow,
                        batch_size=batch_size,
                        callbacks=[reduce_lr, tb],
                        verbose=1,
                        validation_split=.3,
                        shuffle=True,
                        epochs=epochs)
print ('done')
The problem is the output layer's activation function. You would only use softmax for a two-class setup if your output array were one-hot encoded, with one column per class, like this:
[[1,0],
[1,0],
[0,1]]
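If you did want that two-column setup, the changes against the code in the question would look roughly like this (just a sketch; y_flow_cat is a name I am introducing, and it assumes the conv/pool stack and m_opt stay exactly as posted): one-hot encode the labels, give the last Dense layer two units, and switch the loss to categorical_crossentropy.
from keras.utils import to_categorical
num_classes = 2                                    # one output column per class
y_flow_cat = to_categorical(y_flow, num_classes)   # shape (nvideos, 2); rows are [1., 0.] or [0., 1.]
# ... conv/pool/flatten/dense stack exactly as in the question ...
t = Dense(num_classes, activation='softmax', name='s_den1')(t)   # one unit per class
model = Model(inputs=t_inp, outputs=t)
model.compile(loss='categorical_crossentropy', optimizer=m_opt, metrics=['categorical_accuracy'])
# then fit on y_flow_cat instead of y_flow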
Softmax normalizes its outputs so that they sum to 1, so with a single output unit every prediction is exactly 1 (call predict on your inputs and you will see nothing but 1s), and the loss can never decrease. You can either set your labels up as two columns like the example above, or keep them as a single binary column like this:
[[1],
[1],
[0]]
and use the sigmoid activation function, which maps each output to the range 0-1. Your last layer then becomes:
t = Dense((num_classes),activation='sigmoid',name='s_den1') (t)
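To see why the softmax version can never improve, evaluate softmax over a single unit in isolation; whatever value the network produces, it normalizes to exactly 1. This is just a standalone illustration, not code from the post:
import numpy as np
z = np.array([[0.3], [-2.0], [5.0]])                       # a batch of single-unit outputs
soft = np.exp(z) / np.exp(z).sum(axis=1, keepdims=True)    # softmax over a one-element axis
sig = 1.0 / (1.0 + np.exp(-z))                             # sigmoid, applied elementwise
print(soft.ravel())   # [1. 1. 1.]  -> predictions stuck at 1, so the loss cannot move
print(sig.ravel())    # roughly [0.574 0.119 0.993] -> values the optimizer can actually push around
With the sigmoid layer and the binary_crossentropy loss you are already using, the predictions depend on the input, so the loss can decrease.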