Tensorflow. Nonlinear regression

Question

I have a feature and a label whose relationship is not linear enough for a linear solution to be satisfactory. I trained an SVR(kernel='rbf') model from sklearn, but now it's time to do the same with TensorFlow, and it's hard to say what one should write to achieve the same or a better effect.

enter image description here

Do you see that lazy orange line down there? It doesn't fill you with determination.

code itself:

import pandas as pd
import numpy as np
import tensorflow as tf
import tqdm
import matplotlib.pyplot as plt
from omnicomm_data.test_data import get_model, clean_df
import os
from sklearn import preprocessing

# Script: fit a linear model and a tanh model to the same 1-D data with the
# TensorFlow 1.x graph API, then plot both next to a pre-trained sklearn model.
graph = tf.get_default_graph()

# tf variables
x_ = tf.placeholder(name="input", shape=[None, 1], dtype=np.float32)
y_ = tf.placeholder(name="output", shape=[None, 1], dtype=np.float32)

# Linear model: w * x + b with scalar, randomly initialised parameters.
w = tf.Variable(tf.random_normal([]), name='weight')
b = tf.Variable(tf.random_normal([]), name='bias')
lin_model = tf.add(tf.multiply(x_, w), b)

# loss (mean squared error of the linear model)
loss = tf.reduce_mean(tf.pow(lin_model - y_, 2), name='loss')
train_step = tf.train.GradientDescentOptimizer(0.000000025).minimize(loss)

# nonlinear part
# The tanh model gets its own parameters. Sharing w/b with the linear model
# makes the two optimizers overwrite each other's updates on every epoch, so
# neither curve would show what its own model can learn.
w_nonlin = tf.Variable(tf.random_normal([]), name='weight_nonlin')
b_nonlin = tf.Variable(tf.random_normal([]), name='bias_nonlin')
# NOTE: tanh bounds this model's output to [-1, 1]; it cannot reach labels
# outside that range.
nonlin_model = tf.tanh(tf.add(tf.multiply(x_, w_nonlin), b_nonlin))
nonlin_loss = tf.reduce_mean(tf.pow(nonlin_model - y_, 2), name='cost')
train_step_nonlin = tf.train.GradientDescentOptimizer(0.000000025).minimize(nonlin_loss)

# Summaries for the linear loss. These names are used in the training loop
# below and were previously undefined.
tf.summary.scalar('loss', loss)
summaries = tf.summary.merge_all()
summary_writer = tf.summary.FileWriter('./logs', graph)


# pandas data
df_train = pd.read_csv('me_rate.csv', header=None)

# Column 0 is fed as the label (y_), column 1 as the feature (x_).
liters = df_train.iloc[:, 0].values.reshape(-1, 1)
parrots = df_train.iloc[:, 1].values.reshape(-1, 1)

#model for prediction
mms = preprocessing.MinMaxScaler()
# NOTE(review): path_to_model is not defined anywhere in this snippet; it has
# to be supplied before this line can run.
rbf = get_model(path_to_model)


n_epochs = 200
train_errors = []
non_train_errors = []
test_errors = []

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in tqdm.tqdm(range(n_epochs)):

        # One full-batch gradient step for the linear model.
        _, train_err, summ = sess.run([train_step, loss, summaries],
                                feed_dict={x_: parrots, y_: liters})
        summary_writer.add_summary(summ, i)
        train_errors.append(train_err)

        # One full-batch gradient step for the tanh model.
        _, non_train_err = sess.run([train_step_nonlin, nonlin_loss],
                                    feed_dict={x_: parrots, y_: liters})
        non_train_errors.append(non_train_err)

    # Flush pending events and release the event file.
    summary_writer.close()

    # Training curves.
    plt.plot(list(range(n_epochs)), train_errors, label='train_lin')
    plt.plot(list(range(n_epochs)), non_train_errors, label='train_nonlin')
    plt.legend()
    print(train_errors[:10])
    print(non_train_errors[:10])
    plt.show()

    # Fitted curves against the raw data.
    plt.scatter(parrots, liters, label='actual data')
    plt.plot(parrots, sess.run(lin_model, feed_dict={x_: parrots}), label='linear (tf)')
    plt.plot(parrots, sess.run(nonlin_model, feed_dict={x_: parrots}), label='nonlinear (tf)')
    # NOTE(review): the scaler is re-fitted on the plotted data here; presumably
    # the sklearn model was trained on inputs scaled the same way - confirm.
    plt.plot(parrots, rbf.predict(mms.fit_transform(parrots)), label='rbf (sklearn)')
    plt.legend()
    plt.show()

How to motivate that orange line?

Update (after).

The code now looks like this:

import pandas as pd
import numpy as np
import tensorflow as tf
import tqdm
import matplotlib.pyplot as plt
from omnicomm_data.test_data import get_model
import os
from sklearn import preprocessing

graph = tf.get_default_graph()

# Graph inputs: one feature column and one label column per row.
x_ = tf.placeholder(name="input", shape=[None, 1], dtype=np.float32)
y_ = tf.placeholder(name="output", shape=[None, 1], dtype=np.float32)

# Trainable scalar parameters, randomly initialised.
w = tf.Variable(tf.random_normal([]), name='weight')
b = tf.Variable(tf.random_normal([]), name='bias')

# Model: tanh is applied to the input first, then the result is scaled by w
# and shifted by b.
squashed_input = tf.tanh(x_)
nonlin_model = tf.add(tf.multiply(squashed_input, w), b)

# Mean squared error and a plain gradient-descent step on it.
residual = nonlin_model - y_
nonlin_loss = tf.reduce_mean(tf.pow(residual, 2), name='cost')
optimizer = tf.train.GradientDescentOptimizer(0.01)
train_step_nonlin = optimizer.minimize(nonlin_loss)


# Training data: column 0 is used as the label, column 1 as the feature.
df_train = pd.read_csv('me_rate.csv', header=None)
label_column = df_train.iloc[:, 0]
feature_column = df_train.iloc[:, 1]
liters = label_column.values.reshape(-1, 1)
parrots = feature_column.values.reshape(-1, 1)


# Reference sklearn model that is plotted next to the TensorFlow fit.
mms = preprocessing.MinMaxScaler()
rbf = get_model(path_to_model)


# The feature is scaled by its maximum absolute value before it reaches tanh.
nz = preprocessing.MaxAbsScaler()
norm_parrots = nz.fit_transform(parrots)
print(norm_parrots)

n_epochs = 20000
train_errors = []
non_train_errors = []
test_errors = []
weights = []
biases = []

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    # The same fetches and full-batch feed are reused on every epoch.
    fetches = [train_step_nonlin, nonlin_loss, w, b]
    train_feed = {x_: norm_parrots, y_: liters}

    for epoch in tqdm.tqdm(range(n_epochs)):
        _, non_train_err, weight, bias = sess.run(fetches, feed_dict=train_feed)
        # Keep the per-epoch loss and parameter values.
        non_train_errors.append(non_train_err)
        weights.append(weight)
        biases.append(bias)

    # Raw points, the TensorFlow fit and the sklearn fit on one figure.
    plt.scatter(norm_parrots, liters, label='actual data')

    tf_prediction = sess.run(nonlin_model, feed_dict={x_: norm_parrots})
    plt.plot(norm_parrots, tf_prediction, c='orange', label='nonlinear (tf)')

    rbf_prediction = rbf.predict(mms.fit_transform(parrots))
    plt.plot(norm_parrots, rbf_prediction, label='rbf (sklearn)')

    plt.legend()
    plt.show()

enter image description here As you can clearly see, we got some improvement for the orange line (not quite as good as rbf, but it just needs more work).

Manolo Santos · Accepted Answer

You are using tf.tanh as the activation, which means that your output is limited to the range [-1, 1]. Therefore, it will never fit your data.

Edit: I have removed the part that pointed out a typo, since it has already been fixed.

Tensorflow. Nonlinear regression

Tags:

python

tensorflow

non-linear-regression

How to motivate that orange line?

Grail Finder

1 Answers

Manolo Santos

Recent Activity

Donate For Us

Tensorflow. Nonlinear regression

Tags:

python

tensorflow

non-linear-regression

How to motivate that orange line?

Grail Finder

1 Answers

Manolo Santos

Related questions

Recent Activity

Donate For Us