I'm fairly new to this subject and I'm working on a project that deals with detecting anomalies in time-series data. I want to use TensorFlow so that I can potentially deploy the model to a mobile device. I'm having a difficult time finding relevant material and examples of anomaly detection algorithms implemented in TensorFlow.
Some algorithms I'm looking into are clustering algorithms for classifying windowed samples and Holt-Winters for streaming data.
Any example would help me tremendously!
Catching and identifying anomalies is what we call anomaly or outlier detection. For example, if large sums of money are spent one after another within a single day, and this is not typical behavior for you, a bank can block your card: it has spotted an unusual pattern in your daily transactions.
There are three main classes of anomaly detection techniques: unsupervised, semi-supervised, and supervised. Essentially, the correct anomaly detection method depends on the available labels in the dataset.
Some of the popular techniques are:
- Statistical methods (Z-score, Tukey's range test, Grubbs's test)
- Density-based techniques (k-nearest neighbor, local outlier factor, isolation forests, and many more variations of this concept)
- Subspace-, correlation-, and tensor-based outlier detection for high-dimensional data
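To make the statistical flavor concrete, here is a minimal sketch of Z-score-based outlier flagging in plain NumPy (the threshold of 3 standard deviations is a common rule of thumb, not a universal constant):

import numpy as np

def zscore_outliers(series, threshold=3.0):
  # Flag points more than `threshold` standard deviations from the mean.
  scores = np.abs(series - series.mean()) / series.std()
  return np.flatnonzero(scores > threshold)

data = np.random.normal(size=200)
data[50] = 8.  # inject an obvious outlier
print(zscore_outliers(data))  # should include index 50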
An autoencoder is a special type of neural network that is trained to copy its input to its output. For example, given an image of a handwritten digit, an autoencoder first encodes the image into a lower dimensional latent representation, then decodes the latent representation back to an image.
Here is an example of sequential filtering using Holt-Winters. The same pattern should work for other types of sequential modeling such as the Kalman filter.
from matplotlib import pyplot
import numpy as np
import tensorflow as tf
tf.logging.set_verbosity(tf.logging.INFO)
seasonality = 10
def model_fn(features, targets):
  """Defines a basic Holt-Winters sequential filtering model in TensorFlow.

  See http://www.itl.nist.gov/div898/handbook/pmc/section4/pmc435.htm"""
  times = features["times"]
  values = features["values"]
  # Initial estimates
  initial_trend = tf.reduce_sum(
      (values[seasonality:2*seasonality] - values[:seasonality])
      / seasonality ** 2)
  initial_smoothed_observation = values[0]
  # Seasonal indices are multiplicative, so having them near 0 leads to
  # instability
  initial_seasonal_indices = 1. + tf.exp(
      tf.get_variable("initial_seasonal_indices", shape=[seasonality]))
  with tf.variable_scope("smoothing_parameters",
                         initializer=tf.zeros_initializer):
    # Trained scalars for smoothing, transformed to be in (0, 1)
    observation_smoothing = tf.sigmoid(
        tf.get_variable(name="observation_smoothing", shape=[]))
    trend_smoothing = tf.sigmoid(
        tf.get_variable(name="trend_smoothing", shape=[]))
    seasonal_smoothing = tf.sigmoid(
        tf.get_variable(name="seasonal_smoothing", shape=[]))
  def filter_function(
      current_index, seasonal_indices, previous_smoothed_observation,
      previous_trend, previous_loss_sum):
    current_time = tf.gather(times, current_index)
    current_observation = tf.gather(values, current_index)
    current_season = current_time % seasonality
    one_step_ahead_prediction = (
        (previous_smoothed_observation + previous_trend)
        * tf.gather(seasonal_indices, current_season))
    new_loss_sum = previous_loss_sum + (
        one_step_ahead_prediction - current_observation) ** 2
    new_smoothed_observation = (
        (observation_smoothing * current_observation
         / tf.gather(seasonal_indices, current_season))
        + ((1. - observation_smoothing)
           * (previous_smoothed_observation + previous_trend)))
    new_trend = (
        (trend_smoothing
         * (new_smoothed_observation - previous_smoothed_observation))
        + (1. - trend_smoothing) * previous_trend)
    updated_seasonal_index = (
        seasonal_smoothing * current_observation / new_smoothed_observation
        + ((1. - seasonal_smoothing)
           * tf.gather(seasonal_indices, current_season)))
    new_seasonal_indices = tf.concat(
        concat_dim=0,
        values=[seasonal_indices[:current_season],
                [updated_seasonal_index],
                seasonal_indices[current_season + 1:]])
    # Preserve shape to keep the while_loop shape invariants happy
    new_seasonal_indices.set_shape(seasonal_indices.get_shape())
    return (current_index + 1, new_seasonal_indices,
            new_smoothed_observation, new_trend, new_loss_sum)
  def while_run_condition(current_index, *unused_args):
    return current_index < tf.shape(times)[0]
  (_, final_seasonal_indices, final_smoothed_observation, final_trend,
   sum_squared_errors) = tf.while_loop(
       cond=while_run_condition,
       body=filter_function,
       loop_vars=[0, initial_seasonal_indices, initial_smoothed_observation,
                  initial_trend, 0.])
  normalized_loss = sum_squared_errors / tf.cast(tf.shape(times)[0],
                                                 dtype=tf.float32)
  train_op = tf.contrib.layers.optimize_loss(
      loss=normalized_loss,
      global_step=tf.contrib.framework.get_global_step(),
      learning_rate=0.1,
      optimizer="Adam")
  prediction_times = tf.range(30)
  prediction_values = (
      (final_smoothed_observation + final_trend * tf.cast(prediction_times,
                                                          dtype=tf.float32))
      * tf.cast(tf.gather(params=final_seasonal_indices,
                          indices=prediction_times % seasonality),
                dtype=tf.float32))
  predictions = {"times": prediction_times,
                 "values": prediction_values}
  return predictions, normalized_loss, train_op
# Create a synthetic time series with seasonality, trend, and a little noise
series_length = 50
times = np.arange(series_length, dtype=np.int32)
values = 5. + (
    0.02 * times + np.sin(times * 2 * np.pi / float(seasonality))
    + np.random.normal(size=[series_length], scale=0.2)).astype(np.float32)
# Define an input function to feed the data into our model
input_fn = lambda: ({"times": tf.convert_to_tensor(times, dtype=tf.int32),
                     "values": tf.convert_to_tensor(values, dtype=tf.float32)},
                    {})
# Wrap the model in a tf.learn Estimator for training and inference
estimator = tf.contrib.learn.Estimator(model_fn=model_fn)
estimator.fit(input_fn=input_fn, steps=500)
predictions = estimator.predict(input_fn=input_fn, as_iterable=False)
# Plot the training data and predictions
pyplot.plot(range(series_length), values)
pyplot.plot(series_length + predictions["times"], predictions["values"])
pyplot.show()
(I was using TensorFlow 0.11.0rc0 when writing this)
Output of Holt-Winters on synthetic data: training data followed by predictions.
However, this code will be quite slow when scaled up to longer time series. The issue is that TensorFlow (and most other tools for automatic differentiation) does not perform well on sequential computation (looping). The usual remedy is to batch the data and operate on large chunks, but that is somewhat tricky for sequential models, since there is state that must be passed from one timestep to the next.
A much faster (but maybe less satisfying) approach is to use an autoregressive model. This has the added benefit of being very easy to implement in TensorFlow:
import numpy as np
from matplotlib import pyplot
import tensorflow as tf
tf.logging.set_verbosity(tf.logging.INFO)
seasonality = 10
# Create a synthetic time series with seasonality, trend, and a little noise
series_length = 50
times = np.arange(series_length, dtype=np.int32)
values = 5. + (
    0.02 * times + np.sin(times * 2 * np.pi / float(seasonality))
    + np.random.normal(size=[series_length], scale=0.2)).astype(np.float32)
# Parameters for stochastic gradient descent
batch_size = 16
window_size = 10
# Define a column format for the linear regression
input_column = tf.contrib.layers.real_valued_column(
    column_name="input_window", dimension=window_size)
def training_input_fn():
  window_starts = tf.random_uniform(shape=[batch_size], dtype=tf.int32,
                                    maxval=series_length - window_size - 1)
  element_indices = (tf.expand_dims(window_starts, 1)
                     + tf.expand_dims(tf.range(window_size), 0))
  return ({input_column: tf.gather(values, element_indices)},
          tf.gather(values, window_starts + window_size))
estimator = tf.contrib.learn.LinearRegressor(feature_columns=[input_column])
estimator.fit(input_fn=training_input_fn, steps=500)
predictions = list(values[-window_size:])
def predict_input_fn():
  return ({input_column: tf.reshape(predictions[-window_size:],
                                    [1, window_size])}, {})
predict_length = 30
for i in range(predict_length):
  prediction = estimator.predict(input_fn=predict_input_fn, as_iterable=False)
  predictions.append(prediction[0])
predictions = predictions[window_size:]
pyplot.plot(range(series_length), values)
pyplot.plot(series_length + np.arange(predict_length), predictions)
pyplot.show()
Output of the autoregressive model on the same synthetic dataset.
Notice that since the model has no state to keep, we can do mini-batch stochastic gradient descent very easily.
For clustering windowed samples, something like k-means could work for time series; a rough sketch follows.
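For example, you could slice the series into overlapping windows, cluster the windows, and treat windows far from every cluster center as anomaly candidates. This sketch uses scikit-learn's KMeans for brevity; the window size, cluster count, and 95th-percentile cutoff are all arbitrary assumptions:

import numpy as np
from sklearn.cluster import KMeans

# Synthetic seasonal series with one injected anomaly
series_length = 200
values = np.sin(np.arange(series_length) * 2 * np.pi / 10.)
values[120] += 3.

# Slice the series into overlapping windows and cluster them
window_size = 10
windows = np.stack([values[i:i + window_size]
                    for i in range(series_length - window_size)])
kmeans = KMeans(n_clusters=4, n_init=10).fit(windows)

# Windows far from every cluster center are anomaly candidates
distances = kmeans.transform(windows).min(axis=1)
print(np.flatnonzero(distances > np.percentile(distances, 95)))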