The code
import numpy as np
# The plotting helpers are exposed from pandas.plotting; the older
# pandas.tools.plotting path is not available in current pandas releases.
from pandas.plotting import autocorrelation_plot
import matplotlib.pyplot as plt

# Draw nobs independent standard-normal samples and plot their autocorrelations.
nobs = 10000
xx = np.random.normal(size=nobs)
autocorrelation_plot(xx)
plt.show()
plots the autocorrelations of xx, but it plots all 10000 lags. How do I plot only the first 10?
The function autocorrelation_plot starts as follows:
def autocorrelation_plot(series, ax=None, **kwds):
"""Autocorrelation plot for time series.
Parameters:
-----------
series: Time series
ax: Matplotlib axis object, optional
kwds : keywords
Options to pass to matplotlib plotting method
Is there a way to set the number of lags plotted using the **kwds argument?
autocorrelation_plot returns a Matplotlib Axes object (matplotlib.axes.Axes), so you can simply call its set_xlim() method to limit the x-axis:
autocorrelation_plot(xx).set_xlim([0, 10])
Reference
As a backup solution, if you do not need to use the pandas methods, there is a statsmodels function, plot_acf, which accepts a lags argument.
from statsmodels.graphics.tsaplots import plot_acf
import pandas as pd

# Small single-column sample used to demonstrate the ``lags`` argument.
d = {'value': [11, 22, 34, 22, 43, 23, 45, 32, 56, 40, 44, 33, 22, 56, 44]}
df = pd.DataFrame(d)

# ``lags`` is the argument that limits how many lags plot_acf draws.
plot_acf(df, lags=5)
The autocorrelation_plot function is a high level function. Viewing the code from the pandas library:
def autocorrelation_plot(series, ax=None, **kwds):
    """Autocorrelation plot for time series.

    Plots the sample autocorrelation of ``series`` for every lag from 1 to
    ``len(series)``, together with horizontal 95% (solid) and 99% (dashed)
    confidence bands at ``+/- z / sqrt(len(series))``.

    Parameters
    ----------
    series : Time series
        Any sequence accepted by ``len`` and ``np.asarray``.
    ax : Matplotlib axes object, optional
        Axes to draw on. When omitted, the current pyplot axes are used and
        their limits are set to x in ``[1, len(series)]`` and y in ``[-1, 1]``.
    kwds : keywords
        Options to pass to matplotlib plotting method

    Returns
    -------
    ax : Matplotlib axes object
    """
    n = len(series)
    data = np.asarray(series)
    if ax is None:
        # Imported lazily so that callers supplying their own axes do not
        # need pyplot (and a plotting backend) to be loaded.
        import matplotlib.pyplot as plt

        # Newer Matplotlib releases reject axes properties passed as keyword
        # arguments to gca(), so the limits are applied explicitly.
        ax = plt.gca()
        ax.set_xlim(1, n)
        ax.set_ylim(-1.0, 1.0)
    mean = np.mean(data)
    c0 = np.sum((data - mean) ** 2) / float(n)

    def r(h):
        # Sample autocovariance at lag h, normalised by the lag-0 value c0.
        return ((data[:n - h] - mean) *
                (data[h:] - mean)).sum() / float(n) / c0

    x = np.arange(n) + 1
    # Plain list comprehension instead of the pandas.compat ``lmap`` shim.
    y = [r(h) for h in x]
    # Two-sided standard-normal quantiles for the 95% and 99% bands.
    z95 = 1.959963984540054
    z99 = 2.5758293035489004
    ax.axhline(y=z99 / np.sqrt(n), linestyle='--', color='grey')
    ax.axhline(y=z95 / np.sqrt(n), color='grey')
    ax.axhline(y=0.0, color='black')
    ax.axhline(y=-z95 / np.sqrt(n), color='grey')
    ax.axhline(y=-z99 / np.sqrt(n), linestyle='--', color='grey')
    ax.set_xlabel("Lag")
    ax.set_ylabel("Autocorrelation")
    ax.plot(x, y, **kwds)
    if 'label' in kwds:
        ax.legend()
    ax.grid()
    return ax
If the function body above appears without indentation, indent every line after the def line by one level (the tabs were lost when the code was pasted).
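If the function uses lmap, add this import at the top of the file (pandas.compat provides lmap in older pandas versions):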
from pandas.compat import lmap
Near the end of the function, change ax.plot(x, y, **kwds) to ax.plot(x[:10], y[:10], **kwds) so that only the first 10 lags are plotted.
I've added an n_samples parameter:
from pandas.compat import lmap
def autocorrelation_plot(series, n_samples=None, ax=None, **kwds):
    """Autocorrelation plot for time series, optionally limited to the first lags.

    Plots the sample autocorrelation of ``series`` for lags ``1..n_samples``
    together with horizontal 95% (solid) and 99% (dashed) confidence bands at
    ``+/- z / sqrt(len(series))``. Only the lags that are plotted are computed.

    Parameters
    ----------
    series : Time series
        Any sequence accepted by ``len`` and ``np.asarray``.
    n_samples : int, optional
        Number of leading lags to plot. ``None`` (or 0) plots every lag from
        1 to ``len(series)``; values above ``len(series)`` are capped.
    ax : Matplotlib axes object, optional
        Axes to draw on. When omitted, the current pyplot axes are used and
        their limits are set to x in ``[1, number of plotted lags]`` and
        y in ``[-1, 1]``.
    kwds : keywords
        Options to pass to matplotlib plotting method

    Returns
    -------
    ax : Matplotlib axes object

    Raises
    ------
    ValueError
        If ``n_samples`` is negative.
    """
    n = len(series)
    data = np.asarray(series)
    if n_samples is not None and n_samples < 0:
        # A negative count would otherwise silently slice from the end.
        raise ValueError("n_samples must be non-negative")
    # Falsy n_samples (None or 0) keeps the "plot everything" behaviour.
    n_lags = min(n_samples, n) if n_samples else n
    if ax is None:
        # Imported lazily so that callers supplying their own axes do not
        # need pyplot (and a plotting backend) to be loaded.
        import matplotlib.pyplot as plt

        # Newer Matplotlib releases reject axes properties passed as keyword
        # arguments to gca(), so the limits are applied explicitly. Using
        # n_lags also avoids an undefined right limit when n_samples is None.
        ax = plt.gca()
        ax.set_xlim(1, n_lags)
        ax.set_ylim(-1.0, 1.0)
    mean = np.mean(data)
    c0 = np.sum((data - mean) ** 2) / float(n)

    def r(h):
        # Sample autocovariance at lag h, normalised by the lag-0 value c0.
        return ((data[:n - h] - mean) *
                (data[h:] - mean)).sum() / float(n) / c0

    # Evaluate only the requested lags instead of all n and slicing afterwards.
    x = np.arange(n_lags) + 1
    y = [r(h) for h in x]
    # Two-sided standard-normal quantiles for the 95% and 99% bands; the
    # bands depend on the full series length, not on the number of lags shown.
    z95 = 1.959963984540054
    z99 = 2.5758293035489004
    ax.axhline(y=z99 / np.sqrt(n), linestyle='--', color='grey')
    ax.axhline(y=z95 / np.sqrt(n), color='grey')
    ax.axhline(y=0.0, color='black')
    ax.axhline(y=-z95 / np.sqrt(n), color='grey')
    ax.axhline(y=-z99 / np.sqrt(n), linestyle='--', color='grey')
    ax.set_xlabel("Lag")
    ax.set_ylabel("Autocorrelation")
    ax.plot(x, y, **kwds)
    if 'label' in kwds:
        ax.legend()
    ax.grid()
    return ax
If you love us, you can donate to us via PayPal or buy us a coffee so we can maintain and grow. Thank you!
Donate Us With