I'm a bit new here, but I'm trying to get a statsmodels ARMA prediction tool to work. I've imported some stock data from Yahoo and gotten the ARMA model to give me fitted parameters. However, when I call predict, all I receive is a list of errors that I can't figure out. I'm not quite sure what I'm doing wrong here:
import pandas
import statsmodels.tsa.api as tsa
from pandas.io.data import DataReader
# Intended prediction window: from 1 Jan 2013 up to the moment the script runs.
start = pandas.datetime(2013,1,1)
end = pandas.datetime.today()
# Download GOOG price data from Yahoo; note that start/end are not passed here.
data = DataReader('GOOG','yahoo')
# Set up an ARMA model of order (2, 2) on the 'Close' column and fit it.
arma =tsa.ARMA(data['Close'], order =(2,2))
results= arma.fit()
# Predicting with datetime bounds is the call that raises the AttributeError.
results.predict(start=start,end=end)
The errors are:
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
C:\Windows\system32\<ipython-input-84-25a9b6bc631d> in <module>()
13 results= arma.fit()
14 results.summary()
---> 15 results.predict(start=start,end=end)
D:\Python27\lib\site-packages\statsmodels-0.5.0-py2.7.egg\statsmodels\base\wrapp
er.pyc in wrapper(self, *args, **kwargs)
88 results = object.__getattribute__(self, '_results')
89 data = results.model.data
---> 90 return data.wrap_output(func(results, *args, **kwargs), how)
91
92 argspec = inspect.getargspec(func)
D:\Python27\lib\site-packages\statsmodels-0.5.0-py2.7.egg\statsmodels\tsa\arima_
model.pyc in predict(self, start, end, exog, dynamic)
1265
1266 """
-> 1267 return self.model.predict(self.params, start, end, exog, dynamic
)
1268
1269 def forecast(self, steps=1, exog=None, alpha=.05):
D:\Python27\lib\site-packages\statsmodels-0.5.0-py2.7.egg\statsmodels\tsa\arima_
model.pyc in predict(self, params, start, end, exog, dynamic)
497
498 # will return an index of a date
--> 499 start = self._get_predict_start(start, dynamic)
500 end, out_of_sample = self._get_predict_end(end, dynamic)
501 if out_of_sample and (exog is None and self.k_exog > 0):
D:\Python27\lib\site-packages\statsmodels-0.5.0-py2.7.egg\statsmodels\tsa\arima_
model.pyc in _get_predict_start(self, start, dynamic)
404 #elif 'mle' not in method or dynamic: # should be on a date
405 start = _validate(start, k_ar, k_diff, self.data.dates,
--> 406 method)
407 start = super(ARMA, self)._get_predict_start(start)
408 _check_arima_start(start, k_ar, k_diff, method, dynamic)
D:\Python27\lib\site-packages\statsmodels-0.5.0-py2.7.egg\statsmodels\tsa\arima_
model.pyc in _validate(start, k_ar, k_diff, dates, method)
160 if isinstance(start, (basestring, datetime)):
161 start_date = start
--> 162 start = _index_date(start, dates)
163 start -= k_diff
164 if 'mle' not in method and start < k_ar - k_diff:
D:\Python27\lib\site-packages\statsmodels-0.5.0-py2.7.egg\statsmodels\tsa\base\d
atetools.pyc in _index_date(date, dates)
37 freq = _infer_freq(dates)
38 # we can start prediction at the end of endog
---> 39 if _idx_from_dates(dates[-1], date, freq) == 1:
40 return len(dates)
41
D:\Python27\lib\site-packages\statsmodels-0.5.0-py2.7.egg\statsmodels\tsa\base\d
atetools.pyc in _idx_from_dates(d1, d2, freq)
70 from pandas import DatetimeIndex
71 return len(DatetimeIndex(start=d1, end=d2,
---> 72 freq = _freq_to_pandas[freq])) - 1
73 except ImportError, err:
74 from pandas import DateRange
D:\Python27\lib\site-packages\statsmodels-0.5.0-py2.7.egg\statsmodels\tsa\base\d
atetools.pyc in __getitem__(self, key)
11 # being lazy, don't want to replace dictionary below
12 def __getitem__(self, key):
---> 13 return get_offset(key)
14 _freq_to_pandas = _freq_to_pandas_class()
15 except ImportError, err:
D:\Python27\lib\site-packages\pandas\tseries\frequencies.pyc in get_offset(name)
484 """
485 if name not in _dont_uppercase:
--> 486 name = name.upper()
487
488 if name in _rule_aliases:
AttributeError: 'NoneType' object has no attribute 'upper'
Looks like a bug to me. I'll look into it.
https://github.com/statsmodels/statsmodels/issues/712
Edit: As a workaround, you can drop the DatetimeIndex from the DataFrame and pass the model the underlying numpy array instead. This makes prediction a little trickier date-wise, but using dates for prediction is already tricky when the index has no frequency, so having just the starting and ending dates is essentially meaningless anyway.
import pandas
import statsmodels.tsa.api as tsa
from pandas.io.data import DataReader
import pandas
# Download GOOG price data from Yahoo and keep its index for date lookups.
data = DataReader('GOOG','yahoo')
dates = data.index
# start at a date on the index
# (get_loc turns the parsed date into its position on that index)
start = dates.get_loc(pandas.datetools.parse("1-2-2013"))
end = start + 30 # "steps"
# NOTE THE .values
# (the model receives a plain numpy array, so no DatetimeIndex is involved)
arma =tsa.ARMA(data['Close'].values, order =(2,2))
results= arma.fit()
# Predict by position (start .. start + 30) rather than by date.
results.predict(start, end)
If you love us, you can donate to us via PayPal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With