Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

How to use statsmodels.tsa.seasonal.seasonal_decompose with a pandas dataframe

from statsmodels.tsa.seasonal import seasonal_decompose
      def seasonal_decomp(df, model="additive"):
        """Run statsmodels' ``seasonal_decompose`` on ``df`` and return its result.

        Args:
            df: Time series to decompose. It is passed to
                ``seasonal_decompose`` unchanged; with the library versions
                shown in the traceback, a dataframe with several columns
                fails with ``TypeError: Index(...) must be called with a
                collection of some kind``.
            model: Decomposition model, forwarded to ``seasonal_decompose``
                instead of being hard-coded to ``'additive'``.

        Returns:
            The object returned by ``seasonal_decompose`` (a
            ``DecomposeResult`` according to the traceback).
        """
        return seasonal_decompose(df, model=model)
    
 # `df` is the dataframe built under "Test Data"; because it has several
 # columns, this call fails with the TypeError shown under "Error".
 seasonal_decomp(df)

Error

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-93-00543113a58a> in <module>
----> 1 seasonal_decompose(df, model='additive')

e:\Anaconda3\lib\site-packages\pandas\util\_decorators.py in wrapper(*args, **kwargs)
    197                 else:
    198                     kwargs[new_arg_name] = new_arg_value
--> 199             return func(*args, **kwargs)
    200 
    201         return cast(F, wrapper)

e:\Anaconda3\lib\site-packages\statsmodels\tsa\seasonal.py in seasonal_decompose(x, model, filt, period, two_sided, extrapolate_trend)
    185     for s, name in zip((seasonal, trend, resid, x),
    186                        ('seasonal', 'trend', 'resid', None)):
--> 187         results.append(pw.wrap(s.squeeze(), columns=name))
    188     return DecomposeResult(seasonal=results[0], trend=results[1],
    189                            resid=results[2], observed=results[3])

e:\Anaconda3\lib\site-packages\statsmodels\tools\validation\validation.py in wrap(self, obj, columns, append, trim_start, trim_end)
    216                     new.append(append if c is None else str(c) + '_' + append)
    217                 columns = new
--> 218             return pd.DataFrame(obj, columns=columns, index=index)
    219         else:
    220             raise ValueError('Can only wrap 1 or 2-d array_like')

e:\Anaconda3\lib\site-packages\pandas\core\frame.py in __init__(self, data, index, columns, dtype, copy)
    495                 mgr = init_dict({data.name: data}, index, columns, dtype=dtype)
    496             else:
--> 497                 mgr = init_ndarray(data, index, columns, dtype=dtype, copy=copy)
    498 
    499         # For data is list-like, or Iterable (will consume into list)

e:\Anaconda3\lib\site-packages\pandas\core\internals\construction.py in init_ndarray(values, index, columns, dtype, copy)
    201 
    202     # _prep_ndarray ensures that values.ndim == 2 at this point
--> 203     index, columns = _get_axes(
    204         values.shape[0], values.shape[1], index=index, columns=columns
    205     )

e:\Anaconda3\lib\site-packages\pandas\core\internals\construction.py in _get_axes(N, K, index, columns)
    460         columns = ibase.default_index(K)
    461     else:
--> 462         columns = ensure_index(columns)
    463     return index, columns
    464 

e:\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in ensure_index(index_like, copy)
   5612             index_like = copy_func(index_like)
   5613 
-> 5614     return Index(index_like)
   5615 
   5616 

e:\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in __new__(cls, data, dtype, copy, name, tupleize_cols, **kwargs)
    409 
    410         elif data is None or is_scalar(data):
--> 411             raise cls._scalar_data_error(data)
    412         elif hasattr(data, "__array__"):
    413             return Index(np.asarray(data), dtype=dtype, copy=copy, name=name, **kwargs)

TypeError: Index(...) must be called with a collection of some kind, 'seasonal' was passed

Test Data

import pandas as pd

# `data` has to exist before `df` is built from it, so the dict comes first.
# One entry per day from 2020-01-23 to 2020-03-22; each inner dict maps a
# city name to that day's value.
data = {
    pd.Timestamp('2020-01-23 00:00:00'): {'LA': 0.0, 'NY': 0.0, 'Miami': 0.0, 'Seattle': 0.0, 'San Diego': 0.0},
    pd.Timestamp('2020-01-24 00:00:00'): {'LA': 1.0, 'NY': 0.0, 'Miami': 0.0, 'Seattle': 0.0, 'San Diego': 0.0},
    pd.Timestamp('2020-01-25 00:00:00'): {'LA': 0.0, 'NY': 0.0, 'Miami': 0.0, 'Seattle': 0.0, 'San Diego': 0.0},
    pd.Timestamp('2020-01-26 00:00:00'): {'LA': 3.0, 'NY': 0.0, 'Miami': 0.0, 'Seattle': 0.0, 'San Diego': 0.0},
    pd.Timestamp('2020-01-27 00:00:00'): {'LA': 0.0, 'NY': 0.0, 'Miami': 0.0, 'Seattle': 0.0, 'San Diego': 0.0},
    pd.Timestamp('2020-01-28 00:00:00'): {'LA': 0.0, 'NY': 0.0, 'Miami': 0.0, 'Seattle': 0.0, 'San Diego': 0.0},
    pd.Timestamp('2020-01-29 00:00:00'): {'LA': 0.0, 'NY': 0.0, 'Miami': 0.0, 'Seattle': 0.0, 'San Diego': 0.0},
    pd.Timestamp('2020-01-30 00:00:00'): {'LA': 0.0, 'NY': 0.0, 'Miami': 1.0, 'Seattle': 0.0, 'San Diego': 0.0},
    pd.Timestamp('2020-01-31 00:00:00'): {'LA': 2.0, 'NY': 0.0, 'Miami': 0.0, 'Seattle': 2.0, 'San Diego': 0.0},
    pd.Timestamp('2020-02-01 00:00:00'): {'LA': 1.0, 'NY': 0.0, 'Miami': 0.0, 'Seattle': 0.0, 'San Diego': 0.0},
    pd.Timestamp('2020-02-02 00:00:00'): {'LA': 0.0, 'NY': 0.0, 'Miami': 1.0, 'Seattle': 0.0, 'San Diego': 0.0},
    pd.Timestamp('2020-02-03 00:00:00'): {'LA': 3.0, 'NY': 0.0, 'Miami': 1.0, 'Seattle': 0.0, 'San Diego': 0.0},
    pd.Timestamp('2020-02-04 00:00:00'): {'LA': 0.0, 'NY': 0.0, 'Miami': 0.0, 'Seattle': 0.0, 'San Diego': 0.0},
    pd.Timestamp('2020-02-05 00:00:00'): {'LA': 0.0, 'NY': 0.0, 'Miami': 0.0, 'Seattle': 0.0, 'San Diego': 0.0},
    pd.Timestamp('2020-02-06 00:00:00'): {'LA': 0.0, 'NY': 0.0, 'Miami': 0.0, 'Seattle': 0.0, 'San Diego': 0.0},
    pd.Timestamp('2020-02-07 00:00:00'): {'LA': 0.0, 'NY': 0.0, 'Miami': 0.0, 'Seattle': 0.0, 'San Diego': 0.0},
    pd.Timestamp('2020-02-08 00:00:00'): {'LA': 0.0, 'NY': 0.0, 'Miami': 0.0, 'Seattle': 0.0, 'San Diego': 0.0},
    pd.Timestamp('2020-02-09 00:00:00'): {'LA': 0.0, 'NY': 0.0, 'Miami': 0.0, 'Seattle': 0.0, 'San Diego': 0.0},
    pd.Timestamp('2020-02-10 00:00:00'): {'LA': 0.0, 'NY': 0.0, 'Miami': 0.0, 'Seattle': 0.0, 'San Diego': 0.0},
    pd.Timestamp('2020-02-11 00:00:00'): {'LA': 1.0, 'NY': 0.0, 'Miami': 0.0, 'Seattle': 0.0, 'San Diego': 0.0},
    pd.Timestamp('2020-02-12 00:00:00'): {'LA': 0.0, 'NY': 0.0, 'Miami': 0.0, 'Seattle': 0.0, 'San Diego': 0.0},
    pd.Timestamp('2020-02-13 00:00:00'): {'LA': 1.0, 'NY': 0.0, 'Miami': 0.0, 'Seattle': 0.0, 'San Diego': 0.0},
    pd.Timestamp('2020-02-14 00:00:00'): {'LA': 0.0, 'NY': 0.0, 'Miami': 0.0, 'Seattle': 0.0, 'San Diego': 0.0},
    pd.Timestamp('2020-02-15 00:00:00'): {'LA': 0.0, 'NY': 0.0, 'Miami': 0.0, 'Seattle': 0.0, 'San Diego': 0.0},
    pd.Timestamp('2020-02-16 00:00:00'): {'LA': 0.0, 'NY': 0.0, 'Miami': 0.0, 'Seattle': 0.0, 'San Diego': 0.0},
    pd.Timestamp('2020-02-17 00:00:00'): {'LA': 0.0, 'NY': 0.0, 'Miami': 0.0, 'Seattle': 0.0, 'San Diego': 0.0},
    pd.Timestamp('2020-02-18 00:00:00'): {'LA': 0.0, 'NY': 0.0, 'Miami': 0.0, 'Seattle': 0.0, 'San Diego': 0.0},
    pd.Timestamp('2020-02-19 00:00:00'): {'LA': 0.0, 'NY': 0.0, 'Miami': 0.0, 'Seattle': 0.0, 'San Diego': 0.0},
    pd.Timestamp('2020-02-20 00:00:00'): {'LA': 0.0, 'NY': 0.0, 'Miami': 0.0, 'Seattle': 0.0, 'San Diego': 0.0},
    pd.Timestamp('2020-02-21 00:00:00'): {'LA': 2.0, 'NY': 0.0, 'Miami': 0.0, 'Seattle': 0.0, 'San Diego': 0.0},
    pd.Timestamp('2020-02-22 00:00:00'): {'LA': 0.0, 'NY': 0.0, 'Miami': 0.0, 'Seattle': 0.0, 'San Diego': 0.0},
    pd.Timestamp('2020-02-23 00:00:00'): {'LA': 0.0, 'NY': 0.0, 'Miami': 0.0, 'Seattle': 0.0, 'San Diego': 0.0},
    pd.Timestamp('2020-02-24 00:00:00'): {'LA': 0.0, 'NY': 0.0, 'Miami': 0.0, 'Seattle': 0.0, 'San Diego': 0.0},
    pd.Timestamp('2020-02-25 00:00:00'): {'LA': 0.0, 'NY': 0.0, 'Miami': 0.0, 'Seattle': 0.0, 'San Diego': 0.0},
    pd.Timestamp('2020-02-26 00:00:00'): {'LA': 0.0, 'NY': 1.0, 'Miami': 0.0, 'Seattle': 0.0, 'San Diego': 0.0},
    pd.Timestamp('2020-02-27 00:00:00'): {'LA': 1.0, 'NY': 0.0, 'Miami': 0.0, 'Seattle': 0.0, 'San Diego': 0.0},
    pd.Timestamp('2020-02-28 00:00:00'): {'LA': 0.0, 'NY': 0.0, 'Miami': 0.0, 'Seattle': 0.0, 'San Diego': 0.0},
    pd.Timestamp('2020-02-29 00:00:00'): {'LA': 8.0, 'NY': 1.0, 'Miami': 0.0, 'Seattle': 0.0, 'San Diego': 0.0},
    pd.Timestamp('2020-03-01 00:00:00'): {'LA': 6.0, 'NY': 0.0, 'Miami': 0.0, 'Seattle': 0.0, 'San Diego': 0.0},
    pd.Timestamp('2020-03-02 00:00:00'): {'LA': 23.0, 'NY': 0.0, 'Miami': 2.0, 'Seattle': 1.0, 'San Diego': 0.0},
    pd.Timestamp('2020-03-03 00:00:00'): {'LA': 20.0, 'NY': 0.0, 'Miami': 0.0, 'Seattle': 0.0, 'San Diego': 0.0},
    pd.Timestamp('2020-03-04 00:00:00'): {'LA': 31.0, 'NY': 2.0, 'Miami': 23.0, 'Seattle': 0.0, 'San Diego': 0.0},
    pd.Timestamp('2020-03-05 00:00:00'): {'LA': 70.0, 'NY': 0.0, 'Miami': 2.0, 'Seattle': 1.0, 'San Diego': 1.0},
    pd.Timestamp('2020-03-06 00:00:00'): {'LA': 48.0, 'NY': 9.0, 'Miami': 1.0, 'Seattle': 9.0, 'San Diego': 0.0},
    pd.Timestamp('2020-03-07 00:00:00'): {'LA': 115.0, 'NY': 0.0, 'Miami': 3.0, 'Seattle': 0.0, 'San Diego': 0.0},
    pd.Timestamp('2020-03-08 00:00:00'): {'LA': 114.0, 'NY': 7.0, 'Miami': 5.0, 'Seattle': 4.0, 'San Diego': 2.0},
    pd.Timestamp('2020-03-09 00:00:00'): {'LA': 68.0, 'NY': 5.0, 'Miami': 4.0, 'Seattle': 0.0, 'San Diego': 0.0},
    pd.Timestamp('2020-03-10 00:00:00'): {'LA': 192.0, 'NY': 6.0, 'Miami': 13.0, 'Seattle': 3.0, 'San Diego': 4.0},
    pd.Timestamp('2020-03-11 00:00:00'): {'LA': 398.0, 'NY': 7.0, 'Miami': 6.0, 'Seattle': 0.0, 'San Diego': 6.0},
    pd.Timestamp('2020-03-12 00:00:00'): {'LA': 452.0, 'NY': 14.0, 'Miami': 11.0, 'Seattle': 8.0, 'San Diego': 4.0},
    pd.Timestamp('2020-03-13 00:00:00'): {'LA': 596.0, 'NY': 99.0, 'Miami': 9.0, 'Seattle': 17.0, 'San Diego': 7.0},
    pd.Timestamp('2020-03-14 00:00:00'): {'LA': 713.0, 'NY': 0.0, 'Miami': 20.0, 'Seattle': 14.0, 'San Diego': 14.0},
    pd.Timestamp('2020-03-15 00:00:00'): {'LA': 98.0, 'NY': 11.0, 'Miami': 11.0, 'Seattle': 4.0, 'San Diego': 13.0},
    pd.Timestamp('2020-03-16 00:00:00'): {'LA': 1392.0, 'NY': 38.0, 'Miami': 6.0, 'Seattle': 27.0, 'San Diego': 11.0},
    pd.Timestamp('2020-03-17 00:00:00'): {'LA': 1781.0, 'NY': 121.0, 'Miami': 23.0, 'Seattle': 24.0, 'San Diego': 0.0},
    pd.Timestamp('2020-03-18 00:00:00'): {'LA': 2776.0, 'NY': 51.0, 'Miami': 14.0, 'Seattle': 33.0, 'San Diego': 54.0},
    pd.Timestamp('2020-03-19 00:00:00'): {'LA': 5240.0, 'NY': 249.0, 'Miami': 38.0, 'Seattle': 52.0, 'San Diego': 34.0},
    pd.Timestamp('2020-03-20 00:00:00'): {'LA': 5322.0, 'NY': 172.0, 'Miami': 50.0, 'Seattle': 54.0, 'San Diego': 52.0},
    pd.Timestamp('2020-03-21 00:00:00'): {'LA': 6346.0, 'NY': 228.0, 'Miami': 86.0, 'Seattle': 53.0, 'San Diego': 38.0},
    pd.Timestamp('2020-03-22 00:00:00'): {'LA': 7936.0, 'NY': 525.0, 'Miami': 66.0, 'Seattle': 61.0, 'San Diego': 34.0},
}

# orient='index' turns each outer key (a Timestamp) into a row label and each
# inner key (a city name) into a column.
df = pd.DataFrame.from_dict(data, orient='index')
like image 842
New_to_Python Avatar asked Oct 15 '22 00:10

New_to_Python


1 Answer

  • The issue is the call seasonal_decompose(df, model='additive'): the entire dataframe is being passed to seasonal_decompose, but you may only pass a single column, and it must have a datetime index.
  • The function has been updated to use a list comprehension to calculate the .trend for each column, and then combine the data into a single dataframe with pandas.concat.
from statsmodels.tsa.seasonal import seasonal_decompose
import pandas as pd

# dataframe from the sample `data` dict; orient='index' uses the dict keys as
# row labels, and those keys are Timestamps, so the index is already datetime
df = pd.DataFrame.from_dict(data, orient='index')

# convert the index to datetime; this matters when the dates are not already
# datetime (e.g. they were read in as strings)
df.index = pd.to_datetime(df.index)

# perform seasonal decompose in a list comprehension on each column, return dataframe
def season_decom(df, model='additive'):
    """Return the trend component of every column of ``df`` as one dataframe.

    Args:
        df: Dataframe whose columns are each decomposed separately.
        model: Decomposition model passed to ``seasonal_decompose``.

    Returns:
        A dataframe with one column per column of ``df``, each holding the
        ``.trend`` of that column's decomposition.
    """
    trend_frames = []
    for col in df.columns:
        # decompose one column at a time rather than the whole dataframe
        decomposition = seasonal_decompose(df[col], model=model)
        trend_frames.append(pd.DataFrame({col: decomposition.trend}))
    # place the per-column trend frames side by side
    return pd.concat(trend_frames, axis=1)


# call function; the result has one trend column for each column of df
df_seasonal = season_decom(df)

# df_seasonal.head(10)
                  LA   NY     Miami   Seattle  San Diego
2020-01-23       NaN  NaN       NaN       NaN        NaN
2020-01-24       NaN  NaN       NaN       NaN        NaN
2020-01-25       NaN  NaN       NaN       NaN        NaN
2020-01-26  0.571429  0.0  0.000000  0.000000        0.0
2020-01-27  0.571429  0.0  0.142857  0.000000        0.0
2020-01-28  0.714286  0.0  0.142857  0.285714        0.0
2020-01-29  0.857143  0.0  0.142857  0.285714        0.0
2020-01-30  0.428571  0.0  0.285714  0.285714        0.0
2020-01-31  0.857143  0.0  0.428571  0.285714        0.0
2020-02-01  0.857143  0.0  0.428571  0.285714        0.0

Simplified Version

  • apply seasonal_decompose to each column with .apply
# .apply hands the lambda one column at a time (x), and only the .trend of
# each decomposition is kept
df_seasonal = df.apply(lambda x: seasonal_decompose(x, model='additive').trend)
like image 169
Trenton McKinney Avatar answered Oct 19 '22 00:10

Trenton McKinney