Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

statsmodels: simulate data and run a simple linear regression

I'm new to the Python statsmodels package. I'm trying to simulate some data that is linearly related to log(x) and run a simple linear regression using the statsmodels formula interface. Here is the code:

import pandas as pd
import numpy as np
import statsmodels.formula.api as smf

# True intercept (B0) and slope (B1) of the simulated model
# Y = B0 + B1*log(X) + noise.
B0 = 3
B1 = 0.5
# 1000 evenly spaced predictor values from 10 to 1e4.
x = np.linspace(10, 1e4, num = 1000)
# Gaussian noise (mean 0, standard deviation 3), one draw per observation.
epsilon = np.random.normal(0,3, size=1000)

# Response is linear in log(x), plus the noise term.
y=B0 + B1*np.log(x)+epsilon
df1 = pd.DataFrame({'Y':y, 'X':x})

# NOTE: this is the call that raises the ValueError in the traceback below.
# The uppercase OLS constructor takes (endog, exog, ...), so the formula
# string is received as `endog` (a str) while `exog` stays None.
model = smf.OLS ('Y~np.log(X)', data=df1).fit()

I got the error below:

ValueError                                Traceback (most recent call last)
<ipython-input-34-c0ab32ca2acf> in <module>()
      7 y=B0 + B1*np.log(X)+epsilon
      8 df1 = pd.DataFrame({'Y':y, 'X':X})
----> 9 smf.OLS ('Y~np.log(X)', data=df1)

/Users/tiger/anaconda/lib/python3.5/site-packages/statsmodels/regression/linear_model.py in __init__(self, endog, exog, missing, hasconst, **kwargs)
    689                  **kwargs):
    690         super(OLS, self).__init__(endog, exog, missing=missing,
--> 691                                   hasconst=hasconst, **kwargs)
    692         if "weights" in self._init_keys:
    693             self._init_keys.remove("weights")

/Users/tiger/anaconda/lib/python3.5/site-packages/statsmodels/regression/linear_model.py in __init__(self, endog, exog, weights, missing, hasconst, **kwargs)
    584             weights = weights.squeeze()
    585         super(WLS, self).__init__(endog, exog, missing=missing,
--> 586                                   weights=weights, hasconst=hasconst, **kwargs)
    587         nobs = self.exog.shape[0]
    588         weights = self.weights

/Users/tiger/anaconda/lib/python3.5/site-packages/statsmodels/regression/linear_model.py in __init__(self, endog, exog, **kwargs)
     89     """
     90     def __init__(self, endog, exog, **kwargs):
---> 91         super(RegressionModel, self).__init__(endog, exog, **kwargs)
     92         self._data_attr.extend(['pinv_wexog', 'wendog', 'wexog', 'weights'])
     93 

/Users/tiger/anaconda/lib/python3.5/site-packages/statsmodels/base/model.py in __init__(self, endog, exog, **kwargs)
    184 
    185     def __init__(self, endog, exog=None, **kwargs):
--> 186         super(LikelihoodModel, self).__init__(endog, exog, **kwargs)
    187         self.initialize()
    188 

/Users/tiger/anaconda/lib/python3.5/site-packages/statsmodels/base/model.py in __init__(self, endog, exog, **kwargs)
     58         hasconst = kwargs.pop('hasconst', None)
     59         self.data = self._handle_data(endog, exog, missing, hasconst,
---> 60                                       **kwargs)
     61         self.k_constant = self.data.k_constant
     62         self.exog = self.data.exog

/Users/tiger/anaconda/lib/python3.5/site-packages/statsmodels/base/model.py in _handle_data(self, endog, exog, missing, hasconst, **kwargs)
     82 
     83     def _handle_data(self, endog, exog, missing, hasconst, **kwargs):
---> 84         data = handle_data(endog, exog, missing, hasconst, **kwargs)
     85         # kwargs arrays could have changed, easier to just attach here
     86         for key in kwargs:

/Users/tiger/anaconda/lib/python3.5/site-packages/statsmodels/base/data.py in handle_data(endog, exog, missing, hasconst, **kwargs)
    562         exog = np.asarray(exog)
    563 
--> 564     klass = handle_data_class_factory(endog, exog)
    565     return klass(endog, exog=exog, missing=missing, hasconst=hasconst,
    566                  **kwargs)

/Users/tiger/anaconda/lib/python3.5/site-packages/statsmodels/base/data.py in handle_data_class_factory(endog, exog)
    551     else:
    552         raise ValueError('unrecognized data structures: %s / %s' %
--> 553                          (type(endog), type(exog)))
    554     return klass
    555 

ValueError: unrecognized data structures: <class 'str'> / <class 'NoneType'>

I checked the documentation and everything seems to be right. I spent a long time trying to understand why I get this error but could not figure it out. Any help is very much appreciated.

like image 259
zesla Avatar asked Dec 24 '22 21:12

zesla


1 Answers

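In statsmodels.formula.api the formula-based function is lowercase: ols. The all-caps OLS (as used from statsmodels.api) is the model class, whose constructor expects endog and exog arrays rather than a formula string. That is why your call fails with "unrecognized data structures: <class 'str'> / <class 'NoneType'>": the formula string was taken as endog and exog was left as None. In your case you need...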

# Lowercase `ols` is the formula-interface function: it takes the formula
# string and builds the model from the columns of `data`.
model = smf.ols('Y~np.log(X)', data=df1).fit()
like image 123
rtk22 Avatar answered Mar 04 '23 14:03

rtk22