I'm new to the Python statsmodels package. I'm trying to simulate some data that is linearly related to log(x) and fit a simple linear regression using the statsmodels formula interface. Here is the code:
import pandas as pd
import numpy as np
import statsmodels.formula.api as smf
B0 = 3
B1 = 0.5
x = np.linspace(10, 1e4, num = 1000)
epsilon = np.random.normal(0,3, size=1000)
y=B0 + B1*np.log(x)+epsilon
df1 = pd.DataFrame({'Y':y, 'X':x})
model = smf.OLS ('Y~np.log(X)', data=df1).fit()
I got the error below:
ValueError                                Traceback (most recent call last)
<ipython-input-34-c0ab32ca2acf> in <module>()
      7 y=B0 + B1*np.log(X)+epsilon
      8 df1 = pd.DataFrame({'Y':y, 'X':X})
----> 9 smf.OLS ('Y~np.log(X)', data=df1)
/Users/tiger/anaconda/lib/python3.5/site-packages/statsmodels/regression/linear_model.py in __init__(self, endog, exog, missing, hasconst, **kwargs)
    689                  **kwargs):
    690         super(OLS, self).__init__(endog, exog, missing=missing,
--> 691                                   hasconst=hasconst, **kwargs)
    692         if "weights" in self._init_keys:
    693             self._init_keys.remove("weights")
/Users/tiger/anaconda/lib/python3.5/site-packages/statsmodels/regression/linear_model.py in __init__(self, endog, exog, weights, missing, hasconst, **kwargs)
    584             weights = weights.squeeze()
    585         super(WLS, self).__init__(endog, exog, missing=missing,
--> 586                                   weights=weights, hasconst=hasconst, **kwargs)
    587         nobs = self.exog.shape[0]
    588         weights = self.weights
/Users/tiger/anaconda/lib/python3.5/site-packages/statsmodels/regression/linear_model.py in __init__(self, endog, exog, **kwargs)
     89     """
     90     def __init__(self, endog, exog, **kwargs):
---> 91         super(RegressionModel, self).__init__(endog, exog, **kwargs)
     92         self._data_attr.extend(['pinv_wexog', 'wendog', 'wexog', 'weights'])
     93 
/Users/tiger/anaconda/lib/python3.5/site-packages/statsmodels/base/model.py in __init__(self, endog, exog, **kwargs)
    184 
    185     def __init__(self, endog, exog=None, **kwargs):
--> 186         super(LikelihoodModel, self).__init__(endog, exog, **kwargs)
    187         self.initialize()
    188 
/Users/tiger/anaconda/lib/python3.5/site-packages/statsmodels/base/model.py in __init__(self, endog, exog, **kwargs)
     58         hasconst = kwargs.pop('hasconst', None)
     59         self.data = self._handle_data(endog, exog, missing, hasconst,
---> 60                                       **kwargs)
     61         self.k_constant = self.data.k_constant
     62         self.exog = self.data.exog
/Users/tiger/anaconda/lib/python3.5/site-packages/statsmodels/base/model.py in _handle_data(self, endog, exog, missing, hasconst, **kwargs)
     82 
     83     def _handle_data(self, endog, exog, missing, hasconst, **kwargs):
---> 84         data = handle_data(endog, exog, missing, hasconst, **kwargs)
     85         # kwargs arrays could have changed, easier to just attach here
     86         for key in kwargs:
/Users/tiger/anaconda/lib/python3.5/site-packages/statsmodels/base/data.py in handle_data(endog, exog, missing, hasconst, **kwargs)
    562         exog = np.asarray(exog)
    563 
--> 564     klass = handle_data_class_factory(endog, exog)
    565     return klass(endog, exog=exog, missing=missing, hasconst=hasconst,
    566                  **kwargs)
/Users/tiger/anaconda/lib/python3.5/site-packages/statsmodels/base/data.py in handle_data_class_factory(endog, exog)
    551     else:
    552         raise ValueError('unrecognized data structures: %s / %s' %
--> 553                          (type(endog), type(exog)))
    554     return klass
    555 
ValueError: unrecognized data structures: <class 'str'> / <class 'NoneType'>
I checked the documentation and everything seems to be right. I spent a long time trying to understand why I got this error but could not figure it out. Any help is much appreciated.
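In statsmodels.formula.api, the formula-based function is the lowercase ols. The all-caps OLS is the model class (the one normally used via statsmodels.api); its constructor expects endog and exog arrays rather than a formula, so your formula string was taken as endog and exog was left as None, which is exactly what the error message reports (<class 'str'> / <class 'NoneType'>). In your case you need: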
model = smf.ols('Y~np.log(X)', data=df1).fit()
                        If you love us, you can donate via PayPal or buy us a coffee so we can maintain and grow the site. Thank you!
Donate Us With