I'm working on a new data structure that subclasses the pandas DataFrame. I want to require every instance of my new data structure to have new_property, so that it can be processed safely later on. However, I'm running into an error when using it, because the constructor gets called by some internal pandas function without the required argument. Here is my new data structure:
import pandas as pd
class MyDataFrame(pd.DataFrame):
    """DataFrame subclass that carries an extra ``new_property`` attribute.

    ``new_property`` is a required constructor argument and is registered
    in ``_metadata``.

    NOTE: ``_constructor`` hands pandas the class itself, so any pandas
    code path that calls ``self._constructor(new_data)`` with a single
    argument fails with ``TypeError`` because ``new_property`` is missing.
    """

    # Names of the extra attributes this subclass declares to pandas.
    _metadata = ['new_property']

    def __init__(self, data, new_property, index=None, columns=None, dtype=None, copy=True):
        """Build the frame from ``data`` and store ``new_property`` on it.

        ``index``, ``columns``, ``dtype`` and ``copy`` are forwarded
        unchanged to ``pd.DataFrame.__init__``.
        """
        super().__init__(data=data, index=index, columns=columns, dtype=dtype, copy=copy)
        self.new_property = new_property

    @property
    def _constructor(self):
        """Return the callable pandas uses to build derived frames."""
        return MyDataFrame
Here is an example that causes the error:
# Sample column data for the reproduction.
data1 = {'a': [1, 2, 3], 'b': [4, 5, 6], 'c': [15, 25, 30], 'd': [1, 1, 2]}
# new_property is passed explicitly here, so direct construction works.
df1 = MyDataFrame(data1, new_property='value')
# Selecting a subset of columns is the statement that raises the TypeError:
# pandas rebuilds the result through _constructor without new_property.
df1[['a', 'b']]
Here is the error message
Traceback (most recent call last):
  File "C:\ProgramData\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2881, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-33-b630fbf14234>", line 1, in <module>
    df1[['a', 'b']]
  File "C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\frame.py", line 2053, in __getitem__
    return self._getitem_array(key)
  File "C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\frame.py", line 2098, in _getitem_array
    return self.take(indexer, axis=1, convert=True)
  File "C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\generic.py", line 1670, in take
    result = self._constructor(new_data).__finalize__(self)
TypeError: __init__() missing 1 required positional argument: 'new_property'
Is there a fix for this, or an alternative design that guarantees my new data structure always has new_property?
Thanks in advance!
This question has been answered by a brilliant pandas developer. See this issue for more details. Pasting the answer here.
class MyDataFrame(pd.DataFrame):
    """DataFrame subclass whose instances carry a ``new_property`` attribute.

    User code must still pass ``new_property`` to the constructor. pandas
    internals instead go through ``_internal_ctor``, which supplies a
    placeholder so that derived frames can be built without the argument.
    """

    # Names of the extra attributes this subclass declares to pandas.
    _metadata = ['new_property']

    def __init__(self, data, new_property, index=None, columns=None, dtype=None, copy=True):
        """Build the frame from ``data`` and store ``new_property`` on it.

        ``index``, ``columns``, ``dtype`` and ``copy`` are forwarded
        unchanged to ``pd.DataFrame.__init__``.
        """
        super().__init__(data=data, index=index, columns=columns, dtype=dtype, copy=copy)
        self.new_property = new_property

    @classmethod
    def _internal_ctor(cls, *args, **kwargs):
        """Construct an instance with ``new_property`` forced to ``None``.

        Any ``new_property`` given by the caller is overridden. pandas calls
        ``__finalize__`` on the result of ``_constructor``, which presumably
        is what fills in the real value afterwards.
        """
        return cls(*args, **{**kwargs, 'new_property': None})

    @property
    def _constructor(self):
        """Return the callable pandas uses to build derived frames."""
        return MyDataFrame._internal_ctor
# Same sample data as in the failing example.
data1 = {'a': [1, 2, 3], 'b': [4, 5, 6], 'c': [15, 25, 30], 'd': [1, 1, 2]}
df1 = MyDataFrame(data1, new_property='value')
# The column subset can now be built, and it reports the new_property of df1.
df1[['a', 'b']].new_property
Out[121]: 'value'
# Direct construction without new_property is still rejected with a TypeError.
MyDataFrame(data1)
TypeError: __init__() missing 1 required positional argument: 'new_property'
                        If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With