I've been using the apply method on a dataframe to create new columns. So, if I have a df that looks like this:
stdf.columns
Index(['Username', 'First Name', 'Last Name', 'Class', 'Screens Typed','Time Spent', 'Avg Speed', 'Avg Acc'], dtype='object')
I've been using syntax like this to create new columns:
stdf['uid'] = stdf['Username'].apply(lambda x: x[0:6]) + "-" + stdf['First Name'] + "-" + stdf['Last Name']
Today, when using that same method to create a new column, I get a KeyError on the new column name:
stdf['truSpeed'] = stdf['nSpeed'].apply(lambda x: x * .1 * stdf["truAcc"])
Yes, 'nSpeed' and 'truAcc' do exist as columns:
Index(['Username', 'First Name', 'Last Name', 'Class', 'Screens Typed', 'Time Spent', 'Avg Speed', 'Avg Acc', 'truTime', 'uid', 'truAcc',
'nSpeed'], dtype='object')
The KeyError points to the 'truSpeed' identifier. So my question is: why is pandas now telling me I have a KeyError when trying to create a new column, when it has always created the new column in the past?
There must be some other error that I'm not seeing.
Here's the almost-full traceback:
KeyError Traceback (most recent call last)
/home/david/dev/msc/lib/python3.5/site-packages/pandas/indexes/base.py in get_loc(self, key, method, tolerance)
2133 try:
-> 2134 return self._engine.get_loc(key)
2135 except KeyError:
pandas/index.pyx in pandas.index.IndexEngine.get_loc (pandas/index.c:4433)()
pandas/index.pyx in pandas.index.IndexEngine.get_loc (pandas/index.c:4279)()
pandas/src/hashtable_class_helper.pxi in pandas.hashtable.PyObjectHashTable.get_item (pandas/hashtable.c:13742)()
pandas/src/hashtable_class_helper.pxi in pandas.hashtable.PyObjectHashTable.get_item (pandas/hashtable.c:13696)()
KeyError: 'truSpeed'
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
/home/david/dev/msc/lib/python3.5/site-packages/pandas/core/internals.py in set(self, item, value, check)
3667 try:
-> 3668 loc = self.items.get_loc(item)
3669 except KeyError:
/home/david/dev/msc/lib/python3.5/site-packages/pandas/indexes/base.py in get_loc(self, key, method, tolerance)
2135 except KeyError:
-> 2136 return self._engine.get_loc(self._maybe_cast_indexer(key))
2137
pandas/index.pyx in pandas.index.IndexEngine.get_loc (pandas/index.c:4433)()
pandas/index.pyx in pandas.index.IndexEngine.get_loc (pandas/index.c:4279)()
pandas/src/hashtable_class_helper.pxi in pandas.hashtable.PyObjectHashTable.get_item (pandas/hashtable.c:13742)()
pandas/src/hashtable_class_helper.pxi in pandas.hashtable.PyObjectHashTable.get_item (pandas/hashtable.c:13696)()
KeyError: 'truSpeed'
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
<ipython-input-18-35d20ff4edf0> in <module>()
4 stdf['nSpeed'] = stdf['Avg Speed'].apply(lambda x: int(x.split(" ")[0]))
5 print(stdf.columns)
----> 6 stdf['truSpeed'] = stdf['nSpeed'].apply(lambda x: x * .1 * stdf["truAcc"])
7 # stdf['truSpeed']
8 # print(stdf.columns)
/home/david/dev/msc/lib/python3.5/site-packages/pandas/core/frame.py in __setitem__(self, key, value)
2417 else:
2418 # set column
-> 2419 self._set_item(key, value)
2420
2421 def _setitem_slice(self, key, value):
/home/david/dev/msc/lib/python3.5/site-packages/pandas/core/frame.py in _set_item(self, key, value)
2484 self._ensure_valid_index(value)
2485 value = self._sanitize_column(key, value)
-> 2486 NDFrame._set_item(self, key, value)
2487
2488 # check if we are modifying a copy
/home/david/dev/msc/lib/python3.5/site-packages/pandas/core/generic.py in _set_item(self, key, value)
1498
1499 def _set_item(self, key, value):
-> 1500 self._data.set(key, value)
1501 self._clear_item_cache()
1502
/home/david/dev/msc/lib/python3.5/site-packages/pandas/core/internals.py in set(self, item, value, check)
3669 except KeyError:
3670 # This item wasn't present, just insert at end
-> 3671 self.insert(len(self.items), item, value)
3672 return
3673
/home/david/dev/msc/lib/python3.5/site-packages/pandas/core/internals.py in insert(self, loc, item, value, allow_duplicates)
3770
3771 block = make_block(values=value, ndim=self.ndim,
-> 3772 placement=slice(loc, loc + 1))
3773
3774 for blkno, count in _fast_count_smallints(self._blknos[loc:]):
/home/david/dev/msc/lib/python3.5/site-packages/pandas/core/internals.py in make_block(values, placement, klass, ndim, dtype, fastpath)
2683 placement=placement, dtype=dtype)
2684
-> 2685 return klass(values, ndim=ndim, fastpath=fastpath, placement=placement)
2686
2687 # TODO: flexible with index=None and/or items=None
/home/david/dev/msc/lib/python3.5/site-packages/pandas/core/internals.py in __init__(self, values, placement, ndim, fastpath)
107 raise ValueError('Wrong number of items passed %d, placement '
108 'implies %d' % (len(self.values),
--> 109 len(self.mgr_locs)))
110
111 @property
ValueError: Wrong number of items passed 58, placement implies 1
stdf['truSpeed'] = stdf['nSpeed'].apply(lambda x: x * .1 * stdf["truAcc"])
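Should be one of the forms below. In the original line, `x` is a single value from 'nSpeed', but `stdf["truAcc"]` inside the lambda is the entire column, so every call returns a whole Series and `apply` produces a DataFrame instead of a single column. That is what the final `ValueError: Wrong number of items passed 58, placement implies 1` is reporting; the KeyErrors earlier in the traceback are just pandas finding that 'truSpeed' does not exist yet before it tries to insert it. Multiply the columns directly instead: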
stdf['truSpeed'] = stdf.eval('nSpeed * truAcc * .1')
Or
stdf['truSpeed'] = stdf['nSpeed'] * stdf['truAcc'] * .1
Or, the slow way, with a row-wise apply:
stdf['truSpeed'] = stdf.apply(lambda x: x['nSpeed'] * x['truAcc'] * .1, axis=1)
If you love us, you can donate to us via PayPal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With