I've been reading from an Excel sheet with pandas for the past month with no problems.
Recently though I made a change to my date formats (which have now been switched back to their original formats mm/dd/yyyy).
All of a sudden, pandas now throws an error when I try to read my .xlsm file.
Below is the full error
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-5-655581a0eaab> in <module>
----> 1 inv = p.read(r'C:\Users\Jake\OneDrive\Documents\Eastern\Inventory\Inventory_Log.xlsm')
~\Anaconda3\lib\site-packages\pyinv\__init__.py in read(filepath)
36 'Cert_Expire':'string',
37 'Rating':'string',
---> 38 'Notes':'string'
39 })
40 amount = inv.Item.count()
~\Anaconda3\lib\site-packages\pandas\io\excel\_base.py in read_excel(io, sheet_name, header, names, index_col, usecols, squeeze, dtype, engine, converters, true_values, false_values, skiprows, nrows, na_values, keep_default_na, verbose, parse_dates, date_parser, thousands, comment, skipfooter, convert_float, mangle_dupe_cols, **kwds)
332 convert_float=convert_float,
333 mangle_dupe_cols=mangle_dupe_cols,
--> 334 **kwds,
335 )
336
~\Anaconda3\lib\site-packages\pandas\io\excel\_base.py in parse(self, sheet_name, header, names, index_col, usecols, squeeze, converters, true_values, false_values, skiprows, nrows, na_values, parse_dates, date_parser, thousands, comment, skipfooter, convert_float, mangle_dupe_cols, **kwds)
883 convert_float=convert_float,
884 mangle_dupe_cols=mangle_dupe_cols,
--> 885 **kwds,
886 )
887
~\Anaconda3\lib\site-packages\pandas\io\excel\_base.py in parse(self, sheet_name, header, names, index_col, usecols, squeeze, dtype, true_values, false_values, skiprows, nrows, na_values, verbose, parse_dates, date_parser, thousands, comment, skipfooter, convert_float, mangle_dupe_cols, **kwds)
510 )
511
--> 512 output[asheetname] = parser.read(nrows=nrows)
513
514 if not squeeze or isinstance(output[asheetname], DataFrame):
~\Anaconda3\lib\site-packages\pandas\io\parsers.py in read(self, nrows)
1131 def read(self, nrows=None):
1132 nrows = _validate_integer("nrows", nrows)
-> 1133 ret = self._engine.read(nrows)
1134
1135 # May alter columns / col_dict
~\Anaconda3\lib\site-packages\pandas\io\parsers.py in read(self, rows)
2462 columns, data = self._do_date_conversions(columns, data)
2463
-> 2464 data = self._convert_data(data)
2465 index, columns = self._make_index(data, alldata, columns, indexnamerow)
2466
~\Anaconda3\lib\site-packages\pandas\io\parsers.py in _convert_data(self, data)
2532 self.verbose,
2533 clean_conv,
-> 2534 clean_dtypes,
2535 )
2536
~\Anaconda3\lib\site-packages\pandas\io\parsers.py in _convert_to_ndarrays(self, dct, na_values, na_fvalues, verbose, converters, dtypes)
1725 # invalid input to is_bool_dtype
1726 pass
-> 1727 cvals = self._cast_types(cvals, cast_type, c)
1728
1729 result[c] = cvals
~\Anaconda3\lib\site-packages\pandas\io\parsers.py in _cast_types(self, values, cast_type, column)
1823 array_type = cast_type.construct_array_type()
1824 try:
-> 1825 return array_type._from_sequence_of_strings(values, dtype=cast_type)
1826 except NotImplementedError:
1827 raise NotImplementedError(
~\Anaconda3\lib\site-packages\pandas\core\arrays\string_.py in _from_sequence_of_strings(cls, strings, dtype, copy)
199 @classmethod
200 def _from_sequence_of_strings(cls, strings, dtype=None, copy=False):
--> 201 return cls._from_sequence(strings, dtype=dtype, copy=copy)
202
203 def __arrow_array__(self, type=None):
~\Anaconda3\lib\site-packages\pandas\core\arrays\string_.py in _from_sequence(cls, scalars, dtype, copy)
195 result[na_values] = StringDtype.na_value
196
--> 197 return cls(result)
198
199 @classmethod
~\Anaconda3\lib\site-packages\pandas\core\arrays\string_.py in __init__(self, values, copy)
164 self._dtype = StringDtype()
165 if not skip_validation:
--> 166 self._validate()
167
168 def _validate(self):
~\Anaconda3\lib\site-packages\pandas\core\arrays\string_.py in _validate(self)
169 """Validate that we only store NA or strings."""
170 if len(self._ndarray) and not lib.is_string_array(self._ndarray, skipna=True):
--> 171 raise ValueError("StringArray requires a sequence of strings or pandas.NA")
172 if self._ndarray.dtype != "object":
173 raise ValueError(
ValueError: StringArray requires a sequence of strings or pandas.NA
This is happening because when pandas is reading it I'm having it convert the columns to string dtypes. No idea why the date formats broke this.
Currently using Pandas 1.0.1
Any information is greatly appreciated.
I will provide more details if needed.
According to the pandas documentation:
The dtype can now be given either as `str` or as `"string"` (the same applies to `df.astype()`).
The difference is: with `"string"`, pandas will try to convert the column to a StringArray, which can only contain strings or `pandas.NA` and therefore cannot contain any numbers.
With `str`, the column will be handled as an object dtype, which works as before.
Change the dtype declaration for the datetime columns from `'col1': 'string'` to `'col1': str`.
This behaviour seems to be related to changes in pandas 1.0.
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With