I have a CSV file with the column names in upper case. I am reading the data using csv.DictReader, but I need the column names in lowercase.
I found this code in the question "Accessing csv header white space and case insensitive":
import csv
class DictReaderInsensitive(csv.DictReader):
    # A csv.DictReader variant with normalized header names: every field
    # name has surrounding white space removed and is converted to lower case.

    @property
    def fieldnames(self):
        # Take the header names found by the stock DictReader and normalize
        # each one with strip() + lower().
        return [field.strip().lower() for field in super(DictReaderInsensitive, self).fieldnames]

    def __next__(self):
        # Create the case-insensitive container first, then fill it with the
        # key/value pairs of the row produced by the original __next__.
        insensitive_row = DictInsensitive()
        insensitive_row.update(super(DictReaderInsensitive, self).__next__())
        return insensitive_row
class DictInsensitive(dict):
    # A dict whose item lookup (d[key]) ignores case and surrounding white
    # space in the requested key. Only __getitem__ is overridden, so keys are
    # stored exactly as given and other dict methods are untouched.

    def __getitem__(self, key):
        # Normalize the requested key the same way the reader normalizes
        # the header names, then defer to the plain dict lookup.
        normalized = key.strip().lower()
        return super(DictInsensitive, self).__getitem__(normalized)
My problem is that when I run this with
datafile = open(self.ifs_data_file,'rU')
csvDict = DictReaderInsensitive(datafile)
for row in csvDict:
print row
#self.db.ifs_data.insert(**row)
#self.db.commit()
I get this error
Traceback (most recent call last):
File "D:\Development\python\supplier_review\supplier_review.py", line 239, in update_ifs_data
for row in csvDict:
File "D:\Python27_5\lib\csv.py", line 103, in next
self.fieldnames
File "D:\Development\python\supplier_review\supplier_review.py", line 288, in fieldnames
return [field.strip().lower() for field in super(DictReaderInsensitive, self).fieldnames]
TypeError: must be type, not classobj
You could lowercase the first line of the file before passing it to DictReader:
import csv
import itertools
def lower_first(iterator):
    """Return an iterator over *iterator* with its first item lower-cased.

    Intended for wrapping a file object before handing it to
    ``csv.DictReader`` so that the header line is read in lower case.

    Any iterable is accepted (not only a true iterator); it is passed
    through ``iter()`` first so that a list is not restarted by ``chain``.
    Empty input yields an empty iterator instead of leaking
    ``StopIteration`` out of this function.
    """
    lines = iter(iterator)
    try:
        header = next(lines)
    except StopIteration:
        # Nothing to lower-case; hand back the (exhausted) iterator.
        return lines
    return itertools.chain([header.lower()], lines)
with open(ifs_data_file, 'rU') as datafile:
csvDict = csv.DictReader(lower_first(datafile))
for row in csvDict:
print row
In Python 2, DictReader is an old-style class, so super() doesn't work at all here. You need to access the property object on the parent class directly. Also, in Python 2 you need to override the .next() method, not .__next__():
class DictReaderInsensitive(csv.DictReader):
    """A csv.DictReader whose field names are stripped and lower-cased.

    Rows are returned wrapped in ``DictInsensitive``.
    """

    @property
    def fieldnames(self):
        """Return the normalized header names, or None when there is no header.

        The parent property is read through ``fget`` instead of ``super()``
        because ``csv.DictReader`` is an old-style class in Python 2, where
        ``super()`` raises ``TypeError``.
        """
        names = csv.DictReader.fieldnames.fget(self)
        if names is None:
            # Empty input: the parent found no header row. Pass None through
            # so that iteration ends normally instead of failing with a
            # TypeError while trying to loop over None.
            return None
        return [field.strip().lower() for field in names]

    def next(self):
        """Return the next row as a ``DictInsensitive``.

        Raises ``StopIteration`` when the underlying reader is exhausted.
        """
        # Python 2 names the iterator method ``next``; Python 3 names it
        # ``__next__``. Use whichever one the parent class provides.
        parent_next = getattr(csv.DictReader, 'next', None)
        if parent_next is None:
            parent_next = csv.DictReader.__next__
        row = parent_next(self)
        return DictInsensitive(row)

    # Python 3 iteration protocol looks up __next__; keep both spellings.
    __next__ = next
Demo:
>>> example = '''\
... foo,Bar,BAZ
... 42,3.14159,Hello world!'''.splitlines()
>>> csvDict = DictReaderInsensitive(example)
>>> row = next(csvDict)
>>> print row
{'bar': '3.14159', 'foo': '42', 'baz': 'Hello world!'}
>>> row['BAZ']
'Hello world!'
For a much simpler approach, you can simply update the DictReader.fieldnames attribute before accessing your dictionary, as in:
>>> f = open('example-x-y-time.csv', 'rb')
>>> reader = csv.DictReader(f)
>>> reader.fieldnames
['Latitude', 'Longitude', 'Date']
>>> print next(reader)
{'Latitude': '44.8982391', 'Date': '2004-07-12', 'Longitude': '-117.7791061'}
>>> reader.fieldnames = [name.lower() for name in reader.fieldnames]
>>> print next(reader)
{'latitude': '44.6637001', 'date': '1964-04-03', 'longitude': '-123.5997009'}
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With