I have copied this script from [python web site][1] This is another question but now problem with encoding:
import sqlite3 import csv import codecs import cStringIO import sys class UTF8Recoder: """ Iterator that reads an encoded stream and reencodes the input to UTF-8 """ def __init__(self, f, encoding): self.reader = codecs.getreader(encoding)(f) def __iter__(self): return self def next(self): return self.reader.next().encode("utf-8") class UnicodeReader: """ A CSV reader which will iterate over lines in the CSV file "f", which is encoded in the given encoding. """ def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds): f = UTF8Recoder(f, encoding) self.reader = csv.reader(f, dialect=dialect, **kwds) def next(self): row = self.reader.next() return [unicode(s, "utf-8") for s in row] def __iter__(self): return self class UnicodeWriter: """ A CSV writer which will write rows to CSV file "f", which is encoded in the given encoding. """ def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds): # Redirect output to a queue self.queue = cStringIO.StringIO() self.writer = csv.writer(self.queue, dialect=dialect, **kwds) self.stream = f self.encoder = codecs.getincrementalencoder(encoding)() def writerow(self, row): self.writer.writerow([s.encode("utf-8") for s in row]) # Fetch UTF-8 output from the queue ... data = self.queue.getvalue() data = data.decode("utf-8") # ... and reencode it into the target encoding data = self.encoder.encode(data) # write to the target stream self.stream.write(data) # empty queue self.queue.truncate(0) def writerows(self, rows): for row in rows: self.writerow(row)
This time problem with encoding, when I ran this it gave me this error:
Traceback (most recent call last): File "makeCSV.py", line 87, in <module> uW.writerow(d) File "makeCSV.py", line 54, in writerow self.writer.writerow([s.encode("utf-8") for s in row]) AttributeError: 'int' object has no attribute 'encode'
Then I converted all integers to string, but this time I got this error:
Traceback (most recent call last): File "makeCSV.py", line 87, in <module> uW.writerow(d) File "makeCSV.py", line 54, in writerow self.writer.writerow([str(s).encode("utf-8") for s in row]) UnicodeEncodeError: 'ascii' codec can't encode character u'\xf6' in position 1: ordinal not in range(128)
I have implemented above to deal with unicode characters, but it gives me such error. What is the problem and how to fix it?
Then I converted all integers to string,
You converted both integers and strings to byte strings. For strings this will use the default character encoding which happens to be ASCII, and this fails when you have non-ASCII characters. You want unicode
instead of str
.
self.writer.writerow([unicode(s).encode("utf-8") for s in row])
It might be better to convert everything to unicode before calling that method. The class is designed specifically for parsing Unicode strings. It was not designed to support other data types.
From the documentation:
Unlike the StringIO module, this module is not able to accept Unicode strings that cannot be encoded as plain ASCII strings.
I.e. only 7-bit clean strings can be stored.
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With