I stumbled across a very simple situation which I cannot seem to find a solution for.
What I want to do is simple: write some data into a .csv file containing:
The way I'm doing it right now seems to be the only solution that I could come up with:
keys()
of every dictionary in the above list and add them to a set()
(this will be the header)writer.writerows(data)
Basically, a simple MCVE might look like this:
from csv import DictWriter
RESULT_FILE = 'test_result.csv'
def get_fieldnames(data):
fieldnames = set()
for item in data:
fieldnames.update(item.keys())
return fieldnames
def main(data):
fieldnames = get_fieldnames(data)
with open(RESULT_FILE, 'a', newline='', encoding='utf-8') as f:
writer = DictWriter(f, fieldnames=fieldnames, delimiter=',')
writer.writeheader()
writer.writerows(data)
if __name__ == '__main__':
data_ = [
{
'a': '1',
'b': '2',
'c': '3',
},
{
'a': '6',
'd': '1',
'b': '3',
},
{
'c': '2',
'e': '1',
'f': '9',
}
]
main(data_)
Now, what I don't like about this:
How can I avoid exporting all the data at once in the csv when the header is dynamic?
As requested, the real data looks like this:
{'Catalog link': '',
'Category': 'Tools and Shop Supplies / Workshop Accessories / Tool '
'Accessories / Sander Accessories',
'Description': 'Exclusive single-piece hub design reduces pad vibration and '
'ensures smooth performance.',
'Each': '$ 24.70',
'Info': '',
'Line art': '',
'Name': '(5") Non-Vacuum Disc Pad Vinyl-Face',
'Product number': '91456106T',
'Technical specifications': '',
'image_1': 'https://www.richelieu.com/documents/docsGr/120/107/6/1201076/1419675_700.jpg'}
{'Catalog link': '',
'Category': 'Tools and Shop Supplies / Workshop Accessories / Tool '
'Accessories / Sander Accessories',
'Description': '',
'Each': '$ 8.19',
'Info': '<p><strong>material: </strong>Cork</p>',
'Line art': '',
'Name': 'Replacement Plate for MKT9924DB Belt Sander',
'Product number': 'MKT4230358',
'Technical specifications': '<p><strong>brand: </strong>Makita</p>',
'image_1': 'https://www.richelieu.com/documents/docsGr/116/631/4/1166314/1281513_700.jpg',
'\xa0': '$ 257.80'}
{'Catalog link': '',
'Category': 'Tools and Shop Supplies / Workshop Accessories / Tool '
'Accessories / Sander Accessories',
'Description': '',
'Each': '$ 8.19',
'Info': '<p><strong>material: </strong>Graphite</p>',
'Line art': '',
'Name': 'Replacement Plate for MKT9924DB Belt Sander',
'Product number': 'MKT4230366',
'Technical specifications': '<p><strong>brand: </strong>Makita</p>',
'image_1': 'https://www.richelieu.com/documents/docsPr/MK/T4/23/03/66/MKT4230366/1281514_700.jpg',
'\xa0': '$ 257.80'}
{'Catalog link': '',
'Category': 'Tools and Shop Supplies / Workshop Accessories / Tool '
'Accessories / Sander Accessories',
'Description': '- Exclusive single-piece hub design reduces pad vibration and '
'ensures smooth performance.',
'Each': '$ 38.47',
'Info': '',
'Line art': '',
'Name': 'Non-Grip Vacuum Pads',
'Product number': '9154325',
'Technical specifications': '<p><strong>thickness: </strong>3/8 '
'in</p><p><strong>density: '
'</strong>Medium</p><p><strong>nap: '
'</strong>Short</p>',
'image_1': 'https://www.richelieu.com/documents/docsPr/91/54/32/5/9154325/1213330_700.jpg',
'image_2': 'https://www.richelieu.com/documents/docsPr/91/54/32/5/9154325/1213331_700.jpg'}
{'Catalog link': '',
'Category': 'Tools and Shop Supplies / Workshop Accessories / Tool '
'Accessories / Sander Accessories',
'Description': '- Exclusive single-piece hub design reduces pad vibration and '
'ensures smooth performance.',
'Each': '$ 52.92',
'Info': '',
'Line art': '',
'Name': 'Non-Grip Vacuum Pads',
'Product number': '9154327',
'Technical specifications': '<p><strong>thickness: </strong>3/8 '
'in</p><p><strong>density: '
'</strong>Medium</p><p><strong>nap: '
'</strong>Short</p>',
'image_1': 'https://www.richelieu.com/documents/docsGr/105/122/1/1051221/1213328_700.jpg',
'image_2': 'https://www.richelieu.com/documents/docsPr/91/54/32/7/9154327/1213332_700.jpg'}
{'Catalog link': '',
'Category': 'Tools and Shop Supplies / Workshop Accessories / Tool '
'Accessories / Sander Accessories',
'Description': '- Unique one-piece hub design reduces pad vibration and '
'ensures smooth performance.',
'Each': '$ 26.84',
'Info': '',
'Line art': '',
'Name': 'Stick-on Non-Vacuum Pads',
'Product number': '9156106',
'Technical specifications': '<p><strong>thickness: </strong>3/8 '
'in</p><p><strong>density: </strong>Medium</p>',
'image_1': 'https://www.richelieu.com/documents/docsGr/105/122/4/1051224/1213343_700.jpg',
'image_2': 'https://www.richelieu.com/documents/docsPr/91/56/10/6/9156106/1213345_700.jpg'}
{'Catalog link': '',
'Category': 'Tools and Shop Supplies / Workshop Accessories / Tool '
'Accessories / Sander Accessories',
'Description': '- Unique one-piece hub design reduces pad vibration and '
'ensures smooth performance.',
'Each': '$ 51.70',
'Info': '',
'Line art': '',
'Name': 'Stick-on Non-Vacuum Pads',
'Product number': '9156107',
'Technical specifications': '<p><strong>thickness: </strong>3/8 '
'in</p><p><strong>density: </strong>Medium</p>',
'image_1': 'https://www.richelieu.com/documents/docsPr/91/56/10/7/9156107/1213344_700.jpg',
'image_2': 'https://www.richelieu.com/documents/docsPr/91/56/10/7/9156107/1213346_700.jpg'}
{'Catalog link': '',
'Category': 'Tools and Shop Supplies / Workshop Accessories / Tool '
'Accessories / Sander Accessories',
'Description': 'Size: 2-1/2" x 14".',
'Each': '$ 12.36',
'Info': '',
'Line art': '',
'Name': 'Sandpaper Belt 2½ " x 14" for Compact Belt Sander PC371 or PC371K',
'Product number': 'PC371K060',
'Technical specifications': '',
'image_1': 'https://www.richelieu.com/documents/docsPr/PC/37/1K/06/0/PC371K060/1263523_700.jpg',
'\xa0': '$ 148.18'}
{'Catalog link': '',
'Category': 'Tools and Shop Supplies / Workshop Accessories / Tool '
'Accessories / Sander Accessories',
'Description': 'Size: 2-1/2" x 14".',
'Each': '$ 12.36',
'Info': '',
'Line art': '',
'Name': 'Sandpaper Belt 2½ " x 14" for Compact Belt Sander PC371 or PC371K',
'Product number': 'PC371K080',
'Technical specifications': '',
'image_1': 'https://www.richelieu.com/documents/docsPr/PC/37/1K/08/0/PC371K080/1263524_700.jpg',
'\xa0': '$ 148.18'}
{'Catalog link': '',
'Category': 'Tools and Shop Supplies / Workshop Accessories / Tool '
'Accessories / Sander Accessories',
'Description': 'Size: 2-1/2" x 14".',
'Each': '$ 12.36',
'Info': '',
'Line art': '',
'Name': 'Sandpaper Belt 2½ " x 14" for Compact Belt Sander PC371 or PC371K',
'Product number': 'PC371K120',
'Technical specifications': '',
'image_1': 'https://www.richelieu.com/documents/docsPr/PC/37/1K/12/0/PC371K120/1263526_700.jpg',
'\xa0': '$ 148.18'}
{'Catalog link': '',
'Category': 'Tools and Shop Supplies / Workshop Accessories / Tool '
'Accessories / Sander Accessories',
'Description': 'Size: 2-1/2" x 14".',
'Each': '$ 12.36',
'Info': '',
'Line art': '',
'Name': 'Sandpaper Belt 2½ " x 14" for Compact Belt Sander PC371 or PC371K',
'Product number': 'PC371K100',
'Technical specifications': '',
'image_1': 'https://www.richelieu.com/documents/docsPr/PC/37/1K/10/0/PC371K100/1263525_700.jpg',
'\xa0': '$ 148.18'}
{'Catalog link': '',
'Category': 'Tools and Shop Supplies / Workshop Accessories / Tool '
'Accessories / Sander Accessories',
'Description': 'Exclusive single-piece hub design reduces pad vibration and '
'ensures smooth performance.',
'Each': '$ 25.22',
'Info': '',
'Line art': '',
'Name': '5" Non-Vacuum Disc Pad Hook-Face',
'Product number': '91454325T',
'Technical specifications': '',
'image_1': 'https://www.richelieu.com/documents/docsGr/120/107/7/1201077/1419678_700.jpg'}
{'Catalog link': '',
'Category': 'Tools and Shop Supplies / Workshop Accessories / Tool '
'Accessories / Sander Accessories',
'Description': '- Pads mount with screws.',
'Each': '$ 31.80',
'Info': '',
'Line art': '',
'Name': 'Plates for Non-Vacuum (Grip-On) Dynabug II Disc Pads - 7.62 cm x '
'10.79 cm (3" x 4-1/4")',
'Product number': '9156315',
'Technical specifications': '<p><strong>thickness: </strong>3/8 '
'in</p><p><strong>density: </strong>Medium</p>',
'image_1': 'https://www.richelieu.com/documents/docsGr/116/625/4/1166254/1280825_700.jpg',
'\xa0': '$ 179.95'}
Edit-1 26-Dec: Updated code to generate data based on your data
Based on your requirements I would suggest the below
Below is a quick/dirty POC of the same and it works well for me
import csv
try:
f = open("headers.csv", mode="r+", encoding="utf-8")
except FileNotFoundError:
f = open("headers.csv", mode="w+", encoding="utf-8")
f2 = open("data.csv", mode="a+", encoding="utf-8")
f.seek(0)
headers = f.readline().strip().split(",")
if headers == ['']:
headers = []
headers_map = {}
for index, field in enumerate(headers):
headers_map[field] = index
def update_header_dict(data):
updated_headers = False
for key in data.keys():
if key not in headers_map:
new_index = len(headers_map)
headers_map[key] = new_index
updated_headers = True
if updated_headers:
f.seek(0)
csv.DictWriter(f, headers_map.keys()).writeheader()
f.flush()
def get_row_data_dict(data):
row_data = [""] * len(headers_map)
for k, v in data.items():
# if v and v[0] in ('=', '-'):
# # Mark the value as text, only needed if you want to display data in excel
# # else should be commented out
# v = "'" + v
row_data[headers_map[k]] = v
return row_data
def main(data):
data_writer = csv.writer(f2)
for row in data:
update_header_dict(row)
data_writer.writerow(get_row_data_dict(row))
data_ = [
{'Catalog link': '',
'Category': 'Tools and Shop Supplies / Workshop Accessories / Tool '
'Accessories / Sander Accessories',
'Description': 'Exclusive single-piece hub design reduces pad vibration and '
'ensures smooth performance.',
'Each': '$ 24.70',
'Info': '',
'Line art': '',
'Name': '(5") Non-Vacuum Disc Pad Vinyl-Face',
'Product number': '91456106T',
'Technical specifications': '',
'image_1': 'https://www.richelieu.com/documents/docsGr/120/107/6/1201076/1419675_700.jpg'},
{'Catalog link': '',
'Category': 'Tools and Shop Supplies / Workshop Accessories / Tool '
'Accessories / Sander Accessories',
'Description': '',
'Each': '$ 8.19',
'Info': '<p><strong>material: </strong>Cork</p>',
'Line art': '',
'Name': 'Replacement Plate for MKT9924DB Belt Sander',
'Product number': 'MKT4230358',
'Technical specifications': '<p><strong>brand: </strong>Makita</p>',
'image_1': 'https://www.richelieu.com/documents/docsGr/116/631/4/1166314/1281513_700.jpg',
'\xa0': '$ 257.80'},
{'Catalog link': '',
'Category': 'Tools and Shop Supplies / Workshop Accessories / Tool '
'Accessories / Sander Accessories',
'Description': '',
'Each': '$ 8.19',
'Info': '<p><strong>material: </strong>Graphite</p>',
'Line art': '',
'Name': 'Replacement Plate for MKT9924DB Belt Sander',
'Product number': 'MKT4230366',
'Technical specifications': '<p><strong>brand: </strong>Makita</p>',
'image_1': 'https://www.richelieu.com/documents/docsPr/MK/T4/23/03/66/MKT4230366/1281514_700.jpg',
'\xa0': '$ 257.80'},
{'Catalog link': '',
'Category': 'Tools and Shop Supplies / Workshop Accessories / Tool '
'Accessories / Sander Accessories',
'Description': '- Exclusive single-piece hub design reduces pad vibration and '
'ensures smooth performance.',
'Each': '$ 38.47',
'Info': '',
'Line art': '',
'Name': 'Non-Grip Vacuum Pads',
'Product number': '9154325',
'Technical specifications': '<p><strong>thickness: </strong>3/8 '
'in</p><p><strong>density: '
'</strong>Medium</p><p><strong>nap: '
'</strong>Short</p>',
'image_1': 'https://www.richelieu.com/documents/docsPr/91/54/32/5/9154325/1213330_700.jpg',
'image_2': 'https://www.richelieu.com/documents/docsPr/91/54/32/5/9154325/1213331_700.jpg'},
{'Catalog link': '',
'Category': 'Tools and Shop Supplies / Workshop Accessories / Tool '
'Accessories / Sander Accessories',
'Description': '- Exclusive single-piece hub design reduces pad vibration and '
'ensures smooth performance.',
'Each': '$ 52.92',
'Info': '',
'Line art': '',
'Name': 'Non-Grip Vacuum Pads',
'Product number': '9154327',
'Technical specifications': '<p><strong>thickness: </strong>3/8 '
'in</p><p><strong>density: '
'</strong>Medium</p><p><strong>nap: '
'</strong>Short</p>',
'image_1': 'https://www.richelieu.com/documents/docsGr/105/122/1/1051221/1213328_700.jpg',
'image_2': 'https://www.richelieu.com/documents/docsPr/91/54/32/7/9154327/1213332_700.jpg'},
{'Catalog link': '',
'Category': 'Tools and Shop Supplies / Workshop Accessories / Tool '
'Accessories / Sander Accessories',
'Description': '- Unique one-piece hub design reduces pad vibration and '
'ensures smooth performance.',
'Each': '$ 26.84',
'Info': '',
'Line art': '',
'Name': 'Stick-on Non-Vacuum Pads',
'Product number': '9156106',
'Technical specifications': '<p><strong>thickness: </strong>3/8 '
'in</p><p><strong>density: </strong>Medium</p>',
'image_1': 'https://www.richelieu.com/documents/docsGr/105/122/4/1051224/1213343_700.jpg',
'image_2': 'https://www.richelieu.com/documents/docsPr/91/56/10/6/9156106/1213345_700.jpg'},
{'Catalog link': '',
'Category': 'Tools and Shop Supplies / Workshop Accessories / Tool '
'Accessories / Sander Accessories',
'Description': '- Unique one-piece hub design reduces pad vibration and '
'ensures smooth performance.',
'Each': '$ 51.70',
'Info': '',
'Line art': '',
'Name': 'Stick-on Non-Vacuum Pads',
'Product number': '9156107',
'Technical specifications': '<p><strong>thickness: </strong>3/8 '
'in</p><p><strong>density: </strong>Medium</p>',
'image_1': 'https://www.richelieu.com/documents/docsPr/91/56/10/7/9156107/1213344_700.jpg',
'image_2': 'https://www.richelieu.com/documents/docsPr/91/56/10/7/9156107/1213346_700.jpg'},
{'Catalog link': '',
'Category': 'Tools and Shop Supplies / Workshop Accessories / Tool '
'Accessories / Sander Accessories',
'Description': 'Size: 2-1/2" x 14".',
'Each': '$ 12.36',
'Info': '',
'Line art': '',
'Name': 'Sandpaper Belt 2½ " x 14" for Compact Belt Sander PC371 or PC371K',
'Product number': 'PC371K060',
'Technical specifications': '',
'image_1': 'https://www.richelieu.com/documents/docsPr/PC/37/1K/06/0/PC371K060/1263523_700.jpg',
'\xa0': '$ 148.18'},
{'Catalog link': '',
'Category': 'Tools and Shop Supplies / Workshop Accessories / Tool '
'Accessories / Sander Accessories',
'Description': 'Size: 2-1/2" x 14".',
'Each': '$ 12.36',
'Info': '',
'Line art': '',
'Name': 'Sandpaper Belt 2½ " x 14" for Compact Belt Sander PC371 or PC371K',
'Product number': 'PC371K080',
'Technical specifications': '',
'image_1': 'https://www.richelieu.com/documents/docsPr/PC/37/1K/08/0/PC371K080/1263524_700.jpg',
'\xa0': '$ 148.18'},
{'Catalog link': '',
'Category': 'Tools and Shop Supplies / Workshop Accessories / Tool '
'Accessories / Sander Accessories',
'Description': 'Size: 2-1/2" x 14".',
'Each': '$ 12.36',
'Info': '',
'Line art': '',
'Name': 'Sandpaper Belt 2½ " x 14" for Compact Belt Sander PC371 or PC371K',
'Product number': 'PC371K120',
'Technical specifications': '',
'image_1': 'https://www.richelieu.com/documents/docsPr/PC/37/1K/12/0/PC371K120/1263526_700.jpg',
'\xa0': '$ 148.18'},
{'Catalog link': '',
'Category': 'Tools and Shop Supplies / Workshop Accessories / Tool '
'Accessories / Sander Accessories',
'Description': 'Size: 2-1/2" x 14".',
'Each': '$ 12.36',
'Info': '',
'Line art': '',
'Name': 'Sandpaper Belt 2½ " x 14" for Compact Belt Sander PC371 or PC371K',
'Product number': 'PC371K100',
'Technical specifications': '',
'image_1': 'https://www.richelieu.com/documents/docsPr/PC/37/1K/10/0/PC371K100/1263525_700.jpg',
'\xa0': '$ 148.18'},
{'Catalog link': '',
'Category': 'Tools and Shop Supplies / Workshop Accessories / Tool '
'Accessories / Sander Accessories',
'Description': 'Exclusive single-piece hub design reduces pad vibration and '
'ensures smooth performance.',
'Each': '$ 25.22',
'Info': '',
'Line art': '',
'Name': '5" Non-Vacuum Disc Pad Hook-Face',
'Product number': '91454325T',
'Technical specifications': '',
'image_1': 'https://www.richelieu.com/documents/docsGr/120/107/7/1201077/1419678_700.jpg'},
{'Catalog link': '',
'Category': 'Tools and Shop Supplies / Workshop Accessories / Tool '
'Accessories / Sander Accessories',
'Description': '- Pads mount with screws.',
'Each': '$ 31.80',
'Info': '',
'Line art': '',
'Name': 'Plates for Non-Vacuum (Grip-On) Dynabug II Disc Pads - 7.62 cm x '
'10.79 cm (3" x 4-1/4")',
'Product number': '9156315',
'Technical specifications': '<p><strong>thickness: </strong>3/8 '
'in</p><p><strong>density: </strong>Medium</p>',
'image_1': 'https://www.richelieu.com/documents/docsGr/116/625/4/1166254/1280825_700.jpg',
'\xa0': '$ 179.95'}
]
data2_ = [
{
'a': '2',
'f': '1',
'z': '9',
},
]
main(data_)
# main(data2_)
f.close()
f2.close()
Running above generates two files and then i run below on terminal
cat headers.csv data.csv > output.csv
And then open output.csv
in excel
The only issue you may see is #NAME?
, but those are because excel is trying to process the -
you had at the start of the text. If you to process text like that you need to uncomment the below part of the code
# if v and v[0] in ('=', '-'):
# # Mark the value as text, only needed if you want to display data in excel
# # else should be commented out
# v = "'" + v
Since your data comes from scraping, it may be considered a stream.
To mimic a stream, I use data_.pop()
to get one item at a time.
The following solution adds each item ones it comes form the stream.
The header and body of the csv are stored in different files.
The header potentially grows in length over time.
The the rows you saved before such a growth step naturally cannot know
this growth and therefore may be missing some trailing commas to indicate missing items.
import csv
import os
class StreamCSV: # Python 3
def __init__(self, header_file_name, body_file_name):
self.header_file_name = header_file_name
self.fbody = open(body_file_name, 'a', newline='', encoding='utf-8')
self.csv_body = csv.writer(self.fbody)
def add_item(self, item):
if os.path.exists(self.header_file_name):
with open(self.header_file_name, 'r', newline='', encoding='utf-8') as fobj:
reader = csv.reader(fobj)
try:
current_header = next(reader)
except StopIteration:
current_header = []
else:
current_header = []
header_set = set(current_header)
for key in item:
if key not in header_set:
current_header.append(key)
if len(header_set) < len(current_header):
with open(self.header_file_name, 'w', newline='', encoding='utf-8') as fobj:
writer = csv.writer(fobj)
writer.writerow(current_header)
item_data = [item.get(head, '') for head in current_header]
self.csv_body.writerow(item_data)
self.fbody.flush() # allows peeing into the file
if __name__ == '__main__':
data_ = [
{
'a': '1',
'b': '2',
'c': '3',
},
{
'a': '6',
'd': '1',
'b': '3',
},
{
'c': '2',
'e': '1',
'f': '9',
}
]
def show_saved(file_names):
for name in file_names:
with open(name) as fobj:
print(name)
print(fobj.read())
header_file_name, body_file_name = 'header.csv', 'body.csv'
stream_writer = StreamCSV(header_file_name, body_file_name)
for x in range(1, 4):
print('step:', x)
stream_writer.add_item(data_.pop())
show_saved([header_file_name, body_file_name])
Output that shows the growth over time:
step: 1
header.csv
c,e,f
body.csv
2,1,9
step: 2
header.csv
c,e,f,a,d,b
body.csv
2,1,9
,,,6,1,3
step: 3
header.csv
c,e,f,a,d,b
body.csv
2,1,9
,,,6,1,3
3,,,1,,2
You might want to merge header and body in an additional step, adding such missing trailing commas.
def merge_header_body(header_file_name, body_file_name, out_file_name):
with open(header_file_name, 'r', newline='', encoding='utf-8') as fobj:
reader = csv.reader(fobj)
header = next(reader)
with open(out_file_name, 'w', newline='', encoding='utf-8') as fobj_out, \
open(body_file_name, 'r', newline='', encoding='utf-8') as fobj_in:
reader = csv.reader(fobj_in)
writer = csv.writer(fobj_out)
writer.writerow(header)
target_length = len(header)
for row in reader:
diff = target_length - len(row)
row.extend([''] * diff)
writer.writerow(row)
out_file_name = 'merged.csv'
merge_header_body(header_file_name, body_file_name, out_file_name)
Content of merged.csv
:
c,e,f,a,d,b
2,1,9,,,
,,,6,1,3
3,,,1,,2
If the program crashes in between, it will resume. Let's take the same data as before and add more rows:
for x in range(1, 4):
print('step:', x)
stream_writer.add_item(data_.pop())
show_saved([header_file_name, body_file_name])
Output:
step: 1
header.csv
c,e,f,a,d,b
body.csv
2,1,9
,,,6,1,3
3,,,1,,2
2,1,9,,,
step: 2
header.csv
c,e,f,a,d,b
body.csv
2,1,9
,,,6,1,3
3,,,1,,2
2,1,9,,,
,,,6,1,3
step: 3
header.csv
c,e,f,a,d,b
body.csv
2,1,9
,,,6,1,3
3,,,1,,2
2,1,9,,,
,,,6,1,3
3,,,1,,2
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With