I have a dictionary:
dic = {"Location1":{"a":1,"b":2,"c":3},"Location2":{"a":4,"b":5,"c":6}}
I would like to tabulate this dictionary into a CSV file, with the top-level keys in the leftmost column and the sub-keys as headers in the first row. Each subsequent row should be filled with the corresponding sub-key values, like this:
Location a b c
Location1 1 2 3
Location2 4 5 6
I have successfully accomplished this using the following script:
import csv
# Minimal test case: one CSV row per top-level key, one column per sub-key.
dic = {
    "Location1": {"a": 1, "b": 2, "c": 3},
    "Location2": {"a": 4, "b": 5, "c": 6},
}
fields = ["Location", "a", "b", "c"]

with open(r"C:\Users\tyler.cowan\Desktop\tabulated.csv", "w", newline='') as f:
    w = csv.DictWriter(f, fieldnames=fields, extrasaction='ignore')
    w.writeheader()
    for k, values in dic.items():
        record = {}
        for field in fields:
            # "Location" is never a sub-key, so .get() returns None and the
            # `or k` fallback supplies the location name for that column.
            # NOTE: the same fallback fires for ANY field that is missing
            # from the sub-dictionary (or whose value is falsy), writing the
            # location name into that column as well.
            record[field] = values.get(field) or k
        w.writerow(record)
What is peculiar is that when I apply this test case to my real data, my location keys end up distributed into other columns. My first thought was that I must have made a mistake building the dictionary, but upon inspection it has exactly the same format as the test dictionary, only with more keys and values. Yet I get output like this:
Location a b c d e f g h
Location1 1 2 3 Location1 7 8 9 10
Location2 4 5 6 Location2 2 3 4 5
Below is my full script
# -*- coding: utf-8 -*-
import os
import csv
def pretty(d, indent=0):
    """Print a (possibly nested) dict as a tab-indented tree for visual inspection.

    Each key is printed on its own line, prefixed with ``indent`` tabs.
    A dict value is printed recursively one level deeper; any other value
    is printed on the following line with one extra tab. An empty-string
    value is additionally flagged by printing ``fubar`` before it.

    Args:
        d: The dictionary to print.
        indent: Number of leading tabs for the keys of ``d``.
    """
    key_prefix = '\t' * indent
    leaf_prefix = '\t' * (indent + 1)
    for name, node in d.items():
        print(key_prefix + str(name))
        if isinstance(node, dict):
            pretty(node, indent + 1)
            continue
        # Make empty values stand out in the dump.
        if node == "":
            print("fubar")
        print(leaf_prefix + str(node))
# Folder whose files are merged into one table keyed by "<col0>-<col1>".
inFolder = "Folder"
dirList = os.listdir(inFolder)
#print(dirList)

# CSV header: the location column first, then one column per file base name.
fields = ['Lat-Long']
# Maps "<col0>-<col1>" -> {file base name: value read from that file}.
allData = {}

for file in dirList:
    fname, ext = os.path.splitext(file)
    # Every file in the folder contributes a column, whatever its extension.
    if fname not in fields:
        fields.append(fname)

    suffix = ext.lower()
    path = os.path.join(inFolder, file)

    #handle .dat in this block
    if suffix == ".dat":
        #print("found dat ext: " + str(ext))
        with open(path, "r") as f:
            for row in f:
                try:
                    parts = row.split(" ")
                    key = parts[0] + "-" + parts[1]
                    if key in allData:
                        # NOTE: parts[2] still carries the line's trailing
                        # newline, since the row is split without stripping.
                        allData[key][fname] = parts[2]
                    else:
                        # NOTE: the first row seen for a new key only creates
                        # the empty entry; that row's value is not stored.
                        allData[key] = {}
                except IndexError:
                    # Fewer space-separated fields than needed: retry the row
                    # as tab-separated and record a placeholder value.
                    parts = row.split("\t")
                    key = parts[0] + "-" + parts[1]
                    if key in allData:
                        allData[key][fname] = "NA"
                    else:
                        allData[key] = {}
    elif suffix == ".csv":
        with open(path, "r") as f:
            for row in f:
                parts = row.split(",")
                key = parts[0] + "-" + parts[1]
                if key in allData:
                    allData[key][fname] = parts[2]
                else:
                    allData[key] = {}

pretty(allData)

with open("testBS.csv", "w", newline='') as f:
    w = csv.DictWriter(f, extrasaction='ignore', fieldnames=fields)
    w.writeheader()
    for k, values in allData.items():
        record = {}
        for field in fields:
            # 'Lat-Long' is never stored in the sub-dictionary, so the
            # `or k` fallback fills that column with the location key.
            # NOTE: the fallback also fires for every other field that is
            # missing (or falsy) for this location, which writes the
            # location key into that column too.
            record[field] = values.get(field) or k
        w.writerow(record)
and the input data is like:
"example.dat"
32.1 101.3 65
32.1 101.3 66
32.1 101.3 67
32.1 101.3 68
32.1 101.3 69
32.1 101.3 70
32.1 101.3 71
I would like to figure out how to diagnose and resolve this behavior, as I can't seem to find the difference between the test case and the real case.
One possibility is to create a CSV header that contains, along with the location column, a full listing of all sub-dictionary keys. That way, every sub-dictionary value is written under its proper key column, and keys missing for a location are left empty:
import csv
dic = {
    "Location1": {"a": 1, "b": 2, "c": 3},
    "Location2": {"a": 4, "b": 5, "c": 6},
    "Location3": {'e': 7, 'f': 8, 'g': 9, 'h': 10},
    "Location4": {'e': 2, 'f': 3, 'g': 4, 'h': 5},
}

# Sorted union of every sub-dictionary key: one CSV column per distinct key.
header = sorted({key for sub in dic.values() for key in sub})

with open('filename.csv', 'w', newline="") as f:
    write = csv.writer(f)
    write.writerow(['location'] + header)
    # One row per location; a key the location lacks becomes an empty cell.
    write.writerows(
        [location, *(sub.get(column, '') for column in header)]
        for location, sub in dic.items()
    )
Output:
location,a,b,c,e,f,g,h
Location1,1,2,3,,,,
Location2,4,5,6,,,,
Location3,,,,7,8,9,10
Location4,,,,2,3,4,5
You could just use pandas to do it.
import pandas as pd
dic = {
    "Location1": {"a": 1, "b": 2, "c": 3},
    "Location2": {"a": 4, "b": 5, "c": 6},
    "Location3": {'e': 7, 'f': 8, 'g': 9, 'h': 10},
    "Location4": {'e': 2, 'f': 3, 'g': 4, 'h': 5},
}

# orient='index' turns each top-level key into a row label and each
# sub-key into a column.
frame = pd.DataFrame.from_dict(dic, orient='index')
frame.to_csv('temp.csv')
output:
,a,b,c,e,f,g,h
Location1,1.0,2.0,3.0,,,,
Location2,4.0,5.0,6.0,,,,
Location3,,,,7.0,8.0,9.0,10.0
Location4,,,,2.0,3.0,4.0,5.0
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With