Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Writing Nested Dictionary to csv

I have a dictionary:

dic = {"Location1":{"a":1,"b":2,"c":3},"Location2":{"a":4,"b":5,"c":6}}

I would like to tabulate this dictionary into a csv with the uppermost key being the leftmost column and the sub keys being headers on the uppermost row with each subsequent row being filled with the sub key values as such:

Location    a   b   c
Location1   1   2   3
Location2   4   5   6

Which I have successfully accomplished using the following script:

import csv

# Sample nested dict: outer keys are locations, inner dicts map column name -> value.
dic = {"Location1":{"a":1,"b":2,"c":3},"Location2":{"a":4,"b":5,"c":6}}
# Column order for the CSV; "Location" is not a key of any inner dict.
fields = ["Location","a","b","c"]

with open(r"C:\Users\tyler.cowan\Desktop\tabulated.csv", "w", newline='') as f:
    # extrasaction='ignore' makes DictWriter skip row keys that are not in fieldnames.
    w = csv.DictWriter(f, extrasaction='ignore', fieldnames = fields)
    w.writeheader()
    for k in dic:
        # Build one row per location. `.get(field)` returns None for "Location"
        # (no such inner key), so `or k` substitutes the location name there.
        # NOTE(review): the same fallback fires for ANY field whose value is
        # missing or falsy (None, 0, ""), not only for "Location".
        w.writerow({field: dic[k].get(field) or k for field in fields})

What is peculiar is that when I adapt this test case to my real data, my location keys end up distributed into other columns as well. My first thought was that I must have messed up building the dictionary, but upon inspection it has exactly the same format as the test dictionary, just with more keys and values. Yet the output looks like this:

Location    a   b   c   d           e   f   g   h
Location1   1   2   3   Location1   7   8   9   10
Location2   4   5   6   Location2   2   3   4   5

Below is my full script

# -*- coding: utf-8 -*-

import os
import csv


def pretty(d, indent=0):
    """Print a nested dict as a tab-indented tree for visual inspection.

    Each key is printed on its own line, prefixed with ``indent`` tab
    characters. A dict value is printed recursively one level deeper; any
    other value is printed on the following line with one extra tab. An
    empty-string value additionally emits a "fubar" marker line before it,
    so blank entries are easy to spot in the output.

    Args:
        d: Mapping to print; values may themselves be dicts.
        indent: Number of leading tabs for the keys at this level.
    """
    for name, entry in d.items():
        print('\t' * indent + str(name))

        # Nested mapping: descend one level and move on to the next key.
        if isinstance(entry, dict):
            pretty(entry, indent + 1)
            continue

        # Leaf value: flag empty strings, then print the value itself.
        if entry == "":
            print("fubar")
        print('\t' * (indent + 1) + str(entry))



# Directory (relative to the current working directory) holding the input files.
inFolder = "Folder"
dirList = os.listdir(inFolder)

#print(dirList)
# CSV column names: 'Lat-Long' first, then one column per input file base name.
fields = [ 'Lat-Long']
# Maps "<first token>-<second token>" of each input row -> {file base name: value}.
allData = {}
for file in dirList:
    fname, ext = os.path.splitext(file)
    # NOTE(review): this runs before the extension check, so every directory
    # entry contributes a column, even ones that are neither .dat nor .csv.
    if fname not in fields:
        fields.append(fname)

    #handle .dat in this block
    if ext.lower() == ".dat":
        #print("found dat ext: " + str(ext))
        with open(os.path.join(inFolder,file), "r") as f:
            for row in f:
                try:
                    # First attempt: treat the row as single-space separated.
                    row1 = row.split(" ")
                    # NOTE(review): the first time a key is seen only an empty
                    # dict is created; that row's value is NOT stored because
                    # the assignment lives in the `else` branch.
                    if str(row1[0])+"-"+str(row1[1]) not in allData:
                        allData[str(row1[0])+"-"+str(row1[1])] = {}
                    else:
                        # row1[2] is the raw third token, so it may still carry
                        # the line's trailing newline. Later rows with the same
                        # key overwrite earlier ones for this file.
                        allData[str(row1[0])+"-"+str(row1[1])][fname] = row1[2]

                except IndexError:
                    # Reached when the space split produced too few tokens
                    # (row1[1] or row1[2] missing); retry with tab separation.
                    row2 = row.split("\t")
                    # NOTE(review): row2[1] can itself raise IndexError here,
                    # and that one is not caught.
                    if str(row2[0])+"-"+str(row2[1]) not in allData:
                        allData[str(row2[0])+"-"+str(row2[1])] = {}
                    else:
                        # NOTE(review): stores the literal "NA" rather than
                        # row2[2], so tab-separated rows never keep their value.
                        allData[str(row2[0])+"-"+str(row2[1])][fname] = "NA"

    elif ext.lower() == ".csv":
        with open(os.path.join(inFolder,file), "r") as f:
            for row in f:
                # Plain comma split; no csv-module quoting rules are applied.
                row1 = row.split(",")
                # Same create-then-skip pattern as the .dat branch: the first
                # row for a key only creates the empty inner dict.
                if str(row1[0])+"-"+str(row1[1]) not in allData:
                    allData[str(row1[0])+"-"+str(row1[1])] = {}
                else:
                    allData[str(row1[0])+"-"+str(row1[1])][fname] = row1[2]



# Dump the collected structure to stdout for visual inspection.
pretty(allData)

with open("testBS.csv", "w", newline='') as f:
    # extrasaction='ignore' makes DictWriter skip row keys that are not in fieldnames.
    w = csv.DictWriter(f, extrasaction='ignore', fieldnames = fields)
    w.writeheader()
    for k in allData:
        # One output row per key. 'Lat-Long' is filled through the `or k`
        # fallback, since `.get('Lat-Long')` normally returns None.
        # NOTE(review): that fallback is not limited to 'Lat-Long'. Any column
        # for which this key has no value (or a falsy one) is also filled with
        # k, which puts the key into other columns.
        w.writerow({field: allData[k].get(field) or k for field in fields})

and the input data is like:

"example.dat"

32.1    101.3   65
32.1    101.3   66
32.1    101.3   67
32.1    101.3   68
32.1    101.3   69
32.1    101.3   70
32.1    101.3   71

I would like to figure out how to diagnose and resolve this behavior, as I can't seem to figure out the difference between the test case and the real case.

like image 510
Tyler Cowan Avatar asked May 16 '18 23:05

Tyler Cowan


2 Answers

A possibility is to create a csv header that contains, along with the location value, a full listing of all subdictionary keys. That way, all subdictionary values can be written under their proper "key" columns:

import csv

# Sample data: locations whose sub-dictionaries do not all share the same keys.
dic = {"Location1":{"a":1,"b":2,"c":3},"Location2":{"a":4,"b":5,"c":6}, "Location3":{'e':7,'f':8, 'g':9, 'h':10}, "Location4":{'e': 2, 'f': 3, 'g': 4, 'h': 5}}

# Union of every sub-dictionary key, sorted so the column order is deterministic.
header = sorted({key for sub in dic.values() for key in sub})

with open('filename.csv', 'w', newline="") as out_file:
    writer = csv.writer(out_file)
    writer.writerow(['location'] + header)
    for location, values in dic.items():
        # A location that lacks a given key gets an empty cell in that column.
        row = [location]
        row.extend(values.get(column, '') for column in header)
        writer.writerow(row)

Output:

location,a,b,c,e,f,g,h
Location1,1,2,3,,,,
Location2,4,5,6,,,,
Location3,,,,7,8,9,10
Location4,,,,2,3,4,5
like image 52
Ajax1234 Avatar answered Nov 14 '22 04:11

Ajax1234


You could just use pandas to do it.

import pandas as pd

# Sample data: locations whose sub-dictionaries do not all share the same keys.
dic = {"Location1":{"a":1,"b":2,"c":3},"Location2":{"a":4,"b":5,"c":6}, "Location3":{'e':7,'f':8, 'g':9, 'h':10}, "Location4":{'e': 2, 'f': 3, 'g': 4, 'h': 5}}

# orient='index' turns each outer key into a row label and each inner key into
# a column; keys a location lacks become NaN and are written as empty cells.
frame = pd.DataFrame.from_dict(dic, orient='index')
frame.to_csv('temp.csv')

output:

 ,a,b,c,e,f,g,h
 Location1,1.0,2.0,3.0,,,,
 Location2,4.0,5.0,6.0,,,,
 Location3,,,,7.0,8.0,9.0,10.0
 Location4,,,,2.0,3.0,4.0,5.0
like image 24
wcsit Avatar answered Nov 14 '22 02:11

wcsit