I have a Python dictionary containing 3 lists under the keys 'time', 'power' and 'usage'.
All the lists have the same number of elements and all the lists are sorted. What
I want to do is to sum up the elements of the lists 'power' and 'usage' whose indexes
correspond to the same value in the list 'time', so as to have only one sample of power and usage per time unit.
For example transform this dictionary:
{'time': [1, 2, 2, 3, 4, 4, 5],
'power': [2, 2, 3, 6, 3, 3, 2],
'usage': [0, 1, 1, 2, 1, 4, 7]}
to this one:
{'time': [1, 2, 3, 4, 5],
'power': [2, 5, 6, 6, 2],
'usage': [0, 2, 2, 5, 7]}
I have already written this piece of code, which works, but I don't like it very much:
# Sample input: three parallel lists; equal 'time' values sit next to each
# other.  'power' and 'usage' match the example given in the question.
d = {'time': [1, 2, 2, 3, 4, 4, 5],
     'power': [2, 2, 3, 6, 3, 3, 2],
     'usage': [0, 1, 1, 2, 1, 4, 7]}

# Aggregated output: one entry per run of equal 'time' values.
new_d = {'time': [], 'power': [], 'usage': []}

for i, t in enumerate(d['time']):
    # Compare against the last emitted time rather than a magic `prev = -1`
    # sentinel, which would misfire if the very first time value were -1.
    if not new_d['time'] or new_d['time'][-1] != t:
        # First sample of a new time unit: start a new output entry.
        new_d['time'].append(t)
        new_d['power'].append(d['power'][i])
        new_d['usage'].append(d['usage'][i])
    else:
        # Same time unit as the previous sample: fold into the last entry.
        new_d['power'][-1] += d['power'][i]
        new_d['usage'][-1] += d['usage'][i]

# A parenthesised single-argument print is valid on Python 2 and Python 3.
print(d)
print(new_d)
Is there a more Pythonic way to do this, one that is simpler and more comprehensible?
A robust solution that can handle any number of extra fields, for data sorted by the 'time' field (written as a function):
def aggregate(old_d, sort_key='time'):
    """Collapse runs of equal ``sort_key`` values, summing every other field.

    ``old_d`` maps field names to parallel lists of equal length.  Consecutive
    samples that share the same ``old_d[sort_key]`` value are merged into one
    sample: the key value is kept once and the values of every other field
    are accumulated with ``+=``.  Only *adjacent* duplicates are merged, so
    the input should already be ordered by ``sort_key``.

    Args:
        old_d: dict of field name -> list; all lists must be as long as
            ``old_d[sort_key]``.
        sort_key: name of the field whose equal neighbouring values define
            a group.

    Returns:
        A new dict with the same keys as ``old_d`` and freshly built lists.

    Raises:
        KeyError: if ``sort_key`` is not a key of ``old_d``.
    """
    new_d = dict((k, []) for k in old_d)
    for i, curr in enumerate(old_d[sort_key]):
        # Decide once per sample, before any list is touched.  Checking the
        # output list (instead of a `prev = None` sentinel) keeps this
        # correct even when the first key value is itself None.
        starts_group = not new_d[sort_key] or new_d[sort_key][-1] != curr
        for key, lst in new_d.items():  # .items() works on Python 2 and 3
            if starts_group:
                lst.append(old_d[key][i])
            elif key != sort_key:
                # Same group as the previous sample: accumulate in place.
                lst[-1] += old_d[key][i]
    return new_d
Using your dictionary:
# Sample input from the question: equal 'time' values are adjacent.
d = {'time': [1, 2, 2, 3, 4, 4, 5],
     'power': [2, 2, 3, 6, 3, 3, 2],
     'usage': [0, 1, 1, 2, 1, 4, 7]}
# A parenthesised single-argument print is valid on Python 2 and Python 3.
print(aggregate(d))
>>>
{'usage': [0, 2, 2, 5, 7], 'power': [2, 5, 6, 6, 2], 'time': [1, 2, 3, 4, 5]}
Here's one that will handle arbitrary dictionaries (where `d` is your dict):
from itertools import groupby, imap
from operator import itemgetter
def group_dict_by(mapping, field, agg=sum):
    """Group the parallel lists in ``mapping`` by runs of equal ``field`` values.

    ``mapping`` maps names to lists of equal length.  Neighbouring entries of
    ``mapping[field]`` that compare equal form one group; for every other key
    the values at the group's indexes are reduced to a single value with
    ``agg``.  ``itertools.groupby`` only merges *adjacent* equal values, so
    the data should already be ordered by ``field``.

    Args:
        mapping: dict of name -> list.
        field: key of the list to group on.
        agg: callable taking an iterable of one group's values and returning
            the aggregated value (defaults to ``sum``).

    Returns:
        A new dict with the same keys: ``field`` maps to one value per group
        and every other key maps to the list of aggregated values.

    Raises:
        KeyError: if ``field`` is not a key of ``mapping``.
    """
    grouper = mapping[field]
    new_grouper = []
    # One output list per non-grouping key (version-neutral replacement for
    # the Python 2-only `mapping.viewkeys() - [field]`).
    accum = {k: [] for k in mapping if k != field}
    # enumerate() carries each value's index along so the matching entries
    # of the other lists can be looked up for every group.
    for key, grp in groupby(enumerate(grouper), itemgetter(1)):
        new_grouper.append(key)
        idx = [i for i, _ in grp]
        for dk, dv in accum.items():
            column = mapping[dk]
            # Lazy generator, like the original imap(): agg sees an iterable.
            dv.append(agg(column[i] for i in idx))
    accum[field] = new_grouper
    return accum
# A parenthesised single-argument print is valid on Python 2 and Python 3.
print(group_dict_by(d, 'time'))
# {'usage': [0, 2, 2, 5, 7], 'power': [2, 5, 6, 6, 2], 'time': [1, 2, 3, 4, 5]}
Using `itertools.groupby`, `zip` and some list comprehensions:
In [55]: dic={'time': [1, 2, 2, 3, 4, 4, 5],
....: 'power': [2, 2, 3, 6, 3, 3, 2],
....: 'usage': [0, 1, 1, 2, 1, 4, 7]}
In [56]: from itertools import groupby
In [57]: from operator import itemgetter
In [58]: zip1=zip(dic['time'],dic['power']) #(time, power) pairs; `itertools.izip` avoids building the list, but zip1 is reused in In [76] and an izip iterator can only be consumed once
In [59]: [sum(x[1] for x in v) for k,v in groupby(zip1,key=itemgetter(0))]  # group the pairs by time (item 0) and sum the power values (item 1) of each group
Out[59]: [2, 5, 6, 6, 2]
In [60]: zip2=zip(dic['time'],dic['usage'])  # (time, usage) pairs
In [61]: [sum(x[1] for x in v) for k,v in groupby(zip2,key=itemgetter(0))]  # same grouping, summing the usage values
Out[61]: [0, 2, 2, 5, 7]
In [64]: timee=[k for k,v in groupby(dic['time'])]  # groupby without a key merges adjacent equal times; keep each group's key once
In [65]: timee
Out[65]: [1, 2, 3, 4, 5]
`zip1` is `[(1, 2), (2, 2), (2, 3), (3, 6), (4, 3), (4, 3), (5, 2)]`. Now you can group the elements by their first item using `itertools.groupby`
and then take the sum of the second element of each tuple in the returned group.
In [75]: new_time=[k for k,v in groupby(dic['time'])]  # one entry per run of equal time values
In [76]: new_power=[sum(x[1] for x in v) for k,v in groupby(zip1,key=itemgetter(0))]  # reuses zip1, the (time, power) pairs from In [58]
In [77]: new_usage=[sum(x[1] for x in v) for k,v in groupby(zip2,key=itemgetter(0))]  # reuses zip2, the (time, usage) pairs from In [60]
In [80]: dict(zip(('time','power','usage'),(new_time,new_power,new_usage)))  # pair each key with its aggregated list
Out[80]: {'power': [2, 5, 6, 6, 2], 'time': [1, 2, 3, 4, 5], 'usage': [0, 2, 2, 5, 7]}
>>> from itertools import groupby
>>> from operator import itemgetter
>>> d = {'usage': [0, 1, 1, 2, 1, 4, 7], 'power': [2, 2, 3, 6, 3, 3, 2], 'time': [1, 2, 2, 3, 4, 4, 5]}
>>> groups = groupby(zip(d['time'], d['power'], d['usage']), key=itemgetter(0))  # rows of (time, power, usage), grouped by their first item (time)
>>> lists = zip(*[[k] + map(sum, zip(*g)[1:]) for k, g in groups])  # per group: [time, sum of each remaining column]; slicing zip() and adding map() to a list rely on both returning lists, as in Python 2
>>> dict(zip(('time', 'power', 'usage'), lists))  # the outer zip(*...) above turned the per-group rows back into one tuple per key
{'usage': (0, 2, 2, 5, 7), 'power': (2, 5, 6, 6, 2), 'time': (1, 2, 3, 4, 5)}
For a variable number of keys, I've added a `keys` variable to avoid having to rewrite them:
>>> from itertools import groupby
>>> from operator import itemgetter
>>> keys = ('time', 'power', 'usage')  # the first key is the one grouped on (itemgetter(0) below)
>>> groups = groupby(zip(*[d[k] for k in keys]), key=itemgetter(0))  # rows built from the lists in `keys` order, grouped by their first item
>>> lists = zip(*[[k] + map(sum, zip(*g)[1:]) for k, g in groups])  # per group: [group key, sum of each remaining column]; slicing zip() and adding map() to a list rely on both returning lists, as in Python 2
>>> dict(zip(keys, lists))  # pair each key with its tuple of aggregated values
{'usage': (0, 2, 2, 5, 7), 'power': (2, 5, 6, 6, 2), 'time': (1, 2, 3, 4, 5)}
If you love us, you can donate to us via PayPal or Buy Me a Coffee so we can maintain and grow! Thank you!
Donate Us With