Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

How to sum 3 same sized sorted lists based on the identical elements of the first one in Python?

I have a Python dictionary containing 3 lists under the keys 'time', 'power' and 'usage'. All the lists have the same number of elements, and all the lists are sorted. What I want to do is sum up all the elements of the lists 'power' and 'usage' whose indexes correspond to the same value in the list 'time', so as to have only one sample of power and usage per time unit.

For example transform this dictionary:

{'time': [1, 2, 2, 3, 4, 4, 5],
 'power': [2, 2, 3, 6, 3, 3, 2],
 'usage': [0, 1, 1, 2, 1, 4, 7]}

to this one:

{'time': [1, 2, 3, 4, 5],
 'power': [2, 5, 6, 6, 2],
 'usage': [0, 2, 2, 5, 7]}

I have already written this piece of code, which works, but I don't like it very much:

# Sample data: three parallel lists of equal length, sorted by 'time'.
d = {'time': [1, 2, 2, 3, 4, 4, 5],
     'power': [2, 2, 3, 6, 3, 3, 2],
     'usage': [0, 1, 1, 2, 1, 4, 7]}
new_d = {'time': [], 'power': [], 'usage': []}

# A fresh object() compares unequal to every real time value, so the first
# sample always opens a new bucket (a numeric sentinel such as -1 would
# wrongly merge a first sample whose time happens to equal it).
prev = object()

for t, power, usage in zip(d['time'], d['power'], d['usage']):
    if t != prev:
        # First sample seen for this time value: start a new bucket.
        new_d['time'].append(t)
        new_d['power'].append(power)
        new_d['usage'].append(usage)
    else:
        # Same time value as the previous sample: fold it into the last bucket.
        new_d['power'][-1] += power
        new_d['usage'][-1] += usage
    prev = t

# print(...) with a single argument behaves the same on Python 2 and 3.
print(d)
print(new_d)

Is there a more Pythonic way to do this that is simpler and easier to understand?

like image 781
Thanasis Petsas Avatar asked Apr 10 '13 12:04

Thanasis Petsas


4 Answers

A robust solution, written as a function, that can handle any number of extra fields, provided the data is sorted by the 'time' field:

def aggregate(old_d, sort_key='time'):
    """Collapse runs of equal ``old_d[sort_key]`` values, summing the other fields.

    ``old_d`` maps field names to parallel lists. Consecutive positions whose
    ``sort_key`` values compare equal are merged into one output position:
    the ``sort_key`` list keeps a single copy of the value and every other
    list gets the ``+=`` accumulation of its entries over that run.

    Only *adjacent* equal values are merged, so the data must already be
    ordered (or at least grouped) by ``sort_key``.

    Args:
        old_d: dict of field name -> list; every list is indexed with the
            positions of ``old_d[sort_key]``.
        sort_key: name of the field whose repeated values define the runs.

    Returns:
        A new dict with the same keys as ``old_d`` and one entry per run in
        each list.

    Raises:
        KeyError: if ``sort_key`` is not a key of ``old_d``.
        IndexError: if some list is shorter than ``old_d[sort_key]``.
    """
    new_d = dict((k, []) for k in old_d)
    prev = None
    for i, curr in enumerate(old_d[sort_key]):
        # The very first element always opens a run; testing the index rather
        # than relying on a sentinel keeps this correct even when the first
        # key equals the initial value of ``prev`` (e.g. None).
        same_run = i > 0 and prev == curr
        # .items() works on both Python 2 and Python 3.
        for key, lst in new_d.items():
            if not same_run:
                lst.append(old_d[key][i])
            elif key != sort_key:
                # Accumulate into the current run; the key column itself
                # keeps just the single value appended when the run opened.
                lst[-1] += old_d[key][i]
        prev = curr
    return new_d

Using your dictionary:

# Example input: parallel lists, already sorted by 'time'.
d = {'time': [1, 2, 2, 3, 4, 4, 5],
     'power': [2, 2, 3, 6, 3, 3, 2],
     'usage': [0, 1, 1, 2, 1, 4, 7]}

# print(...) with a single argument behaves the same on Python 2 and 3.
print(aggregate(d))
>>>
{'usage': [0, 2, 2, 5, 7], 'power': [2, 5, 6, 6, 2], 'time': [1, 2, 3, 4, 5]}
like image 92
Inbar Rose Avatar answered Nov 09 '22 13:11

Inbar Rose


Here's one that will handle arbitrary dictionaries.... (where d is your dict...)

from itertools import groupby, imap
from operator import itemgetter

def group_dict_by(mapping, field, agg=sum):
    """Group parallel lists by runs of equal values in ``mapping[field]``.

    ``mapping`` maps field names to parallel lists. Consecutive equal values
    in ``mapping[field]`` form one group (``itertools.groupby`` semantics, so
    the data must already be ordered or grouped by ``field``). For every
    other key, the entries at the group's positions are reduced with ``agg``.

    Args:
        mapping: dict of field name -> list, indexed with the positions of
            ``mapping[field]``.
        field: key whose runs of equal values define the groups.
        agg: callable applied to the list of one group's values for one
            field; defaults to ``sum``.

    Returns:
        A new dict with the same keys as ``mapping``: ``field`` maps to one
        value per group and every other key to the aggregated values.

    Raises:
        KeyError: if ``field`` is not a key of ``mapping``.
    """
    grouper = mapping[field]
    new_grouper = []
    # Plain iteration and .items() work on both Python 2 and Python 3.
    accum = {k: [] for k in mapping if k != field}
    for key, grp in groupby(enumerate(grouper), itemgetter(1)):
        new_grouper.append(key)
        # Positions belonging to this run of equal ``field`` values.
        idx = [g[0] for g in grp]
        for dk, dv in accum.items():
            column = mapping[dk]
            # Hand ``agg`` a real list so aggregators that need a sized or
            # re-iterable input (len, statistics functions, ...) also work.
            dv.append(agg([column[i] for i in idx]))

    accum[field] = new_grouper
    return accum

# print(...) with a single argument behaves the same on Python 2 and 3.
print(group_dict_by(d, 'time'))
# {'usage': [0, 2, 2, 5, 7], 'power': [2, 5, 6, 6, 2], 'time': [1, 2, 3, 4, 5]}
like image 27
Jon Clements Avatar answered Nov 09 '22 14:11

Jon Clements


Using itertools.groupby, zip and some list comprehensions:

In [55]: dic={'time': [1, 2, 2, 3, 4, 4, 5],
   ....:  'power': [2, 2, 3, 6, 3, 3, 2],
   ....:  'usage': [0, 1, 1, 2, 1, 4, 7]}

In [56]: from itertools import groupby

In [57]: from operator import itemgetter

In [58]: zip1=zip(dic['time'],dic['power']) #use `itertools.izip` for performance    

In [59]: [sum(x[1] for x in v) for k,v in groupby(zip1,key=itemgetter(0))]
Out[59]: [2, 5, 6, 6, 2]

In [60]: zip2=zip(dic['time'],dic['usage'])

In [61]: [sum(x[1] for x in v) for k,v in groupby(zip2,key=itemgetter(0))]
Out[61]: [0, 2, 2, 5, 7]

In [64]: timee=[k for k,v in groupby(dic['time'])]

In [65]: timee
Out[65]: [1, 2, 3, 4, 5]

zip1 is [(1, 2), (2, 2), (2, 3), (3, 6), (4, 3), (4, 3), (5, 2)]. You can now group the elements by their first item using itertools.groupby and then take the sum of the second element of each tuple in the returned group.

In [75]: new_time=[k for k,v in groupby(dic['time'])]

In [76]: new_power=[sum(x[1] for x in v) for k,v in groupby(zip1,key=itemgetter(0))]

In [77]: new_usage=[sum(x[1] for x in v) for k,v in groupby(zip2,key=itemgetter(0))]

In [80]: dict(zip(('time','power','usage'),(new_time,new_power,new_usage)))
Out[80]: {'power': [2, 5, 6, 6, 2], 'time': [1, 2, 3, 4, 5], 'usage': [0, 2, 2, 5, 7]}
like image 2
Ashwini Chaudhary Avatar answered Nov 09 '22 15:11

Ashwini Chaudhary


>>> from itertools import groupby
>>> from operator import itemgetter
>>> d = {'usage': [0, 1, 1, 2, 1, 4, 7], 'power': [2, 2, 3, 6, 3, 3, 2], 'time': [1, 2, 2, 3, 4, 4, 5]}
>>> groups = groupby(zip(d['time'], d['power'], d['usage']), key=itemgetter(0))
>>> lists = zip(*[[k] + map(sum, zip(*g)[1:]) for k, g in groups])
>>> dict(zip(('time', 'power', 'usage'), lists))
{'usage': (0, 2, 2, 5, 7), 'power': (2, 5, 6, 6, 2), 'time': (1, 2, 3, 4, 5)}

For a variable number of keys, I've added the keys variable to avoid having to rewrite them:

>>> from itertools import groupby
>>> from operator import itemgetter
>>> keys = ('time', 'power', 'usage')
>>> groups = groupby(zip(*[d[k] for k in keys]), key=itemgetter(0))
>>> lists = zip(*[[k] + map(sum, zip(*g)[1:]) for k, g in groups])
>>> dict(zip(keys, lists))
{'usage': (0, 2, 2, 5, 7), 'power': (2, 5, 6, 6, 2), 'time': (1, 2, 3, 4, 5)}
like image 1
jamylak Avatar answered Nov 09 '22 14:11

jamylak