Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

How to calculate the sum of "submatrix" entries with numpy/pandas?

I have the following 8x8 matrix in Python, which I have represented as either an 8-by-8 numpy array, or a pandas DataFrame:

import numpy as np
import pandas as pd

# The values 0..63 in order; still a plain range at this point.
x = range(64)

# Lay the 64 values out row by row as an 8x8 array.
x = np.reshape(x,(8,8)) 

print(x)

# [[ 0  1  2  3  4  5  6  7]
#  [ 8  9 10 11 12 13 14 15]
#  [16 17 18 19 20 21 22 23]
#  [24 25 26 27 28 29 30 31]
#  [32 33 34 35 36 37 38 39]
#  [40 41 42 43 44 45 46 47]
#  [48 49 50 51 52 53 54 55]
#  [56 57 58 59 60 61 62 63]]

# Wrap the same 8x8 array in a DataFrame; it prints with 0..7 row and column labels.
df = pd.DataFrame(x)

print(df)

#      0   1   2   3   4   5   6   7
#  0   0   1   2   3   4   5   6   7
#  1   8   9  10  11  12  13  14  15
#  2  16  17  18  19  20  21  22  23
#  3  24  25  26  27  28  29  30  31
#  4  32  33  34  35  36  37  38  39
#  5  40  41  42  43  44  45  46  47
#  6  48  49  50  51  52  53  54  55
#  7  56  57  58  59  60  61  62  63

I'm trying to treat this as a 2-by-2 grid of 4-by-4 submatrices, calculate the sum of the values in each submatrix, and replace every value above with the sum of the submatrix it belongs to. My end result would be

#      0   1   2   3   4   5   6   7
#  0  216  216  216  216  280  280  280  280
#  1  216  216  216  216  280  280  280  280
#  2  216  216  216  216  280  280  280  280
#  3  216  216  216  216  280  280  280  280
#  4  728  728  728  728  792  792  792  792
#  5  728  728  728  728  792  792  792  792
#  6  728  728  728  728  792  792  792  792
#  7  728  728  728  728  792  792  792  792

So, the top-left submatrix has the sum 216 because

0+1+2+3+8+9+10+11+16+17+18+19+24+25+26+27=216

Similarly,

32+33+34+35+40+41+42+43+48+49+50+51+56+57+58+59=728
4+5+6+7+12+13+14+15+20+21+22+23+28+29+30+31=280
36+37+38+39+44+45+46+47+52+53+54+55+60+61+62+63=792

Is there numpy/pandas functionality to make this calculation easier? Especially for much larger matrices, where manually setting the coordinates of the "sum matrices" could be quite cumbersome.

like image 446
ShanZhengYang Avatar asked Dec 08 '22 14:12

ShanZhengYang


1 Answer

One way to do that with NumPy is this:

import numpy as np

def as_submatrices(x, rows, cols=None, writeable=False):
    """View a 2-D array as a grid of non-overlapping ``rows`` x ``cols`` blocks.

    Parameters
    ----------
    x : array_like
        Two-dimensional input; converted with ``np.asarray``.
    rows : int
        Height of each block. Must be positive and divide ``x.shape[0]``.
    cols : int, optional
        Width of each block. Must be positive and divide ``x.shape[1]``.
        Defaults to ``rows`` (square blocks).
    writeable : bool, optional
        Forwarded to ``as_strided``; the result is read-only by default.

    Returns
    -------
    numpy.ndarray
        4-D strided array of shape
        ``(x.shape[0] // rows, x.shape[1] // cols, rows, cols)`` where
        ``out[i, j]`` is the block in block-row ``i`` and block-column ``j``.
        No data is copied: the blocks alias the memory of the converted
        input array.

    Raises
    ------
    ValueError
        If ``x`` is not two-dimensional, if a block dimension is not
        positive, or if the block shape does not evenly divide ``x``.
    """
    from numpy.lib.stride_tricks import as_strided
    if cols is None:
        cols = rows
    x = np.asarray(x)
    if x.ndim != 2:
        raise ValueError('Expected a 2-D array, got %d dimension(s).' % x.ndim)
    # Reject non-positive block sizes up front: 0 would otherwise surface as a
    # ZeroDivisionError from the modulo below, and a negative size can pass
    # the divisibility test and then produce a negative output shape.
    if rows <= 0 or cols <= 0:
        raise ValueError('Block dimensions must be positive.')
    x_rows, x_cols = x.shape
    if x_rows % rows != 0 or x_cols % cols != 0:
        raise ValueError('Invalid dimensions.')
    s1, s2 = x.strides
    out_shape = (x_rows // rows, x_cols // cols, rows, cols)
    # Stepping one block down/right skips `rows`/`cols` elements of the
    # original; inside a block the original element strides apply unchanged.
    out_strides = (s1 * rows, s2 * cols, s1, s2)
    return as_strided(x, out_shape, out_strides, writeable=writeable)

def sum_submatrices(x, rows, cols=None):
    """Replace every entry of a 2-D array with the sum of its block.

    The array is partitioned into non-overlapping ``rows`` x ``cols`` blocks
    (via ``as_submatrices``); each block is summed and the total is written
    to every position of that block in the result.

    Parameters
    ----------
    x : array_like
        Two-dimensional input; converted with ``np.asarray``.
    rows : int
        Block height.
    cols : int, optional
        Block width. Defaults to ``rows`` (square blocks).

    Returns
    -------
    numpy.ndarray
        New array with the same shape as ``x`` holding the block sums.

    Raises
    ------
    ValueError
        Propagated from ``as_submatrices`` when the block shape does not
        evenly divide the shape of ``x``.
    """
    if cols is None:
        cols = rows
    x = np.asarray(x)
    # One total per block: shape (x_rows // rows, x_cols // cols).
    block_sums = np.sum(as_submatrices(x, rows, cols), axis=(2, 3))
    # Stretch each total back over the rows x cols cells it was computed from.
    return np.repeat(np.repeat(block_sums, rows, axis=0), cols, axis=1)

# 8x8 test matrix holding the values 0..63 row by row.
x = np.arange(64).reshape((8, 8))

# 4x4 blocks: four block sums, each repeated across its own block.
print(sum_submatrices(x, 4))
# [[216 216 216 216 280 280 280 280]
#  [216 216 216 216 280 280 280 280]
#  [216 216 216 216 280 280 280 280]
#  [216 216 216 216 280 280 280 280]
#  [728 728 728 728 792 792 792 792]
#  [728 728 728 728 792 792 792 792]
#  [728 728 728 728 792 792 792 792]
#  [728 728 728 728 792 792 792 792]]

print(sum_submatrices(x, 2))
# [[ 18  18  26  26  34  34  42  42]
#  [ 18  18  26  26  34  34  42  42]
#  [ 82  82  90  90  98  98 106 106]
#  [ 82  82  90  90  98  98 106 106]
#  [146 146 154 154 162 162 170 170]
#  [146 146 154 154 162 162 170 170]
#  [210 210 218 218 226 226 234 234]
#  [210 210 218 218 226 226 234 234]]

print(sum_submatrices(x, 2, 8))
# [[120 120 120 120 120 120 120 120]
#  [120 120 120 120 120 120 120 120]
#  [376 376 376 376 376 376 376 376]
#  [376 376 376 376 376 376 376 376]
#  [632 632 632 632 632 632 632 632]
#  [632 632 632 632 632 632 632 632]
#  [888 888 888 888 888 888 888 888]
#  [888 888 888 888 888 888 888 888]]

EDIT: As pointed out by Divakar, np.broadcast_to is faster than np.repeat here, so the improved version of the function above would be:

def sum_submatrices(x, rows, cols=None):
    """Return an array shaped like ``x`` where each entry is its block's sum.

    ``x`` is split into non-overlapping ``rows`` x ``cols`` blocks with
    ``as_submatrices``; ``cols`` defaults to ``rows``. Each block total is
    broadcast over the block instead of being repeated, and the 4-D result
    is folded back into the original 2-D layout.
    """
    cols = rows if cols is None else cols
    arr = np.asarray(x)
    blocks = as_submatrices(arr, rows, cols)
    # keepdims leaves the totals as (n_block_rows, n_block_cols, 1, 1) so
    # they broadcast cleanly against the full 4-D block layout.
    totals = blocks.sum(axis=(2, 3), keepdims=True)
    tiled = np.broadcast_to(totals, blocks.shape)
    # Reorder to (block_row, row_in_block, block_col, col_in_block) so that
    # the final reshape restores the original row-major arrangement.
    return tiled.transpose((0, 2, 1, 3)).reshape(arr.shape)

This is essentially the same as Divakar's answer, except that his is nicer since it does not use stride tricks and transposing.

like image 106
jdehesa Avatar answered Jan 05 '23 00:01

jdehesa