Where are the gains using numba coming from for pure numpy code?

I would like to understand where the gains are coming from when using Numba to accelerate pure numpy code in a for loop. Are there any profiling tools that allow you to look into jitted functions?

The demo code below just uses basic matrix multiplication to give the computer some work to do. Are the observed gains from:

  1. a faster loop,
  2. the recasting of numpy functions intercepted by the jit during the compilation process, or
  3. less overhead with jit, since numpy outsources execution via wrapper functions to low-level libraries such as BLAS/LAPACK?

%matplotlib inline
import numpy as np
from numba import jit
import pandas as pd

#Dimensions of Matrices
i = 100 
j = 100

def pure_python(N,i,j):
    for n in range(N):
        a = np.random.rand(i,j)
        b = np.random.rand(i,j)
        c = np.dot(a,b)

@jit(nopython=True)
def jit_python(N,i,j):
    for n in range(N):
        a = np.random.rand(i,j)
        b = np.random.rand(i,j)
        c = np.dot(a,b)

time_python = []
time_jit = []
N = [1,10,100,500,1000,2000]
for n in N:
    time = %timeit -oq pure_python(n,i,j)
    time_python.append(time.average)
    time = %timeit -oq jit_python(n,i,j)
    time_jit.append(time.average)

df = pd.DataFrame({'pure_python' : time_python, 'jit_python' : time_jit}, index=N)
df.index.name = 'Iterations'
df[["pure_python", "jit_python"]].plot()

produces the following chart:

(chart: runtime comparisons for a range of iteration lengths)

asked Jun 26 '17 by sanguineturtle

1 Answer

TL;DR: The random number generation and the looping get accelerated, but the matrix multiply doesn't, except at small matrix sizes. At small matrix/loop sizes there seem to be significant speedups, probably related to Python overhead. At large N the matrix multiply begins to dominate and the jit becomes less helpful.
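Some quick background on why the matrix multiply itself is roughly a wash: plain NumPy hands np.dot straight to an optimized BLAS, and as far as I know Numba's nopython implementation of np.dot also dispatches to BLAS (it needs SciPy installed for that), so on large matrices both versions spend their time in the same library. A minimal check of which BLAS your NumPy build links against (nothing here is specific to this benchmark):

import numpy as np

# print the BLAS/LAPACK libraries this NumPy build dispatches np.dot to
np.show_config()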

Function definitions, using a square matrix for simplicity.

from IPython.display import display
import numpy as np
from numba import jit
import pandas as pd

#Dimensions of Matrices
N = 1000

def py_rand(i, j):
    a = np.random.rand(i, j)

jit_rand = jit(nopython=True)(py_rand)  # compile the same function in nopython mode

def py_matmul(a, b):
    c = np.dot(a, b)

jit_matmul = jit(nopython=True)(py_matmul)

def py_loop(N, val):
    count = 0
    for i in range(N):
        count += val     


jit_loop = jit(nopython=True)(py_loop)      

def pure_python(N,i,j):
    for n in range(N):
        a = np.random.rand(i,j)
        b = np.random.rand(i,j)
        c = np.dot(a,b)

jit_func = jit(nopython=True)(pure_python)

Timing:

df = pd.DataFrame(columns=['Func', 'jit', 'N', 'Time'])
def meantime(f, *args, **kwargs):
    t = %timeit -oq -n5 f(*args, **kwargs)
    return t.average


for N in [10, 100, 1000, 2000]:
    a = np.random.randn(N, N)
    b = np.random.randn(N, N)

    df = df.append({'Func': 'jit_rand', 'N': N, 'Time': meantime(jit_rand, N, N)}, ignore_index=True)
    df = df.append({'Func': 'py_rand', 'N': N, 'Time': meantime(py_rand, N, N)}, ignore_index=True)

    df = df.append({'Func': 'jit_matmul', 'N': N, 'Time': meantime(jit_matmul, a, b)}, ignore_index=True)
    df = df.append({'Func': 'py_matmul', 'N': N, 'Time': meantime(py_matmul, a, b)}, ignore_index=True)

    df = df.append({'Func': 'jit_loop', 'N': N, 'Time': meantime(jit_loop, N, 2.0)}, ignore_index=True)
    df = df.append({'Func': 'py_loop', 'N': N, 'Time': meantime(py_loop, N, 2.0)}, ignore_index=True)

    df = df.append({'Func': 'jit_func', 'N': N, 'Time': meantime(jit_func, 5, N, N)}, ignore_index=True)
    df = df.append({'Func': 'py_func', 'N': N, 'Time': meantime(pure_python, 5, N, N)}, ignore_index=True)

df['jit'] = df['Func'].str.contains('jit')
df['Func'] = df['Func'].apply(lambda s: s.split('_')[1])
display(df)

result:

    Func    jit     N   Time
0   rand    True    10  1.030686e-06
1   rand    False   10  1.115149e-05
2   matmul  True    10  2.250371e-06
3   matmul  False   10  2.199343e-06
4   loop    True    10  2.706000e-07
5   loop    False   10  7.274286e-07
6   func    True    10  1.217046e-05
7   func    False   10  2.495837e-05
8   rand    True    100 5.199217e-05
9   rand    False   100 8.149794e-05
10  matmul  True    100 7.848071e-05
11  matmul  False   100 2.130794e-05
12  loop    True    100 2.728571e-07
13  loop    False   100 3.003743e-06
14  func    True    100 6.739634e-04
15  func    False   100 1.146594e-03
16  rand    True    1000    5.644258e-03
17  rand    False   1000    8.012790e-03
18  matmul  True    1000    1.476098e-02
19  matmul  False   1000    1.613211e-02
20  loop    True    1000    2.846572e-07
21  loop    False   1000    3.539849e-05
22  func    True    1000    1.256926e-01
23  func    False   1000    1.581177e-01
24  rand    True    2000    2.061612e-02
25  rand    False   2000    3.204709e-02
26  matmul  True    2000    9.866484e-02
27  matmul  False   2000    1.007234e-01
28  loop    True    2000    3.011143e-07
29  loop    False   2000    7.477454e-05
30  func    True    2000    1.033560e+00
31  func    False   2000    1.199969e+00

It looks like numba is optimizing the loop away entirely, so I won't include it in the comparison.
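One way to sanity-check that suspicion (a rough sketch, not part of the original benchmark, so treat jit_loop_ret as illustrative): return the accumulator so the work can't be thrown away, and inspect the LLVM IR Numba generated for the original loop.

@jit(nopython=True)
def jit_loop_ret(N, val):
    # returning the result means the loop can't be dead-code eliminated
    count = 0
    for i in range(N):
        count += val
    return count

jit_loop_ret(1000, 2.0)  # first call triggers compilation

# dump the LLVM IR for each compiled signature of the original jit_loop;
# if the addition loop was optimized away it won't show up in the IR
for sig, ir in jit_loop.inspect_llvm().items():
    print(sig)
    print(ir[:300])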

plot:

def jit_speedup(d):
    py_time = d[d['jit'] == False]['Time'].mean()
    jit_time = d[d['jit'] == True]['Time'].mean()
    return py_time / jit_time 

import seaborn as sns
result = df.groupby(['Func', 'N']).apply(jit_speedup).reset_index().rename(columns={0: 'Jit Speedup'})
result = result[result['Func'] != 'loop']
sns.factorplot(data=result, x='N', y='Jit Speedup', hue='Func')

(plot: Jit Speedup vs N, one line per Func)

So with the combined function doing 5 loop iterations, the jit speeds things up quite solidly until the matrix multiply becomes expensive enough to make the other overhead insignificant in comparison.
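On the profiling part of the question: ordinary profilers such as cProfile only see the single call into the compiled dispatcher, not what happens inside nopython code, but the jitted function objects expose some introspection of their own. A minimal sketch using the functions defined above (inspect_types / inspect_llvm / inspect_asm are standard dispatcher methods; how you read the output is up to you):

# per-line type annotations: shows which operations were lowered to
# native code and which types Numba inferred
jit_func.inspect_types()

# generated LLVM IR and assembly, keyed by compiled signature
llvm_ir = jit_func.inspect_llvm()
asm = jit_func.inspect_asm()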

answered Sep 28 '22 by evamicur