I would like to understand where the gains are coming from when using Numba to accelerate pure numpy
code in a for loop. Are there any profiling tools that allow you to look into jitted
functions?
The demo code (as below) is just using very basic matrix multiplication to provide work to the computer. Are the observed gains from:
loop
,numpy
functions intercepted by the jit
during the compilation process, orjit
as numpy outsources execution via wrapper functions to low level libraries such as LINPACK
%matplotlib inline
import numpy as np
from numba import jit
import pandas as pd
#Dimensions of Matrices
i = 100
j = 100
def pure_python(N,i,j):
for n in range(N):
a = np.random.rand(i,j)
b = np.random.rand(i,j)
c = np.dot(a,b)
@jit(nopython=True)
def jit_python(N,i,j):
for n in range(N):
a = np.random.rand(i,j)
b = np.random.rand(i,j)
c = np.dot(a,b)
time_python = []
time_jit = []
N = [1,10,100,500,1000,2000]
for n in N:
time = %timeit -oq pure_python(n,i,j)
time_python.append(time.average)
time = %timeit -oq jit_python(n,i,j)
time_jit.append(time.average)
df = pd.DataFrame({'pure_python' : time_python, 'jit_python' : time_jit}, index=N)
df.index.name = 'Iterations'
df[["pure_python", "jit_python"]].plot()
produces the following chart.
TL:DR The random and looping get accelerated, but the matrix multiply doesn't except for small matrix size. At small matrix/loop size, there seems to be significant speedups that are probably related to python overhead. At large N, the matrix multiply begins to dominate and the jit less helpful
Function definitions, using a square matrix for simplicity.
from IPython.display import display
import numpy as np
from numba import jit
import pandas as pd
#Dimensions of Matrices
N = 1000
def py_rand(i, j):
a = np.random.rand(i, j)
jit_rand = jit(nopython=True)(py_rand)
def py_matmul(a, b):
c = np.dot(a, b)
jit_matmul = jit(nopython=True)(py_matmul)
def py_loop(N, val):
count = 0
for i in range(N):
count += val
jit_loop = jit(nopython=True)(py_loop)
def pure_python(N,i,j):
for n in range(N):
a = np.random.rand(i,j)
b = np.random.rand(i,j)
c = np.dot(a,a)
jit_func = jit(nopython=True)(pure_python)
Timing:
df = pd.DataFrame(columns=['Func', 'jit', 'N', 'Time'])
def meantime(f, *args, **kwargs):
t = %timeit -oq -n5 f(*args, **kwargs)
return t.average
for N in [10, 100, 1000, 2000]:
a = np.random.randn(N, N)
b = np.random.randn(N, N)
df = df.append({'Func': 'jit_rand', 'N': N, 'Time': meantime(jit_rand, N, N)}, ignore_index=True)
df = df.append({'Func': 'py_rand', 'N': N, 'Time': meantime(py_rand, N, N)}, ignore_index=True)
df = df.append({'Func': 'jit_matmul', 'N': N, 'Time': meantime(jit_matmul, a, b)}, ignore_index=True)
df = df.append({'Func': 'py_matmul', 'N': N, 'Time': meantime(py_matmul, a, b)}, ignore_index=True)
df = df.append({'Func': 'jit_loop', 'N': N, 'Time': meantime(jit_loop, N, 2.0)}, ignore_index=True)
df = df.append({'Func': 'py_loop', 'N': N, 'Time': meantime(py_loop, N, 2.0)}, ignore_index=True)
df = df.append({'Func': 'jit_func', 'N': N, 'Time': meantime(jit_func, 5, N, N)}, ignore_index=True)
df = df.append({'Func': 'py_func', 'N': N, 'Time': meantime(pure_python, 5, N, N)}, ignore_index=True)
df['jit'] = df['Func'].str.contains('jit')
df['Func'] = df['Func'].apply(lambda s: s.split('_')[1])
df.set_index('Func')
display(df)
result:
Func jit N Time
0 rand True 10 1.030686e-06
1 rand False 10 1.115149e-05
2 matmul True 10 2.250371e-06
3 matmul False 10 2.199343e-06
4 loop True 10 2.706000e-07
5 loop False 10 7.274286e-07
6 func True 10 1.217046e-05
7 func False 10 2.495837e-05
8 rand True 100 5.199217e-05
9 rand False 100 8.149794e-05
10 matmul True 100 7.848071e-05
11 matmul False 100 2.130794e-05
12 loop True 100 2.728571e-07
13 loop False 100 3.003743e-06
14 func True 100 6.739634e-04
15 func False 100 1.146594e-03
16 rand True 1000 5.644258e-03
17 rand False 1000 8.012790e-03
18 matmul True 1000 1.476098e-02
19 matmul False 1000 1.613211e-02
20 loop True 1000 2.846572e-07
21 loop False 1000 3.539849e-05
22 func True 1000 1.256926e-01
23 func False 1000 1.581177e-01
24 rand True 2000 2.061612e-02
25 rand False 2000 3.204709e-02
26 matmul True 2000 9.866484e-02
27 matmul False 2000 1.007234e-01
28 loop True 2000 3.011143e-07
29 loop False 2000 7.477454e-05
30 func True 2000 1.033560e+00
31 func False 2000 1.199969e+00
It looks like numba is optimizing away the loop, so I'm not gonna bother including it in the compare
plot:
def jit_speedup(d):
py_time = d[d['jit'] == False]['Time'].mean()
jit_time = d[d['jit'] == True]['Time'].mean()
return py_time / jit_time
import seaborn as sns
result = df.groupby(['Func', 'N']).apply(jit_speedup).reset_index().rename(columns={0: 'Jit Speedup'})
result = result[result['Func'] != 'loop']
sns.factorplot(data=result, x='N', y='Jit Speedup', hue='Func')
So for the loop being 5 repetitions, the jit speeds things up quite solidly until the matrix multiply becomes expensive enough to make the other overhead insignificant in comparison.
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With