I'd like to accelerate the code written in Python and NumPy. I use Gray-Skott algorithm (http://mrob.com/pub/comp/xmorphia/index.html) for reaction-diffusion model, but with Numba and Cython it's even slower! Is it possible to speed it up? Thanks in advance!
Python+NumPy
def GrayScott(counts, Du, Dv, F, k):
n = 300
U = np.zeros((n+2,n+2), dtype=np.float_)
V = np.zeros((n+2,n+2), dtype=np.float_)
u, v = U[1:-1,1:-1], V[1:-1,1:-1]
r = 20
u[:] = 1.0
U[n/2-r:n/2+r,n/2-r:n/2+r] = 0.50
V[n/2-r:n/2+r,n/2-r:n/2+r] = 0.25
u += 0.15*np.random.random((n,n))
v += 0.15*np.random.random((n,n))
for i in range(counts):
Lu = ( U[0:-2,1:-1] +
U[1:-1,0:-2] - 4*U[1:-1,1:-1] + U[1:-1,2:] +
U[2: ,1:-1] )
Lv = ( V[0:-2,1:-1] +
V[1:-1,0:-2] - 4*V[1:-1,1:-1] + V[1:-1,2:] +
V[2: ,1:-1] )
uvv = u*v*v
u += Du*Lu - uvv + F*(1 - u)
v += Dv*Lv + uvv - (F + k)*v
return V
Numba
from numba import jit, autojit
@autojit
def numbaGrayScott(counts, Du, Dv, F, k):
n = 300
U = np.zeros((n+2,n+2), dtype=np.float_)
V = np.zeros((n+2,n+2), dtype=np.float_)
u, v = U[1:-1,1:-1], V[1:-1,1:-1]
r = 20
u[:] = 1.0
U[n/2-r:n/2+r,n/2-r:n/2+r] = 0.50
V[n/2-r:n/2+r,n/2-r:n/2+r] = 0.25
u += 0.15*np.random.random((n,n))
v += 0.15*np.random.random((n,n))
Lu = np.zeros_like(u)
Lv = np.zeros_like(v)
for i in range(counts):
for row in range(n):
for col in range(n):
Lu[row,col] = U[row+1,col+2] + U[row+1,col] + U[row+2,col+1] + U[row,col+1] - 4*U[row+1,col+1]
Lv[row,col] = V[row+1,col+2] + V[row+1,col] + V[row+2,col+1] + V[row,col+1] - 4*V[row+1,col+1]
uvv = u*v*v
u += Du*Lu - uvv + F*(1 - u)
v += Dv*Lv + uvv - (F + k)*v
return V
Cython
%%cython
cimport cython
import numpy as np
cimport numpy as np
cpdef cythonGrayScott(int counts, double Du, double Dv, double F, double k):
cdef int n = 300
cdef np.ndarray U = np.zeros((n+2,n+2), dtype=np.float_)
cdef np.ndarray V = np.zeros((n+2,n+2), dtype=np.float_)
cdef np.ndarray u = U[1:-1,1:-1]
cdef np.ndarray v = V[1:-1,1:-1]
cdef int r = 20
u[:] = 1.0
U[n/2-r:n/2+r,n/2-r:n/2+r] = 0.50
V[n/2-r:n/2+r,n/2-r:n/2+r] = 0.25
u += 0.15*np.random.random((n,n))
v += 0.15*np.random.random((n,n))
cdef np.ndarray Lu = np.zeros_like(u)
cdef np.ndarray Lv = np.zeros_like(v)
cdef int i, row, col
cdef np.ndarray uvv
for i in range(counts):
for row in range(n):
for col in range(n):
Lu[row,col] = U[row+1,col+2] + U[row+1,col] + U[row+2,col+1] + U[row,col+1] - 4*U[row+1,col+1]
Lv[row,col] = V[row+1,col+2] + V[row+1,col] + V[row+2,col+1] + V[row,col+1] - 4*V[row+1,col+1]
uvv = u*v*v
u += Du*Lu - uvv + F*(1 - u)
v += Dv*Lv + uvv - (F + k)*v
return V
Usage example:
GrayScott(4000, 0.16, 0.08, 0.04, 0.06)
Here is steps to speedup the cython version:
cdef np.ndarray
doen't make element access faster, you need use memoryview in cython: cdef double[:, ::1] bU = U
.
Turn off boundscheck
and wraparound
.
Do all the calculations in for-loop.
Here is the modified cython code:
%%cython
#cython: boundscheck=False
#cython: wraparound=False
cimport cython
import numpy as np
cimport numpy as np
cpdef cythonGrayScott(int counts, double Du, double Dv, double F, double k):
cdef int n = 300
cdef np.ndarray U = np.zeros((n+2,n+2), dtype=np.float_)
cdef np.ndarray V = np.zeros((n+2,n+2), dtype=np.float_)
cdef np.ndarray u = U[1:-1,1:-1]
cdef np.ndarray v = V[1:-1,1:-1]
cdef int r = 20
u[:] = 1.0
U[n/2-r:n/2+r,n/2-r:n/2+r] = 0.50
V[n/2-r:n/2+r,n/2-r:n/2+r] = 0.25
u += 0.15*np.random.random((n,n))
v += 0.15*np.random.random((n,n))
cdef np.ndarray Lu = np.zeros_like(u)
cdef np.ndarray Lv = np.zeros_like(v)
cdef int i, c, r1, c1, r2, c2
cdef double uvv
cdef double[:, ::1] bU = U
cdef double[:, ::1] bV = V
cdef double[:, ::1] bLu = Lu
cdef double[:, ::1] bLv = Lv
for i in range(counts):
for r in range(n):
r1 = r + 1
r2 = r + 2
for c in range(n):
c1 = c + 1
c2 = c + 2
bLu[r,c] = bU[r1,c2] + bU[r1,c] + bU[r2,c1] + bU[r,c1] - 4*bU[r1,c1]
bLv[r,c] = bV[r1,c2] + bV[r1,c] + bV[r2,c1] + bV[r,c1] - 4*bV[r1,c1]
for r in range(n):
r1 = r + 1
for c in range(n):
c1 = c + 1
uvv = bU[r1,c1]*bV[r1,c1]*bV[r1,c1]
bU[r1,c1] += Du*bLu[r,c] - uvv + F*(1 - bU[r1,c1])
bV[r1,c1] += Dv*bLv[r,c] + uvv - (F + k)*bV[r1,c1]
return V
It's about 11x faster than the numpy version.
Aside from the looping and the sheer volume of operations involved, what is most likely killing performance in your case is array allocation. I don't know why your Numba and Cython versions are not living up to your expectation, but you can make your numpy code 2x faster (at the cost of some readability), by doing all operations in-place, i.e. replacing your current loop with:
Lu, Lv, uvv = np.empty_like(u), np.empty_like(v), np.empty_like(u)
for i in range(counts):
Lu[:] = u
Lu *= -4
Lu += U[:-2,1:-1]
Lu += U[1:-1,:-2]
Lu += U[1:-1,2:]
Lu += U[2:,1:-1]
Lu *= Du
Lv[:] = v
Lv *= -4
Lv += V[:-2,1:-1]
Lv += V[1:-1,:-2]
Lv += V[1:-1,2:]
Lv += V[2:,1:-1]
Lv *= Dv
uvv[:] = u
uvv *= v
uvv *= v
Lu -= uvv
Lv += uvv
u *= 1 - F
u += F
u += Lu
v *= 1 - F - k
v += Lv
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With