I am trying to convert my Python / Numpy code to Cython code for speedup purposes. However, Cython is MUCH slower (3-4 times) than the Python / Numpy code. Am I using Cython correctly? Am I passing arguments correctly to myc_rb_etc() in my Cython code? What about when I call the integrate function? Thank you in advance for your help. Here is my Python / Numpy code:
from pylab import *
import pylab as pl
from numpy import *
import numpy as np
from scipy import integrate
def myc_rb_e2f(y,t,k,d):
M = y[0]
E = y[1]
CD = y[2]
CE = y[3]
R = y[4]
RP = y[5]
RE = y[6]
S = 0.01
if t > 300:
S = 5.0
#if t > 400
#S = 0.01
t1 = k[0]*S/(k[7]+S);
t2 = k[1]*(M/(k[14]+M))*(E/(k[15]+E));
t3 = k[5]*M/(k[14]+M);
t4 = k[11]*CD*RE/(k[16]+RE);
t5 = k[12]*CE*RE/(k[17]+RE);
t6 = k[2]*M/(k[14]+M);
t7 = k[3]*S/(k[7]+S);
t8 = k[6]*E/(k[15]+E);
t9 = k[13]*RP/(k[18]+RP);
t10 = k[9]*CD*R/(k[16]+R);
t11 = k[10]*CE*R/(k[17]+R);
dM = t1-d[0]*M
dE = t2+t3+t4+t5-k[8]*R*E-d[1]*E
dCD = t6+t7-d[2]*CD
dCE = t8-d[3]*CE
dR = k[4]+t9-k[8]*R*E-t10-t11-d[4]*R
dRP = t10+t11+t4+t5-t9-d[5]*RP
dRE = k[8]*R*E-t4-t5-d[6]*RE
dy = [dM,dE,dCD,dCE,dR,dRP,dRE]
return dy
t = np.zeros(10000)
t = np.linspace(0.,3000.,10000.)
# Initial concentrations of [M,E,CD,CE,R,RP,RE]
y0 = np.array([0.,0.,0.,0.,0.4,0.,0.25])
E_simulated = np.zeros([10000,5000])
E_avg = np.zeros([10000])
k = np.zeros([19])
d = np.zeros([7])
for i in range (0,5000):
k[0] = 1.+0.1*randn(1)
k[1] = 0.15+0.05*randn(1)
k[2] = 0.2+0.05*randn(1)
k[3] = 0.2+0.05*randn(1)
k[4] = 0.35+0.05*randn(1)
k[5] = 0.001+0.0001*randn(1)
k[6] = 0.5+0.05*randn(1)
k[7] = 0.3+0.05*randn(1)
k[8] = 30.+5.*randn(1)
k[9] = 18.+3.*randn(1)
k[10] = 18.+3.*randn(1)
k[11] = 18.+3.*randn(1)
k[12] = 18.+3.*randn(1)
k[13] = 3.6+0.5*randn(1)
k[14] = 0.15+0.05*randn(1)
k[15] = 0.15+0.05*randn(1)
k[16] = 0.92+0.1*randn(1)
k[17] = 0.92+0.1*randn(1)
k[18] = 0.01+0.001*randn(1)
d[0] = 0.7+0.05*randn(1)
d[1] = 0.25+0.025*randn(1)
d[2] = 1.5+0.05*randn(1)
d[3] = 1.5+0.05*randn(1)
d[4] = 0.06+0.01*randn(1)
d[5] = 0.06+0.01*randn(1)
d[6] = 0.03+0.005*randn(1)
r = integrate.odeint(myc_rb_e2f,y0,t,args=(k,d))
E_simulated[:,i] = r[:,1]
for i in range(0,10000):
E_avg[i] = sum(E_simulated[i,:])/5000.
pl.plot(t,E_avg,'-ro')
pl.show()
Here is the code converted into Cython:
cimport numpy as np
import numpy as np
from numpy import *
import pylab as pl
from pylab import *
from scipy import integrate
def myc_rb_e2f(y,t,k,d):
cdef double M = y[0]
cdef double E = y[1]
cdef double CD = y[2]
cdef double CE = y[3]
cdef double R = y[4]
cdef double RP = y[5]
cdef double RE = y[6]
cdef double S = 0.01
if t > 300.0:
S = 5.0
#if t > 400
#S = 0.01
cdef double t1 = k[0]*S/(k[7]+S)
cdef double t2 = k[1]*(M/(k[14]+M))*(E/(k[15]+E))
cdef double t3 = k[5]*M/(k[14]+M)
cdef double t4 = k[11]*CD*RE/(k[16]+RE)
cdef double t5 = k[12]*CE*RE/(k[17]+RE)
cdef double t6 = k[2]*M/(k[14]+M)
cdef double t7 = k[3]*S/(k[7]+S)
cdef double t8 = k[6]*E/(k[15]+E)
cdef double t9 = k[13]*RP/(k[18]+RP)
cdef double t10 = k[9]*CD*R/(k[16]+R)
cdef double t11 = k[10]*CE*R/(k[17]+R)
cdef double dM = t1-d[0]*M
cdef double dE = t2+t3+t4+t5-k[8]*R*E-d[1]*E
cdef double dCD = t6+t7-d[2]*CD
cdef double dCE = t8-d[3]*CE
cdef double dR = k[4]+t9-k[8]*R*E-t10-t11-d[4]*R
cdef double dRP = t10+t11+t4+t5-t9-d[5]*RP
cdef double dRE = k[8]*R*E-t4-t5-d[6]*RE
dy = [dM,dE,dCD,dCE,dR,dRP,dRE]
return dy
def main():
cdef np.ndarray[double,ndim=1] t = np.zeros(10000)
t = np.linspace(0.,3000.,10000.)
# Initial concentrations of [M,E,CD,CE,R,RP,RE]
cdef np.ndarray[double,ndim=1] y0 = np.array([0.,0.,0.,0.,0.4,0.,0.25])
cdef np.ndarray[double,ndim=2] E_simulated = np.zeros([10000,5000])
cdef np.ndarray[double,ndim=2] r = np.zeros([10000,7])
cdef np.ndarray[double,ndim=1] E_avg = np.zeros([10000])
cdef np.ndarray[double,ndim=1] k = np.zeros([19])
cdef np.ndarray[double,ndim=1] d = np.zeros([7])
cdef int i
for i in range (0,5000):
k[0] = 1.+0.1*randn(1)
k[1] = 0.15+0.05*randn(1)
k[2] = 0.2+0.05*randn(1)
k[3] = 0.2+0.05*randn(1)
k[4] = 0.35+0.05*randn(1)
k[5] = 0.001+0.0001*randn(1)
k[6] = 0.5+0.05*randn(1)
k[7] = 0.3+0.05*randn(1)
k[8] = 30.+5.*randn(1)
k[9] = 18.+3.*randn(1)
k[10] = 18.+3.*randn(1)
k[11] = 18.+3.*randn(1)
k[12] = 18.+3.*randn(1)
k[13] = 3.6+0.5*randn(1)
k[14] = 0.15+0.05*randn(1)
k[15] = 0.15+0.05*randn(1)
k[16] = 0.92+0.1*randn(1)
k[17] = 0.92+0.1*randn(1)
k[18] = 0.01+0.001*randn(1)
d[0] = 0.7+0.05*randn(1)
d[1] = 0.25+0.025*randn(1)
d[2] = 1.5+0.05*randn(1)
d[3] = 1.5+0.05*randn(1)
d[4] = 0.06+0.01*randn(1)
d[5] = 0.06+0.01*randn(1)
d[6] = 0.03+0.005*randn(1)
r = integrate.odeint(myc_rb_e2f,y0,t,args=(k,d))
E_simulated[:,i] = r[:,1]
for i in range(0,10000):
E_avg[i] = sum(E_simulated[i,:])/5000.
pl.plot(t,E_avg,'-ro')
pl.show()
Here are some pstats from cProfile on my Python / Numpy code:
ncalls tottime percall cumtime percall
5000 82.505 0.017 236.760 0.047 {scipy.integrate._odepack.odeint}
1 1.504 1.504 238.949 238.949 myc_rb_e2f.py:1(<module>)
5000 0.025 0.000 236.855 0.047 C:\Python27\lib\site-packages\scipy\integrate\odepack.py:18(odeint)
12291237 154.255 0.000 154.255 0.000 myc_rb_e2f.py:7(myc_rb_e2f)
Calling the Cython function is faster than calling a Python function call, it's true. But even 30 nanoseconds is rather slow by the standards of compiled languages: for comparison, a C function called by another C function might take only 3 nanoseconds, or much less if it gets inlined.
The CPython + Cython implementation is the fastest; it is 44 times faster than the CPython implementation. This is an impressive speed improvement, especially considering that the Cython code is very close to the original Python code in its design.
When Cython is slower, it's probably due to type conversions, and possibly exacerbated by a lack of type annotations. Also, if you use C datastructures in Cython, that'll tend to be faster than using Python datastructures in Cython.
Change the function definition to include the types of the parameters:
def myc_rb_e2f(np.ndarray[double,ndim=1]y, double t, np.ndarray[double, ndim=1] k, np.ndarray[double, ndim=1] d):
This will improve the running time about 3 times over the numpy implementation and 6 - 7 times over your initial cython implementation. Just FYI, I lowered the number of iterations so that I didn't have to wait forever for it to finish while testing. It should scale up with your desired number of iterations.
[pkerp@plastilin so]$ time python run_numpy.py
real 0m47.572s
user 0m45.702s
sys 0m0.049s
[pkerp@plastilin so]$ time python run_cython1.py
real 1m14.851s
user 1m12.308s
sys 0m0.135s
[pkerp@plastilin so]$ time python run_cython2.py
real 0m15.774s
user 0m14.115s
sys 0m0.105s
Edit:
Also, you don't have to create a new array each time you want to return the results from myc_rb_e2f
. You can just declare a results array in main
, pass it in on every call, and then fill it in. It will save you a lot of unnecessary allocation. This more than halves the previous best running time:
[pkerp@plastilin so]$ time python run_cython3.py
real 0m6.165s
user 0m4.818s
sys 0m0.152s
And the code:
cimport numpy as np
import numpy as np
from numpy import *
import pylab as pl
from pylab import *
from scipy import integrate
def myc_rb_e2f(np.ndarray[double,ndim=1]y, double t, np.ndarray[double, ndim=1] k, np.ndarray[double, ndim=1] d, np.ndarray[double, ndim=1] res):
cdef double S = 0.01
if t > 300.0:
S = 5.0
#if t > 400
#S = 0.01
cdef double t1 = k[0]*S/(k[7]+S)
cdef double t2 = k[1]*(y[0]/(k[14]+y[0]))*(y[1]/(k[15]+y[1]))
cdef double t3 = k[5]*y[0]/(k[14]+y[0])
cdef double t4 = k[11]*y[2]*y[6]/(k[16]+y[6])
cdef double t5 = k[12]*y[3]*y[6]/(k[17]+y[6])
cdef double t6 = k[2]*y[0]/(k[14]+y[0])
cdef double t7 = k[3]*S/(k[7]+S)
cdef double t8 = k[6]*y[1]/(k[15]+y[1])
cdef double t9 = k[13]*y[5]/(k[18]+y[5])
cdef double t10 = k[9]*y[2]*y[4]/(k[16]+y[4])
cdef double t11 = k[10]*y[3]*y[4]/(k[17]+y[4])
cdef double dM = t1-d[0]*y[0]
cdef double dE = t2+t3+t4+t5-k[8]*y[4]*y[1]-d[1]*y[1]
cdef double dCD = t6+t7-d[2]*y[2]
cdef double dCE = t8-d[3]*y[3]
cdef double dR = k[4]+t9-k[8]*y[4]*y[1]-t10-t11-d[4]*y[4]
cdef double dRP = t10+t11+t4+t5-t9-d[5]*y[5]
cdef double dRE = k[8]*y[4]*y[1]-t4-t5-d[6]*y[6]
res[0] = dM
res[1] = dE
res[2] = dCD
res[3] = dCE
res[4] = dR
res[5] = dRP
res[6] = dRE
return res
def main():
cdef np.ndarray[double,ndim=1] t = np.zeros(467)
cdef np.ndarray[double,ndim=1] results = np.zeros(7)
t = np.linspace(0.,3000.,467.)
# Initial concentrations of [M,E,CD,CE,R,RP,RE]
cdef np.ndarray[double,ndim=1] y0 = np.array([0.,0.,0.,0.,0.4,0.,0.25])
cdef np.ndarray[double,ndim=2] E_simulated = np.zeros([467,554])
cdef np.ndarray[double,ndim=2] r = np.zeros([467,7])
cdef np.ndarray[double,ndim=1] E_avg = np.zeros([467])
cdef np.ndarray[double,ndim=1] k = np.zeros([19])
cdef np.ndarray[double,ndim=1] d = np.zeros([7])
cdef int i
for i in range (0,554):
k[0] = 1.+0.1*randn(1)
k[1] = 0.15+0.05*randn(1)
k[2] = 0.2+0.05*randn(1)
k[3] = 0.2+0.05*randn(1)
k[4] = 0.35+0.05*randn(1)
k[5] = 0.001+0.0001*randn(1)
k[6] = 0.5+0.05*randn(1)
k[7] = 0.3+0.05*randn(1)
k[8] = 30.+5.*randn(1)
k[9] = 18.+3.*randn(1)
k[10] = 18.+3.*randn(1)
k[11] = 18.+3.*randn(1)
k[12] = 18.+3.*randn(1)
k[13] = 3.6+0.5*randn(1)
k[14] = 0.15+0.05*randn(1)
k[15] = 0.15+0.05*randn(1)
k[16] = 0.92+0.1*randn(1)
k[17] = 0.92+0.1*randn(1)
k[18] = 0.01+0.001*randn(1)
d[0] = 0.7+0.05*randn(1)
d[1] = 0.25+0.025*randn(1)
d[2] = 1.5+0.05*randn(1)
d[3] = 1.5+0.05*randn(1)
d[4] = 0.06+0.01*randn(1)
d[5] = 0.06+0.01*randn(1)
d[6] = 0.03+0.005*randn(1)
r = integrate.odeint(myc_rb_e2f,y0,t,args=(k,d,results))
E_simulated[:,i] = r[:,1]
for i in range(0,467):
E_avg[i] = sum(E_simulated[i,:])/554.
#pl.plot(t,E_avg,'-ro')
#pl.show()
if __name__ == "__main__":
main()
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With