Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Cython code 3-4 times slower than Python / Numpy code?

I am trying to convert my Python / Numpy code to Cython code for speedup purposes. However, Cython is MUCH slower (3-4 times) than the Python / Numpy code. Am I using Cython correctly? Am I passing arguments correctly to myc_rb_etc() in my Cython code? What about when I call the integrate function? Thank you in advance for your help. Here is my Python / Numpy code:

from pylab import * 
import pylab as pl
from numpy import *
import numpy as np
from scipy import integrate

def myc_rb_e2f(y,t,k,d):

    M = y[0]
    E = y[1]
    CD = y[2]
    CE = y[3]
    R = y[4]
    RP = y[5] 
    RE = y[6]

    S = 0.01
    if t > 300:
        S = 5.0
    #if t > 400
        #S = 0.01

    t1 = k[0]*S/(k[7]+S);
    t2 = k[1]*(M/(k[14]+M))*(E/(k[15]+E));
    t3 = k[5]*M/(k[14]+M);
    t4 = k[11]*CD*RE/(k[16]+RE);
    t5 = k[12]*CE*RE/(k[17]+RE);
    t6 = k[2]*M/(k[14]+M);
    t7 = k[3]*S/(k[7]+S);
    t8 = k[6]*E/(k[15]+E);
    t9 = k[13]*RP/(k[18]+RP);
    t10 = k[9]*CD*R/(k[16]+R);
    t11 = k[10]*CE*R/(k[17]+R);

    dM = t1-d[0]*M
    dE = t2+t3+t4+t5-k[8]*R*E-d[1]*E
    dCD = t6+t7-d[2]*CD
    dCE = t8-d[3]*CE
    dR = k[4]+t9-k[8]*R*E-t10-t11-d[4]*R
    dRP = t10+t11+t4+t5-t9-d[5]*RP
    dRE = k[8]*R*E-t4-t5-d[6]*RE

    dy = [dM,dE,dCD,dCE,dR,dRP,dRE]

    return dy

t = np.zeros(10000)
t = np.linspace(0.,3000.,10000.)

# Initial concentrations of [M,E,CD,CE,R,RP,RE]
y0 = np.array([0.,0.,0.,0.,0.4,0.,0.25])
E_simulated = np.zeros([10000,5000])
E_avg = np.zeros([10000])
k = np.zeros([19])
d = np.zeros([7])

for i in range (0,5000):
    k[0] = 1.+0.1*randn(1)
    k[1] = 0.15+0.05*randn(1)
    k[2] = 0.2+0.05*randn(1)
    k[3] = 0.2+0.05*randn(1)
    k[4] = 0.35+0.05*randn(1)
    k[5] = 0.001+0.0001*randn(1)
    k[6] = 0.5+0.05*randn(1)
    k[7] = 0.3+0.05*randn(1)
    k[8] = 30.+5.*randn(1)
    k[9] = 18.+3.*randn(1)
    k[10] = 18.+3.*randn(1)
    k[11] = 18.+3.*randn(1)
    k[12] = 18.+3.*randn(1)
    k[13] = 3.6+0.5*randn(1)
    k[14] = 0.15+0.05*randn(1)
    k[15] = 0.15+0.05*randn(1)
    k[16] = 0.92+0.1*randn(1)
    k[17] = 0.92+0.1*randn(1)
    k[18] = 0.01+0.001*randn(1)
    d[0] = 0.7+0.05*randn(1)
    d[1] = 0.25+0.025*randn(1)
    d[2] = 1.5+0.05*randn(1)
    d[3] = 1.5+0.05*randn(1)
    d[4] = 0.06+0.01*randn(1)
    d[5] = 0.06+0.01*randn(1)
    d[6] = 0.03+0.005*randn(1)
    r = integrate.odeint(myc_rb_e2f,y0,t,args=(k,d))
    E_simulated[:,i] = r[:,1]

for i in range(0,10000):
    E_avg[i] = sum(E_simulated[i,:])/5000.

pl.plot(t,E_avg,'-ro')
pl.show()

Here is the code converted into Cython:

cimport numpy as np
import numpy as np
from numpy import *
import pylab as pl
from pylab import * 
from scipy import integrate

def myc_rb_e2f(y,t,k,d):

    cdef double M = y[0]
    cdef double E = y[1]
    cdef double CD = y[2]
    cdef double CE = y[3]
    cdef double R = y[4]
    cdef double RP = y[5] 
    cdef double RE = y[6]

    cdef double S = 0.01
    if t > 300.0:
        S = 5.0
    #if t > 400
        #S = 0.01

    cdef double t1 = k[0]*S/(k[7]+S)
    cdef double t2 = k[1]*(M/(k[14]+M))*(E/(k[15]+E))
    cdef double t3 = k[5]*M/(k[14]+M)
    cdef double t4 = k[11]*CD*RE/(k[16]+RE)
    cdef double t5 = k[12]*CE*RE/(k[17]+RE)
    cdef double t6 = k[2]*M/(k[14]+M)
    cdef double t7 = k[3]*S/(k[7]+S)
    cdef double t8 = k[6]*E/(k[15]+E)
    cdef double t9 = k[13]*RP/(k[18]+RP)
    cdef double t10 = k[9]*CD*R/(k[16]+R)
    cdef double t11 = k[10]*CE*R/(k[17]+R)

    cdef double dM = t1-d[0]*M
    cdef double dE = t2+t3+t4+t5-k[8]*R*E-d[1]*E
    cdef double dCD = t6+t7-d[2]*CD
    cdef double dCE = t8-d[3]*CE
    cdef double dR = k[4]+t9-k[8]*R*E-t10-t11-d[4]*R
    cdef double dRP = t10+t11+t4+t5-t9-d[5]*RP
    cdef double dRE = k[8]*R*E-t4-t5-d[6]*RE

    dy = [dM,dE,dCD,dCE,dR,dRP,dRE]

    return dy


def main():
    cdef np.ndarray[double,ndim=1] t = np.zeros(10000)
    t = np.linspace(0.,3000.,10000.)
    # Initial concentrations of [M,E,CD,CE,R,RP,RE]
    cdef np.ndarray[double,ndim=1] y0 = np.array([0.,0.,0.,0.,0.4,0.,0.25])
    cdef np.ndarray[double,ndim=2] E_simulated = np.zeros([10000,5000])
    cdef np.ndarray[double,ndim=2] r = np.zeros([10000,7])
    cdef np.ndarray[double,ndim=1] E_avg = np.zeros([10000])
    cdef np.ndarray[double,ndim=1] k = np.zeros([19])
    cdef np.ndarray[double,ndim=1] d = np.zeros([7])
    cdef int i
    for i in range (0,5000):
        k[0] = 1.+0.1*randn(1)
        k[1] = 0.15+0.05*randn(1)
        k[2] = 0.2+0.05*randn(1)
        k[3] = 0.2+0.05*randn(1)
        k[4] = 0.35+0.05*randn(1)
        k[5] = 0.001+0.0001*randn(1)
        k[6] = 0.5+0.05*randn(1)
        k[7] = 0.3+0.05*randn(1)
        k[8] = 30.+5.*randn(1)
        k[9] = 18.+3.*randn(1)
        k[10] = 18.+3.*randn(1)
        k[11] = 18.+3.*randn(1)
        k[12] = 18.+3.*randn(1)
        k[13] = 3.6+0.5*randn(1)
        k[14] = 0.15+0.05*randn(1)
        k[15] = 0.15+0.05*randn(1)
        k[16] = 0.92+0.1*randn(1)
        k[17] = 0.92+0.1*randn(1)
        k[18] = 0.01+0.001*randn(1)
        d[0] = 0.7+0.05*randn(1)
        d[1] = 0.25+0.025*randn(1)
        d[2] = 1.5+0.05*randn(1)
        d[3] = 1.5+0.05*randn(1)
        d[4] = 0.06+0.01*randn(1)
        d[5] = 0.06+0.01*randn(1)
        d[6] = 0.03+0.005*randn(1)
        r = integrate.odeint(myc_rb_e2f,y0,t,args=(k,d))
        E_simulated[:,i] = r[:,1]
    for i in range(0,10000):
        E_avg[i] = sum(E_simulated[i,:])/5000.
    pl.plot(t,E_avg,'-ro')
    pl.show()

Here are some pstats from cProfile on my Python / Numpy code:

ncalls tottime percall cumtime percall

5000 82.505 0.017 236.760 0.047 {scipy.integrate._odepack.odeint}

1 1.504 1.504 238.949 238.949 myc_rb_e2f.py:1(<module>)

5000 0.025 0.000 236.855 0.047 C:\Python27\lib\site-packages\scipy\integrate\odepack.py:18(odeint)

12291237 154.255 0.000 154.255 0.000 myc_rb_e2f.py:7(myc_rb_e2f)

like image 814
Zack Avatar asked Oct 24 '12 07:10

Zack


People also ask

Can Cython be slower than Python?

Calling the Cython function is faster than calling a Python function call, it's true. But even 30 nanoseconds is rather slow by the standards of compiled languages: for comparison, a C function called by another C function might take only 3 nanoseconds, or much less if it gets inlined.

How much faster is Cython than Python?

The CPython + Cython implementation is the fastest; it is 44 times faster than the CPython implementation. This is an impressive speed improvement, especially considering that the Cython code is very close to the original Python code in its design.

Why is Cython slow?

When Cython is slower, it's probably due to type conversions, and possibly exacerbated by a lack of type annotations. Also, if you use C datastructures in Cython, that'll tend to be faster than using Python datastructures in Cython.


1 Answers

Change the function definition to include the types of the parameters:

def myc_rb_e2f(np.ndarray[double,ndim=1]y, double t, np.ndarray[double, ndim=1] k, np.ndarray[double, ndim=1] d):

This will improve the running time about 3 times over the numpy implementation and 6 - 7 times over your initial cython implementation. Just FYI, I lowered the number of iterations so that I didn't have to wait forever for it to finish while testing. It should scale up with your desired number of iterations.

[pkerp@plastilin so]$ time python run_numpy.py

real    0m47.572s
user    0m45.702s
sys     0m0.049s

[pkerp@plastilin so]$ time python run_cython1.py

real    1m14.851s
user    1m12.308s
sys     0m0.135s

[pkerp@plastilin so]$ time python run_cython2.py

real    0m15.774s
user    0m14.115s
sys     0m0.105s

Edit:

Also, you don't have to create a new array each time you want to return the results from myc_rb_e2f. You can just declare a results array in main, pass it in on every call, and then fill it in. It will save you a lot of unnecessary allocation. This more than halves the previous best running time:

[pkerp@plastilin so]$ time python run_cython3.py

real    0m6.165s
user    0m4.818s
sys     0m0.152s

And the code:

cimport numpy as np
import numpy as np
from numpy import *
import pylab as pl
from pylab import * 
from scipy import integrate

def myc_rb_e2f(np.ndarray[double,ndim=1]y, double t, np.ndarray[double, ndim=1] k, np.ndarray[double, ndim=1] d, np.ndarray[double, ndim=1] res):

    cdef double S = 0.01
    if t > 300.0:
        S = 5.0
    #if t > 400
        #S = 0.01

    cdef double t1 = k[0]*S/(k[7]+S)
    cdef double t2 = k[1]*(y[0]/(k[14]+y[0]))*(y[1]/(k[15]+y[1]))
    cdef double t3 = k[5]*y[0]/(k[14]+y[0])
    cdef double t4 = k[11]*y[2]*y[6]/(k[16]+y[6])
    cdef double t5 = k[12]*y[3]*y[6]/(k[17]+y[6])
    cdef double t6 = k[2]*y[0]/(k[14]+y[0])
    cdef double t7 = k[3]*S/(k[7]+S)
    cdef double t8 = k[6]*y[1]/(k[15]+y[1])
    cdef double t9 = k[13]*y[5]/(k[18]+y[5])
    cdef double t10 = k[9]*y[2]*y[4]/(k[16]+y[4])
    cdef double t11 = k[10]*y[3]*y[4]/(k[17]+y[4])

    cdef double dM = t1-d[0]*y[0]
    cdef double dE = t2+t3+t4+t5-k[8]*y[4]*y[1]-d[1]*y[1]
    cdef double dCD = t6+t7-d[2]*y[2]
    cdef double dCE = t8-d[3]*y[3]
    cdef double dR = k[4]+t9-k[8]*y[4]*y[1]-t10-t11-d[4]*y[4]
    cdef double dRP = t10+t11+t4+t5-t9-d[5]*y[5]
    cdef double dRE = k[8]*y[4]*y[1]-t4-t5-d[6]*y[6]

    res[0] = dM
    res[1] = dE
    res[2] = dCD
    res[3] = dCE
    res[4] = dR
    res[5] = dRP
    res[6] = dRE

    return res


def main():
    cdef np.ndarray[double,ndim=1] t = np.zeros(467)
    cdef np.ndarray[double,ndim=1] results = np.zeros(7)
    t = np.linspace(0.,3000.,467.)
    # Initial concentrations of [M,E,CD,CE,R,RP,RE]
    cdef np.ndarray[double,ndim=1] y0 = np.array([0.,0.,0.,0.,0.4,0.,0.25])
    cdef np.ndarray[double,ndim=2] E_simulated = np.zeros([467,554])
    cdef np.ndarray[double,ndim=2] r = np.zeros([467,7])
    cdef np.ndarray[double,ndim=1] E_avg = np.zeros([467])
    cdef np.ndarray[double,ndim=1] k = np.zeros([19])
    cdef np.ndarray[double,ndim=1] d = np.zeros([7])
    cdef int i
    for i in range (0,554):
        k[0] = 1.+0.1*randn(1)
        k[1] = 0.15+0.05*randn(1)
        k[2] = 0.2+0.05*randn(1)
        k[3] = 0.2+0.05*randn(1)
        k[4] = 0.35+0.05*randn(1)
        k[5] = 0.001+0.0001*randn(1)
        k[6] = 0.5+0.05*randn(1)
        k[7] = 0.3+0.05*randn(1)
        k[8] = 30.+5.*randn(1)
        k[9] = 18.+3.*randn(1)
        k[10] = 18.+3.*randn(1)
        k[11] = 18.+3.*randn(1)
        k[12] = 18.+3.*randn(1)
        k[13] = 3.6+0.5*randn(1)
        k[14] = 0.15+0.05*randn(1)
        k[15] = 0.15+0.05*randn(1)
        k[16] = 0.92+0.1*randn(1)
        k[17] = 0.92+0.1*randn(1)
        k[18] = 0.01+0.001*randn(1)
        d[0] = 0.7+0.05*randn(1)
        d[1] = 0.25+0.025*randn(1)
        d[2] = 1.5+0.05*randn(1)
        d[3] = 1.5+0.05*randn(1)
        d[4] = 0.06+0.01*randn(1)
        d[5] = 0.06+0.01*randn(1)
        d[6] = 0.03+0.005*randn(1)
        r = integrate.odeint(myc_rb_e2f,y0,t,args=(k,d,results))
        E_simulated[:,i] = r[:,1]
    for i in range(0,467):
        E_avg[i] = sum(E_simulated[i,:])/554.
    #pl.plot(t,E_avg,'-ro')
    #pl.show()

if __name__ == "__main__":
    main()
like image 60
juniper- Avatar answered Sep 30 '22 12:09

juniper-