So I have a little sieve of Eratosthenes function written in both Python and Julia, and I'm comparing run times.
Here is the Python code:
import time
def get_primes(n):
numbers = set(range(n, 1, -1))
primes = []
while numbers:
p = numbers.pop()
primes.append(p)
numbers.difference_update(set(range(p*2, n+1, p)))
return primes
start = time.time()
get_primes(10000)
print time.time() - start
And here is the Julia code:
function get_primes(n)
numbers = [2:n]
primes = Int[]
while numbers != []
p = numbers[1]
push!(primes, p)
numbers = setdiff(numbers, [p*i for i=1:int(n/p)])
end
return primes
end
@time get_primes(10000);
The Python code runs in about .005 seconds, while the Julia code takes about .5 seconds, so that means Python runs about 100x times faster. There's probably a perfectly logical reason for this, but I really have no idea what I'm doing.
The main difference is that in Python you're allocating a single set, number
, and then modifying it in place, while in Julia, you're allocating a new array on every iteration of the loop. Another difference is that you're using a set in Python and an array in Julia (what Python calls a "list"). Let's change the Julia code to eliminate these two differences:
function get_primes(n)
numbers = IntSet(2:n)
primes = Int[]
while !isempty(numbers)
p = shift!(numbers)
push!(primes, p)
setdiff!(numbers, p:p:n)
end
return primes
end
With this change, on my system, the Julia code is 10x faster than the Python code (0.0005 vs. 0.0048 seconds). Note that I also used a range as the second argument of the setdiff!
– it's both terser and faster (1.5x) than constructing an array with a comprehension.
A somewhat more idiomatic style of writing this in Julia would be to use an array of booleans, turning them on and off:
function eratosthenes(n)
primes = fill(true,n)
primes[1] = false
for p = 2:n
primes[p] || continue
for i = 2:div(n,p)
primes[p*i] = false
end
end
find(primes)
end
The find
at the end returns the indices where a vector is non-zero (i.e. true). On my machine, this is 5x faster (0.0001 seconds) than the other Julia version and 45x faster than the Python version.
With this code
function get_primes(n)
numbers::Set{Int} = Set(2:n)
primes::Array{Int64,1} = []
while !isempty(numbers)
p = minimum(numbers)
push!(primes,p);
setdiff!(numbers,Set(p:p:n))
end
return primes
end
I got
julia> @time get_primes(10000);
elapsed time: 0.029556727 seconds (1656448 bytes allocated)
The other (original) version is indeed bad because it spends most of the time in setdiff re-hashing after the calculation -- my timings for the unaltered version were similar to OP's. So it looks like setdiff! is perhaps 100x faster than setdiff, but accepts only Sets not Arrays.
This is still 6x slower than python but 13x faster than when using setdiff. However, if there were some way to maintain an ordered set and always take the first element, then it would likely be much faster because almost 90% of the time (209/235) is being spent finding the minimum of a set.
235 task.jl; anonymous; line: 96
235 REPL.jl; eval_user_input; line: 54
235 profile.jl; anonymous; line: 14
209 /Users/jeffw/src/errata/julia/sive.jl; get_primes; line: 5
2 reduce.jl; mapfoldl; line: 75
2 dict.jl; skip_deleted; line: 669
207 reduce.jl; mapfoldl; line: 81
1 reduce.jl; mapfoldl_impl; line: 54
1 dict.jl; skip_deleted; line: 670
199 reduce.jl; mapfoldl_impl; line: 57
10 dict.jl; skip_deleted; line: 668
132 dict.jl; skip_deleted; line: 669
12 dict.jl; skip_deleted; line: 670
27 dict.jl; skip_deleted; line: 672
7 reduce.jl; mapfoldl_impl; line: 58
1 /Users/jeffw/src/errata/julia/sive.jl; get_primes; line: 6
25 /Users/jeffw/src/errata/julia/sive.jl; get_primes; line: 7
14 set.jl; setdiff!; line: 24
1 dict.jl; skip_deleted; line: 669
function get_primes(n)
numbers::IntSet = IntSet(2:n)
primes::Array{Int64,1} = []
while !isempty(numbers)
p = shift!(numbers)
push!(primes,p);
setdiff!(numbers,Set(p:p:n))
end
return primes
end
julia> @time get_primes(10000);
elapsed time: 0.003691856 seconds (1463152 bytes allocated)
I tested many approaches and I have found this one as the fastest in 8 different tests.
# Julia 0.4.0 [Execution time = 95us (After warm up!!)]
function get_primes(n::Int64)
numbers = fill(true,n)
numbers[1] = false
for i = 2:isqrt(n)
if numbers[i]
for j = i:div(n,i)
numbers[i*j] = false
end
end
end
primes = find(numbers) # t=3-5us
return primes
end
@time primes = get_primes(10000)
println(get_primes(100))
First Julia code on this page calculated n=10000 in about 1'000'000us on my PC, while this one took about 95us which is 10'000x faster.
# Python 3.4 [Execution time = 5ms (Every time)]
def get_primes(n):
m = n+1
numbers = [True for i in range(m)]
for i in range(2, int(math.sqrt(n))):
if numbers[i]:
for j in range(i*i, m, i):
numbers[j] = False
primes = []
for i in range(2, m):
if numbers[i]:
primes.append(i)
return primes
start = time.time()
primes = get_primes(10000)
print(time.time() - start)
print(get_primes(100))
Python test
# Python n=100e6 [Execution time 52.906s]
start = time.time()
get_primes(int(100e6))
print(time.time() - start)
Julia test
# Julia n=100e6 [Execution time 3.694s]
@time get_primes(convert(Int64,100e6))
The difference is now less. Julia vs Python is about 12x faster.
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With