So I have some neural network code in Python that I rewrote in Julia. The straight Python code runs in about 7 seconds, while both the Julia version and the Python code under PyPy run in about 0.75 seconds.
sigmoid(z::Float64) = 1/(1 + exp(-z))
sigmoidPrime(z::Float64) = sigmoid(z) * (1 - sigmoid(z))
### Types ###
abstract AbstractNode
type Edge
    source::AbstractNode
    target::AbstractNode
    weight::Float64
    derivative::Float64
    augmented::Bool

    Edge(source::AbstractNode, target::AbstractNode) = new(source, target, randn(1,1)[1], 0.0, false)
end

type Node <: AbstractNode
    incomingEdges::Vector{Edge}
    outgoingEdges::Vector{Edge}
    activation::Float64
    activationPrime::Float64

    Node() = new([], [], -1.0, -1.0)
end

type InputNode <: AbstractNode
    index::Int
    incomingEdges::Vector{Edge}
    outgoingEdges::Vector{Edge}
    activation::Float64

    InputNode(index::Int) = new(index, [], [], -1.0)
end

type BiasNode <: AbstractNode
    incomingEdges::Vector{Edge}
    outgoingEdges::Vector{Edge}
    activation::Float64

    BiasNode() = new([], [], 1.0)
end
type Network
    inputNodes::Vector{InputNode}
    hiddenNodes::Vector{Node}
    outputNodes::Vector{Node}

    function Network(sizes::Array, bias::Bool=true)
        inputNodes = [InputNode(i) for i in 1:sizes[1]]
        hiddenNodes = [Node() for _ in 1:sizes[2]]
        outputNodes = [Node() for _ in 1:sizes[3]]

        for inputNode in inputNodes
            for node in hiddenNodes
                edge = Edge(inputNode, node)
                push!(inputNode.outgoingEdges, edge)
                push!(node.incomingEdges, edge)
            end
        end

        for node in hiddenNodes
            for outputNode in outputNodes
                edge = Edge(node, outputNode)
                push!(node.outgoingEdges, edge)
                push!(outputNode.incomingEdges, edge)
            end
        end

        if bias == true
            biasNode = BiasNode()
            for node in hiddenNodes
                edge = Edge(biasNode, node)
                push!(biasNode.outgoingEdges, edge)
                push!(node.incomingEdges, edge)
            end
        end

        new(inputNodes, hiddenNodes, outputNodes)
    end
end
### Methods ###
function evaluate(obj::Node, inputVector::Array)
    # An activation of -1.0 marks a node not yet evaluated in this pass,
    # so anything above -0.5 is a cached value.
    if obj.activation > -0.5
        return obj.activation
    else
        weightedSum = sum([d.weight * evaluate(d.source, inputVector) for d in obj.incomingEdges])
        obj.activation = sigmoid(weightedSum)
        obj.activationPrime = sigmoidPrime(weightedSum)
        return obj.activation
    end
end
function evaluate(obj::InputNode, inputVector::Array)
    obj.activation = inputVector[obj.index]
    return obj.activation
end

function evaluate(obj::BiasNode, inputVector::Array)
    obj.activation = 1.0
    return obj.activation
end

function updateWeights(obj::AbstractNode, learningRate::Float64)
    # The augmented flag marks edges already handled in this pass, since a node
    # can be reached through more than one path in the recursion.
    for d in obj.incomingEdges
        if d.augmented == false
            d.augmented = true
            d.weight -= learningRate * d.derivative
            updateWeights(d.source, learningRate)
            d.derivative = 0.0
        end
    end
end
function compute(obj::Network, inputVector::Array)
    output = [evaluate(node, inputVector) for node in obj.outputNodes]
    for node in obj.outputNodes
        clear(node)
    end
    return output
end

function clear(obj::AbstractNode)
    for d in obj.incomingEdges
        obj.activation = -1.0
        obj.activationPrime = -1.0
        d.augmented = false
        clear(d.source)
    end
end

function propagateDerivatives(obj::AbstractNode, error::Float64)
    for d in obj.incomingEdges
        if d.augmented == false
            d.augmented = true
            d.derivative += error * obj.activationPrime * d.source.activation
            propagateDerivatives(d.source, error * d.weight * obj.activationPrime)
        end
    end
end
function backpropagation(obj::Network, example::Array)
    output = [evaluate(node, example[1]) for node in obj.outputNodes]
    error = output - example[2]

    for (node, err) in zip(obj.outputNodes, error)
        propagateDerivatives(node, err)
    end

    for node in obj.outputNodes
        clear(node)
    end
end

function train(obj::Network, labeledExamples::Array, learningRate::Float64=0.7, iterations::Int=10000)
    for _ in 1:iterations
        for ex in labeledExamples
            backpropagation(obj, ex)
        end

        for node in obj.outputNodes
            updateWeights(node, learningRate)
        end

        for node in obj.outputNodes
            clear(node)
        end
    end
end
labeledExamples = Array[Array[[0,0,0], [0]],
                        Array[[0,0,1], [1]],
                        Array[[0,1,0], [0]],
                        Array[[0,1,1], [1]],
                        Array[[1,0,0], [0]],
                        Array[[1,0,1], [1]],
                        Array[[1,1,0], [1]],
                        Array[[1,1,1], [0]]];
neuralnetwork = Network([3,4,1])
@time train(neuralnetwork, labeledExamples)
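As a quick sanity check (not part of the timing above), the trained network's outputs can be inspected with compute:

for ex in labeledExamples
    println(ex[1], " => ", compute(neuralnetwork, ex[1]))
end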
I haven't provided the Python code because I'm not sure it's necessary (though I will if you really want it). I'm certainly not expecting anyone to spend a lot of time making complete sense of this code; I'm basically just looking for glaring or systematic inefficiencies in the Julia implementation (as opposed to the algorithm itself).
My motivation is that designing a neural network this way feels much more natural than vectorizing the algorithm and using NumPy, but of course all that looping and jumping around the class structure is slow in Python.
Thus this seemed like a natural candidate for porting to Julia to see whether I could get some major speedups. An order-of-magnitude speedup over straight Python is nice, but what I was really hoping for was an order-of-magnitude speedup over PyPy (some benchmarks I found online suggested that was a reasonable expectation).
Note: this has to be run in Julia 0.3 to work.
This does seem like more of a code review than a question (there aren't any question marks), but I'll take a crack at it anyway. The only obvious potential performance issue is that you're allocating arrays via comprehensions in evaluate, compute and backpropagation. That weighted sum computation in evaluate would be much more efficient as a for loop. For the other two methods, you may want to use pre-allocated arrays instead of comprehensions. You can use Julia's built-in profiler to see where your code is spending most of its time – that may reveal some non-obvious hot spots that you can further optimize.
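As a rough illustration (a sketch in the same Julia 0.3 style as the question, not benchmarked), the weighted sum in evaluate could accumulate into a scalar instead of building a temporary array:

function evaluate(obj::Node, inputVector::Array)
    if obj.activation > -0.5
        return obj.activation
    else
        # accumulate directly rather than allocating an array and summing it
        weightedSum = 0.0
        for d in obj.incomingEdges
            weightedSum += d.weight * evaluate(d.source, inputVector)
        end
        obj.activation = sigmoid(weightedSum)
        obj.activationPrime = sigmoidPrime(weightedSum)
        return obj.activation
    end
end

compute and backpropagation could similarly fill a typed array (or a buffer stored once on the Network) in a loop rather than using a comprehension. For profiling, something along these lines should point at the hot spots:

@profile train(neuralnetwork, labeledExamples)
Profile.print()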
Regarding the comparison to PyPy, it's quite possible that both Julia and PyPy are doing very well with this code – at or near C performance – in which case you wouldn't expect Julia to be much faster than PyPy, since they're both close to optimal. Comparing to the performance of a C implementation would be very informative since it would show how much performance is being left on the table by both Julia and PyPy. Fortunately, this code seems like it would be pretty straightforward to port to C.