Simon Danisch / Aug 01 2019
DNN From Scratch
]up; add ImageShow Flux Colors Zygote#master https://github.com/FluxML/ZygoteRules.jl.git ImageMagick FileIO
]precompile
using Colors, ImageShow, FileIO
import Zygote, Flux

# Glorot/Xavier-style uniform initialisation.
glorot_uniform(dims...) = (rand(Float32, dims...) .- 0.5f0) .* sqrt(24.0f0 / sum(dims))

# A fully connected layer: y = func.(W * x .+ b).
struct Dense{M <: AbstractMatrix, V <: AbstractVector, F <: Function}
  W::M
  b::V
  func::F
end

function Dense(in, out, func = identity)
  Dense(glorot_uniform(out, in), zeros(Float32, out), func)
end

function (a::Dense)(x::AbstractArray)
  a.func.(a.W * x .+ a.b)
end

# Column-wise softmax, so each sample (column) of a batch is normalised independently.
softmax(xs) = exp.(xs) ./ sum(exp.(xs), dims = 1)

relu(x::Real) = max(zero(x), x)

# Mean cross-entropy over the columns (samples) of ŷ and y.
function crossentropy(ŷ::AbstractVecOrMat, y::AbstractVecOrMat; weight = 1)
  -sum(y .* log.(ŷ) .* weight) * 1 // size(y, 2)
end
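To see these pieces work together, here is a quick sanity check (a minimal sketch, not part of the original notebook; the layer sizes and the random input are arbitrary): a single Dense layer followed by softmax should produce a probability vector, and crossentropy should return a finite loss against a one-hot target.

layer = Dense(4, 3, relu)                 # 4 inputs → 3 ReLU activations
x = rand(Float32, 4)                      # arbitrary input vector
probs = softmax(layer(x))                 # 3-element vector, entries sum to 1
crossentropy(probs, Float32[1, 0, 0])     # loss against a one-hot target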
function forward(network, input)
  result = input
  for layer in network
    result = layer(result)
  end
  return result
end

loss(network, x, y) = crossentropy(forward(network, x), y)

function loss_gradient(network, input, target)
  # first index, to get gradient of first argument
  Zygote.gradient(loss, network, input, target)[1]
end

# Recursively walk the network and its gradient, updating every parameter array.
function apply_gradient!(a, b::Nothing, optimizer = nothing)
  return nothing
end

function apply_gradient!(a, b::NamedTuple, optimizer = nothing)
  for field in propertynames(b)
    apply_gradient!(getfield(a, field), getfield(b, field), optimizer)
  end
  return nothing
end

function apply_gradient!(a::Tuple, b, optimizer = nothing)
  for (alayer, blayer) in zip(a, b)
    apply_gradient!(alayer, blayer, optimizer)
  end
  return nothing
end

# Plain gradient descent with a fixed 0.1 step when no optimizer is given.
function apply_gradient!(a::AbstractArray, b::AbstractArray, optimizer = nothing)
  a .-= 0.1 .* b
  return nothing
end

"""
    ADAM(η = 0.001, β = (0.9, 0.999))

[ADAM](https://arxiv.org/abs/1412.6980v8) optimiser.
"""
mutable struct ADAM
  eta::Float64
  beta::Tuple{Float64,Float64}
  state::IdDict
end

ADAM(η = 0.001, β = (0.9, 0.999)) = ADAM(η, β, IdDict())

const ϵ = 1f-8  # small constant to avoid division by zero

function apply_gradient!(x::AbstractArray, Δ::AbstractArray, o::ADAM)
  η, β = o.eta, o.beta
  mt, vt, βp = get!(o.state, x, (zero(x), zero(x), β))
  @. mt = β[1] * mt + (1 - β[1]) * Δ
  @. vt = β[2] * vt + (1 - β[2]) * Δ^2
  @. Δ = mt / (1 - βp[1]) / (√(vt / (1 - βp[2])) + ϵ) * η
  x .-= Δ
  o.state[x] = (mt, vt, βp .* β)
  return nothing
end

function test(n)
  img = X[1:28^2, n:n]
  predict = Tuple(argmax(forward(network, img)))[1] - 1
  # save the input image so it can be inspected, and return the predicted digit
  save("/results/test.png", Gray.(reshape(img, (28, 28))))
  return predict
end

function train!(network, X, Y, optimizer = nothing, epochs = 100)
  for epoch in 1:epochs
    grad = loss_gradient(network, X, Y)
    apply_gradient!(network, grad, optimizer)
  end
end
train! (generic function with 3 methods)
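Before training on MNIST, the whole pipeline can be smoke-tested on a tiny random problem (a hedged sketch; the toy network and fake data below are illustrative and not from the original notebook). One call to loss_gradient followed by apply_gradient! should typically lower the loss:

tiny = (Dense(4, 3, relu), Dense(3, 2), softmax)
xs = rand(Float32, 4, 5)                         # 5 fake samples
ys = Float32[1 0 1 0 1; 0 1 0 1 0]               # fake one-hot targets
before = loss(tiny, xs, ys)
grad = loss_gradient(tiny, xs, ys)
apply_gradient!(tiny, grad)                      # plain 0.1-step gradient descent
after = loss(tiny, xs, ys)                       # usually ≤ before
# apply_gradient!(tiny, grad, ADAM()) would route the update through the ADAM method instead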
network = (
  Dense(28^2, 32, relu),
  Dense(32, 10),
  softmax
)

imgs = Flux.Data.MNIST.images()
labels = Flux.Data.MNIST.labels()
Y = Flux.onehotbatch(labels, 0:9)
X = Float32.(hcat(float.(reshape.(imgs, :))...))

train!(network, X, Y, ADAM())
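As an extra check (hypothetical, not part of the original notebook), the fraction of training images the network now classifies correctly can be computed directly from the forward pass:

ŷ = forward(network, X)                                   # 10×60000 matrix of class scores
predictions = [argmax(view(ŷ, :, i)) - 1 for i in 1:size(ŷ, 2)]
accuracy = sum(predictions .== labels) / length(labels)   # fraction of correct predictions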
test(23)