Simon Danisch / Aug 01 2019

DNN From Scratch

]up; add ImageShow Flux Colors Zygote#master https://github.com/FluxML/ZygoteRules.jl.git ImageMagick FileIO
]precompile
using Colors, ImageShow, FileIO
import Zygote, Flux

# Glorot/Xavier uniform initialization: samples on ±√(6 / (fan_in + fan_out)).
glorot_uniform(dims...) = (rand(Float32, dims...) .- 0.5f0) .* sqrt(24.0f0/sum(dims))

struct Dense{M <: AbstractMatrix, V <: AbstractVector, F <: Function}
  W::M
  b::V
  func::F
end

function Dense(in, out, func = identity)
  Dense(glorot_uniform(out, in), zeros(Float32, out), func)
end

function (a::Dense)(x::AbstractArray)
  a.func.(a.W * x .+ a.b)
end

softmax(xs) = exp.(xs) ./ sum(exp.(xs))
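This naive softmax can overflow when the activations get large; a common, numerically safer variant (a small sketch under the hypothetical name softmax_stable, not used in the rest of the notebook) shifts by the maximum first, which leaves the result unchanged:

# Shifting by the maximum keeps exp from overflowing without changing the result.
function softmax_stable(xs)
  e = exp.(xs .- maximum(xs))
  return e ./ sum(e)
end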

relu(x::Real) = max(zero(x), x)

# Cross-entropy between the predicted probabilities ŷ and the one-hot targets y,
# averaged over the batch (the columns of y).
function crossentropy(ŷ::AbstractVecOrMat, y::AbstractVecOrMat; weight = 1)
  -sum(y .* log.(ŷ) .* weight) * 1 // size(y, 2)
end
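As a quick sanity check (hypothetical values, not part of the notebook), the loss of a softmax output against a one-hot target is just the negative log-probability the network assigns to the true class, averaged over the batch:

ŷ = softmax(Float32[1.0, 3.0, 0.5])  # predicted class probabilities
y = Float32[0, 1, 0]                 # one-hot target: the true class is 2
crossentropy(ŷ, y)                   # ≈ -log(ŷ[2])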
function forward(network, input)
  result = input
  for layer in network
    result = layer(result)
  end
  return result
end
loss(network, x, y) = crossentropy(forward(network, x), y)
function loss_gradient(network, input, target)
  # first index, to get gradient of first argument
  Zygote.gradient(loss, network, input, target)[1]
end
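The gradient Zygote returns mirrors the structure of the network: a tuple with one entry per layer, where each Dense layer contributes a NamedTuple with gradients for W and b, and the parameter-free softmax contributes nothing. A hypothetical inspection, assuming the network, X, and Y defined further down:

grad = loss_gradient(network, X[:, 1:1], Y[:, 1:1])
size(grad[1].W), size(grad[1].b)  # match the first layer's parameters
grad[3]                           # nothing: softmax has no parameters to update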

function apply_gradient!(a, b::Nothing, optimizer = nothing)
  return nothing
end

function apply_gradient!(a, b::NamedTuple, optimizer = nothing)
  for field in propertynames(b)
    apply_gradient!(getfield(a, field), getfield(b, field), optimizer)
  end
  return nothing
end
function apply_gradient!(a::Tuple, b, optimizer = nothing)
  for (alayer, blayer) in zip(a, b)
    apply_gradient!(alayer, blayer, optimizer)
  end
  return nothing
end

# Fallback: plain gradient descent with a fixed 0.1 learning rate when no
# optimizer-specific method applies.
function apply_gradient!(a::AbstractArray, b::AbstractArray, optimizer = nothing)
  a .-= 0.1 .* b
  return nothing
end

"""
    ADAM(η = 0.001, β = (0.9, 0.999))

[ADAM](https://arxiv.org/abs/1412.6980v8) optimiser.
"""
mutable struct ADAM
  eta::Float64
  beta::Tuple{Float64,Float64}
  state::IdDict
end

ADAM(η = 0.001, β = (0.9, 0.999)) = ADAM(η, β, IdDict())

function apply_gradient!(x::AbstractArray, Δ::AbstractArray, o::ADAM)
  η, β = o.eta, o.beta
  ϵ = 1e-8  # small constant to avoid division by zero
  mt, vt, βp = get!(o.state, x, (zero(x), zero(x), β))
  @. mt = β[1] * mt + (1 - β[1]) * Δ
  @. vt = β[2] * vt + (1 - β[2]) * Δ^2
  @. Δ = mt / (1 - βp[1]) / (√(vt / (1 - βp[2])) + ϵ) * η
  o.state[x] = (mt, vt, βp .* β)
  x .-= Δ  # apply the bias-corrected update to the parameters
  return nothing
end
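A quick sanity check (hypothetical toy values): on the very first step the bias-corrected moments cancel, so each parameter moves by roughly η against the sign of its gradient:

opt = ADAM()
p = Float32[1.0, -2.0]
g = Float32[0.5, -0.5]
apply_gradient!(p, copy(g), opt)  # copy, since the update is written into the gradient buffer
p  # ≈ [0.999, -1.999]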

function test(n)
  img = X[1:28^2, n:n]
  # argmax of the 10×1 output is a CartesianIndex; its row minus 1 is the predicted digit
  predict = Tuple(argmax(forward(network, img)))[1] - 1
  @show predict
  save("/results/test.png", Gray.(reshape(img, (28, 28))))
  return nothing
end

function train!(network, X, Y, optimizer = nothing, epochs = 100)
  # full-batch training: one gradient over the whole data set per epoch
  for epoch in 1:epochs
    grad = loss_gradient(network, X, Y)
    apply_gradient!(network, grad, optimizer)
    @show epoch
  end
end
network = (
  Dense(28^2, 32, relu),
  Dense(32, 10),
  softmax
)
imgs = Flux.Data.MNIST.images()
labels = Flux.Data.MNIST.labels()
Y = Flux.onehotbatch(labels, 0:9)
X = Float32.(hcat(float.(reshape.(imgs, :))...))
train!(network, X, Y, ADAM())
test(23)
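test only looks at a single image; a quick (hypothetical) way to check accuracy over the whole training set, reusing the network and data from above:

# Fraction of training images whose most probable class matches the label.
predictions = Flux.onecold(forward(network, X), 0:9)
accuracy = sum(predictions .== labels) / length(labels)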