Jun / Sep 27 2019

Chapter 4: Grid World

using ReinforcementLearning, ReinforcementLearningEnvironments
# Conversions between linear state ids (1:16) and (row, col) positions on the 4x4 grid
const GridWorldLinearIndices = LinearIndices((4,4))
const GridWorldCartesianIndices = CartesianIndices((4,4))

# The two terminal states are the top-left and bottom-right corners of the grid.
isterminal(s::CartesianIndex{2}) = s == CartesianIndex(1,1) || s == CartesianIndex(4,4)

# A single deterministic transition: taking action `a` in state `s` yields the
# next state, the reward and the transition probability.
function nextstep(s::CartesianIndex{2}, a::CartesianIndex{2})
    ns = s + a
    # Stay in place when the move would leave the grid, or when `s` is terminal.
    if isterminal(s) || ns[1] < 1 || ns[1] > 4 || ns[2] < 1 || ns[2] > 4
        ns = s
    end
    # Every step costs -1 until a terminal state is reached.
    r = isterminal(s) ? 0.0 : -1.0
    [(nextstate=GridWorldLinearIndices[ns], reward=r, prob=1.0)]
end
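
As an illustrative check (not part of the original listing), you can call nextstep directly. For example, moving up (CartesianIndex(-1, 0)) from the cell (2, 1) lands on the terminal cell (1, 1), whose linear index is 1, and costs a reward of -1:

nextstep(CartesianIndex(2, 1), CartesianIndex(-1, 0))
# => [(nextstate = 1, reward = -1.0, prob = 1.0)]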

# The four actions as (row, col) offsets: up, down, right, left.
const GridWorldActions = [CartesianIndex(-1, 0),
                          CartesianIndex(1, 0),
                          CartesianIndex(0, 1),
                          CartesianIndex(0, -1)]

# Enumerate the deterministic dynamics for every (state, action) pair.
const GridWorldEnvModel = DeterministicDistributionModel([nextstep(GridWorldCartesianIndices[s], a) for s in 1:16, a in GridWorldActions]);
# A tabular value function and an equiprobable random policy (probability 0.25 for each of the 4 actions).
V, π = TabularVApproximator(16), TabularRandomPolicy(fill(0.25, 16, 4))
# Iterative policy evaluation with no discounting (γ = 1.0).
policy_evaluation!(V=V, π=π, model=GridWorldEnvModel, γ=1.0)
The state values converge to:

TabularVApproximator([0.0, -13.9993, -19.999, -21.9989, -13.9993, -17.9992, -19.9991, -19.9991, -19.999, -19.9991, -17.9992, -13.9994, -21.9989, -19.9991, -13.9994, 0.0])
using Plots
# Visualize the value function as a 4x4 heatmap (yflip so that row 1 appears at the top).
heatmap(1:4, 1:4, reshape(V.table, 4,4), yflip=true)

You can compare the result with the corresponding figure in the book.
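
If you prefer a numeric comparison instead of the heatmap, a minimal sketch (assuming the converged values are still in V.table) is to reshape and round the table so it can be read off like the grid in the book:

round.(reshape(V.table, 4, 4); digits=1)
# expected (matches the book's values, up to iteration tolerance):
#    0.0  -14.0  -20.0  -22.0
#  -14.0  -18.0  -20.0  -20.0
#  -20.0  -20.0  -18.0  -14.0
#  -22.0  -20.0  -14.0    0.0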