Jun / Sep 25 2019

Chapter 4: Gambler's Problem

A gambler repeatedly bets on coin flips: with probability pₕ the stake is won, otherwise it is lost. The episode ends when the capital reaches 0 or WinCapital, and reaching WinCapital yields reward 1. Below, this MDP (Example 4.3 in Sutton & Barto) is solved with value iteration.

using ReinforcementLearning, ReinforcementLearningEnvironments
using Distributions
using Plots

const pₕ = 0.4          # probability that the coin comes up heads (the gambler wins the stake)
const WinCapital = 100  # the gambler's goal; reaching it ends the episode

# Capitals 0..WinCapital are stored at 1-based table indices 1..WinCapital+1,
# so states are shifted by one when encoded.
decode_state(s::Int) = s - 1
encode_state(s::Int) = s + 1

# List the possible transitions for encoded state `s` under stake `a`.
function nextstep(s::Int, a::Int)
    s = decode_state(s)
    a = min(s, a)  # a stake larger than the current capital is clamped
    if s == WinCapital || s == 0
        # both terminal states are absorbing with zero reward
        [(nextstate=encode_state(s), reward=0.0, prob=1.0)]
    else
        # with probability pₕ the stake is won (reward 1 only on reaching the goal);
        # otherwise it is lost
        [(nextstate=encode_state(min(s+a, WinCapital)), reward=s+a >= WinCapital ? 1.0 : 0.0, prob=pₕ),
         (nextstate=encode_state(max(s-a, 0)), reward=0.0, prob=1-pₕ)]
    end
end
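
As a quick sanity check (a minimal sketch of my own, not part of the original script), the probabilities in each transition list should sum to one and every successor should stay inside the encoded state range:

# Sanity check: each (state, action) transition list is a valid distribution
# whose successors stay inside the encoded range 1..WinCapital+1.
for s in 1:(WinCapital+1), a in 1:WinCapital
    transitions = nextstep(s, a)
    @assert sum(t.prob for t in transitions) ≈ 1.0
    @assert all(1 <= t.nextstate <= WinCapital+1 for t in transitions)
end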

# Build the tabular dynamics model: one transition list per (state, action) pair.
const GamblerProblemEnvModel = DeterministicDistributionModel(
    [nextstep(s, a) for s in 1:(WinCapital+1), a in 1:WinCapital]
);
V = TabularVApproximator(1 + WinCapital)  # one value estimate per encoded state
# Sweep Bellman optimality backups until convergence (or max_iter sweeps).
value_iteration!(V=V, model=GamblerProblemEnvModel, γ=1.0, max_iter=1000)
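
For reference, the backup that value_iteration! performs is the Bellman optimality update. A minimal hand-rolled sketch of the same computation over nextstep, independent of the library (manual_value_iteration is a hypothetical name):

# Hand-rolled Bellman optimality backups over the same dynamics (γ = 1).
# A sketch for illustration only; the library call above is what is used here.
function manual_value_iteration(; θ=1e-10)
    v = zeros(WinCapital + 1)
    while true
        Δ = 0.0
        for s in 2:WinCapital  # skip the two terminal states
            backup = maximum(
                sum(t.prob * (t.reward + v[t.nextstate]) for t in nextstep(s, a))
                for a in 1:WinCapital
            )
            Δ = max(Δ, abs(backup - v[s]))
            v[s] = backup
        end
        Δ < θ && return v
    end
end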
plot(V.table[2:end-1])  # values of capitals 1..WinCapital-1 (terminal states dropped)
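
With the converged values, the greedy policy (the lower panel of the book's Figure 4.3) can be recovered by a one-step lookahead. A minimal sketch, assuming V.table holds the converged values indexed by encoded state (greedy_stake is a hypothetical helper):

# One-step greedy lookahead; assumes V.table holds the converged values.
function greedy_stake(s::Int)
    capital = decode_state(s)
    # expected return of staking `a` (γ = 1, so no discounting)
    q(a) = sum(t.prob * (t.reward + V.table[t.nextstate]) for t in nextstep(s, a))
    argmax(q, 1:capital)  # requires Julia ≥ 1.7; stakes above the capital are clamped by nextstep
end

policy = [greedy_stake(s) for s in 2:WinCapital]  # greedy stakes for capitals 1..99
plot(1:WinCapital-1, policy, xlabel="Capital", ylabel="Stake")

Note that many policies are optimal in this problem; argmax breaks ties toward the smallest stake, so the exact shape of the policy plot depends on tie-breaking and floating-point noise.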