Jun / Sep 25 2019

Chapter05 Left Right

This example reproduces Example 5.5 ("Infinite Variance") from Sutton & Barto (2018): first-visit Monte Carlo prediction with ordinary importance sampling on a one-state MDP, where the importance-sampling ratios have infinite variance and the value estimates never settle.

using ReinforcementLearning, ReinforcementLearningEnvironments
using RLIntro, RLIntro.LeftRight
using Plots  # provides plot/plot! used below

env = LeftRightEnv()
ns, na = length(observation_space(env)), length(action_space(env))  # (2, 2)
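
Before running the library version, it helps to know what LeftRightEnv is assumed to model. The sketch below is illustrative, not RLIntro's implementation: it follows Example 5.5 in Sutton & Barto (2018), and simulate_left_right is a name introduced here for later reference.

# Assumed dynamics (Example 5.5): one nonterminal state, two actions.
#   left  (action 1): with prob. 0.9 return to the state (reward 0),
#                     with prob. 0.1 terminate with reward +1.
#   right (action 2): terminate immediately with reward 0.
# The target policy always picks left, so the true value is v_π(s) = 1.
# Returns the episode return G and the importance-sampling ratio ρ of the
# all-left target policy relative to the 50/50 behavior policy.
function simulate_left_right(b_left=0.5)
    ρ = 1.0
    while true
        if rand() < b_left                   # behavior policy chooses left
            ρ *= 1.0 / b_left                # π(left|s)/b(left|s) = 1/0.5 = 2
            rand() < 0.1 && return (1.0, ρ)  # terminate with reward +1 ⇒ G = 1
            # otherwise stay in the state (reward 0) and continue
        else                                 # behavior policy chooses right
            return (0.0, 0.0)                # π(right|s) = 0 ⇒ ρ = 0, G = 0
        end
    end
end
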
# A hook that records the target policy's value estimate for state 1
# at the end of every episode.
struct CollectValue <: AbstractHook
    values::Vector{Float64}
    CollectValue() = new([])
end

(f::CollectValue)(::PostEpisodeStage, agent, env, obs) =
    push!(f.values, agent.π.π_target.learner.approximator(1))
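
With sampling=ORDINARY_IMPORTANCE_SAMPLING, the MonteCarloLearner estimates v_π(s) as the plain average of the weighted returns, V(s) ≈ (1/n) Σᵢ ρᵢGᵢ. On this task every left choice multiplies ρ by 2, so a rare all-left episode of length k contributes 2^k: the estimator is unbiased but has infinite variance, which is the point of the example. The same estimate can be computed with the standalone sketch above (ois_estimate is illustrative, not part of the library):

# Ordinary importance sampling: unweighted average of ρ * G over n episodes.
# Uses simulate_left_right from the sketch above.
function ois_estimate(n)
    total = 0.0
    for _ in 1:n
        G, ρ = simulate_left_right()
        total += ρ * G
    end
    return total / n
end

ois_estimate(100_000)  # unbiased for v_π(s) = 1, but long episodes cause large jumps
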
p = plot()
for _ in 1:10  # ten independent runs, plotted together
    agent = Agent(
        π=OffPolicy(
            # Target policy: deterministically pick action 1 (left); its value
            # is estimated with first-visit Monte Carlo and ordinary
            # importance sampling.
            VBasedPolicy(
                learner=MonteCarloLearner(
                    approximator=TabularVApproximator(ns),  # one entry per state
                    kind=FIRST_VISIT,
                    sampling=ORDINARY_IMPORTANCE_SAMPLING
                ),
                f=TabularDeterministicPolicy(table=ones(Int, ns), nactions=na)
            ),
            # Behavior policy: choose left/right with equal probability.
            TabularRandomPolicy(fill(0.5, ns, na))
        ),
        buffer=episode_RTSA_buffer()
    )
    hook = CollectValue()
    run(agent, env, StopAfterEpisode(100_000, is_show_progress=false); hook=hook)
    plot!(p, hook.values, xscale=:log10)
end
p
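
Even after 100,000 episodes the ten curves do not settle at the true value v_π(s) = 1: ordinary importance sampling keeps producing occasional large spikes, as in Figure 5.4 of Sutton & Barto (2018). For contrast, the weighted estimator Σᵢ ρᵢGᵢ / Σᵢ ρᵢ is well behaved here. A sketch using the same standalone simulator (wis_estimate is illustrative, not the library API):

# Weighted importance sampling: normalize by the sum of the ratios.
function wis_estimate(n)
    num, den = 0.0, 0.0
    for _ in 1:n
        G, ρ = simulate_left_right()
        num += ρ * G
        den += ρ
    end
    return den == 0 ? 0.0 : num / den
end

On this task G = 1 exactly when ρ > 0, so the weighted estimate equals 1 as soon as a single all-left episode has been seen, at the cost of bias in general.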