Jun / Sep 25 2019

Chapter05 Left Right

This example reproduces Example 5.5 ("Infinite Variance") from Sutton & Barto (2018): first-visit Monte Carlo prediction with ordinary importance sampling on a one-state MDP, where the importance-sampling ratios have infinite variance and the value estimates never settle.

using ReinforcementLearning, ReinforcementLearningEnvironments
using RLIntro, RLIntro.LeftRight
using Plots  # provides plot/plot! used below

env = LeftRightEnv()
ns, na = length(observation_space(env)), length(action_space(env))  # (2, 2)
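
Before running the library version, it helps to know what LeftRightEnv is assumed to model. The sketch below is illustrative, not RLIntro's implementation: it follows Example 5.5 in Sutton & Barto (2018), and simulate_left_right is a name introduced here for later reference.

# Assumed dynamics (Example 5.5): one nonterminal state, two actions.
#   left  (action 1): with prob. 0.9 return to the state (reward 0),
#                     with prob. 0.1 terminate with reward +1.
#   right (action 2): terminate immediately with reward 0.
# The target policy always picks left, so the true value is v_π(s) = 1.
# Returns the episode return G and the importance-sampling ratio ρ of the
# all-left target policy relative to the 50/50 behavior policy.
function simulate_left_right(b_left=0.5)
    ρ = 1.0
    while true
        if rand() < b_left                   # behavior policy chooses left
            ρ *= 1.0 / b_left                # π(left|s)/b(left|s) = 1/0.5 = 2
            rand() < 0.1 && return (1.0, ρ)  # terminate with reward +1 ⇒ G = 1
            # otherwise stay in the state (reward 0) and continue
        else                                 # behavior policy chooses right
            return (0.0, 0.0)                # π(right|s) = 0 ⇒ ρ = 0, G = 0
        end
    end
end
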
# A hook that records the target policy's value estimate for state 1
# at the end of every episode.
struct CollectValue <: AbstractHook
    values::Vector{Float64}
    CollectValue() = new([])
end

(f::CollectValue)(::PostEpisodeStage, agent, env, obs) =
    push!(f.values, agent.π.π_target.learner.approximator(1))
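
With sampling=ORDINARY_IMPORTANCE_SAMPLING, the MonteCarloLearner estimates v_π(s) as the plain average of the weighted returns, V(s) ≈ (1/n) Σᵢ ρᵢGᵢ. On this task every left choice multiplies ρ by 2, so a rare all-left episode of length k contributes 2^k: the estimator is unbiased but has infinite variance, which is the point of the example. The same estimate can be computed with the standalone sketch above (ois_estimate is illustrative, not part of the library):

# Ordinary importance sampling: unweighted average of ρ * G over n episodes.
# Uses simulate_left_right from the sketch above.
function ois_estimate(n)
    total = 0.0
    for _ in 1:n
        G, ρ = simulate_left_right()
        total += ρ * G
    end
    return total / n
end

ois_estimate(100_000)  # unbiased for v_π(s) = 1, but long episodes cause large jumps
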
p = plot()
for _ in 1:10  # ten independent runs, plotted together
    agent = Agent(
        π=OffPolicy(
            # Target policy: deterministically pick action 1 (left); its value
            # is estimated with first-visit Monte Carlo and ordinary
            # importance sampling.
            VBasedPolicy(
                learner=MonteCarloLearner(
                    approximator=TabularVApproximator(ns),  # one entry per state
                    kind=FIRST_VISIT,
                    sampling=ORDINARY_IMPORTANCE_SAMPLING
                ),
                f=TabularDeterministicPolicy(table=ones(Int, ns), nactions=na)
            ),
            # Behavior policy: choose left/right with equal probability.
            TabularRandomPolicy(fill(0.5, ns, na))
        ),
        buffer=episode_RTSA_buffer()
    )
    hook = CollectValue()
    run(agent, env, StopAfterEpisode(100_000, is_show_progress=false); hook=hook)
    plot!(p, hook.values, xscale=:log10)
end
p
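
Even after 100,000 episodes the ten curves do not settle at the true value v_π(s) = 1: ordinary importance sampling keeps producing occasional large spikes, as in Figure 5.4 of Sutton & Barto (2018). For contrast, the weighted estimator Σᵢ ρᵢGᵢ / Σᵢ ρᵢ is well behaved here. A sketch using the same standalone simulator (wis_estimate is illustrative, not the library API):

# Weighted importance sampling: normalize by the sum of the ratios.
function wis_estimate(n)
    num, den = 0.0, 0.0
    for _ in 1:n
        G, ρ = simulate_left_right()
        num += ρ * G
        den += ρ
    end
    return den == 0 ? 0.0 : num / den
end

On this task G = 1 exactly when ρ > 0, so the weighted estimate equals 1 as soon as a single all-left episode has been seen, at the cost of bias in general.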