Jun / Sep 25 2019
Chapter05 Left Right
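This example reproduces the "infinite variance" demonstration from Chapter 5 of Sutton & Barto's Reinforcement Learning: An Introduction (Example 5.5 / Figure 5.4): a single non-terminal state with two actions, "left" and "right", used to show that off-policy Monte Carlo evaluation with ordinary importance sampling can fail to converge even after a very large number of episodes.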
using ReinforcementLearning, ReinforcementLearningEnvironments
using RLIntro, RLIntro.LeftRight
using Plots
env = LeftRightEnv()
ns, na = length(observation_space(env)), length(action_space(env))
(2, 2)
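RLIntro provides LeftRightEnv, so its definition is not shown here. For reference, below is a minimal self-contained sketch of the MDP as described in the book; the dynamics and the helper names sample_ρG and ordinary_is are illustrative assumptions, not RLIntro's implementation. Under the behavior policy b, each action is chosen with probability 0.5; the target policy π always chooses "left", which loops back with probability 0.9 (reward 0) and terminates with probability 0.1 (reward 1), so the true value of the state is 1.

# Sketch of the MDP assumed by the book (NOT RLIntro's LeftRightEnv).
# Returns the importance ratio ρ = ∏ π(a|s)/b(a|s) and the return G
# of one episode generated by the behavior policy b.
function sample_ρG()
    ρ = 1.0
    while true
        rand() < 0.5 && return (0.0, 0.0)  # b picked "right": π(right) = 0, reward 0
        ρ *= 2.0                           # π(left)/b(left) = 1 / 0.5
        rand() < 0.1 && return (ρ, 1.0)    # "left" terminated with reward 1
    end                                    # otherwise "left" loops back with reward 0
end

# Ordinary importance sampling: average ρ·G over the episode count n.
function ordinary_is(n)
    total = 0.0
    for _ in 1:n
        ρ, G = sample_ρG()
        total += ρ * G
    end
    return total / n
end

ordinary_is(100_000)   # true value is 1, but individual runs jump around wildly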
# A hook that records the estimated value of the non-terminal state
# (state 1) at the end of every episode.
struct CollectValue <: AbstractHook
    values::Vector{Float64}
    CollectValue() = new([])
end

(f::CollectValue)(::PostEpisodeStage, agent, env, obs) =
    push!(f.values, agent.π.π_target.learner.approximator(1))
p = plot()
for _ in 1:10
    agent = Agent(
        π = OffPolicy(
            # target policy: deterministically choose the first action ("left"),
            # evaluated with first-visit Monte Carlo and ordinary importance sampling
            VBasedPolicy(
                learner = MonteCarloLearner(
                    approximator = TabularVApproximator(ns),  # value table indexed by state (was `na`; ns == na == 2 here)
                    kind = FIRST_VISIT,
                    sampling = ORDINARY_IMPORTANCE_SAMPLING
                ),
                f = TabularDeterministicPolicy(table = ones(Int, ns), nactions = na)
            ),
            # behavior policy: choose each of the two actions with probability 0.5
            TabularRandomPolicy(fill(0.5, ns, na))
        ),
        buffer = episode_RTSA_buffer()
    )
    hook = CollectValue()
    run(agent, env, StopAfterEpisode(100_000, is_show_progress = false); hook = hook)
    plot!(p, hook.values, xscale = :log10)
end
p
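Each of the ten curves is the running first-visit estimate of the value of the single non-terminal state. The true value under the target policy is 1, yet even after 100,000 episodes the ordinary importance-sampling estimates spike and never settle, because the episode-level summand ρ·G has infinite variance. Weighted importance sampling normalizes by the sum of the ratios rather than the episode count, and in this particular example it returns exactly 1 after the first episode that ends via "left". A sketch of that estimator, reusing the assumed sample_ρG helper from above (in the library this would presumably correspond to a weighted sampling option of MonteCarloLearner, but that constant is not shown in this notebook):

# Weighted importance sampling on the same samples: normalize by Σρ
# instead of the episode count n (illustrative sketch, not the library API).
function weighted_is(n)
    num, den = 0.0, 0.0
    for _ in 1:n
        ρ, G = sample_ρG()
        num += ρ * G
        den += ρ
    end
    return den == 0 ? 0.0 : num / den
end

weighted_is(100_000)   # exactly 1.0 once any episode ends via "left"

As the book notes, weighted importance sampling is biased but consistent, while ordinary importance sampling is unbiased but, as seen here, can have unbounded variance.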