VINTrainer(agent: VINCNN, envs, val_envs, epsilon=0.1, epsilon_final=0.1, epsilon_steps=1000000, epsilon_test=0.1, returns: Returns = Returns.NSTEP, log_dir='logs/', model_dir='models/', total_steps=50000000, nsteps=20, gamma=0.99, lambda_=0.95, validate_freq=1000000.0, save_freq=0, render_freq=0, update_target_freq=0, num_val_episodes=50, log_scalars=True)
Standalone trainer for the Value Iteration Network agent.
It does not subclass :class:`SyncMultiEnvTrainer` because VIN's
optimisation loop and observation interface are noticeably different
(no value targets or advantage estimates, and location-aware action
selection). It could be migrated in a future refactor.
Source code in rlib/VIN/trainer.py
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
def __init__(
    self,
    agent: VINCNN,
    envs,
    val_envs,
    epsilon=0.1,
    epsilon_final=0.1,
    epsilon_steps=1000000,
    epsilon_test=0.1,
    returns: Returns = Returns.NSTEP,
    log_dir='logs/',
    model_dir='models/',
    total_steps=50000000,
    nsteps=20,
    gamma=0.99,
    lambda_=0.95,
    validate_freq=1e6,
    save_freq=0,
    render_freq=0,
    update_target_freq=0,
    num_val_episodes=50,
    log_scalars=True,
):
    """Store the training configuration, reset the environments and open the log writer.

    Args:
        agent: VIN model to train; its ``action_size`` is cached on the trainer.
        envs: Training environments. Must support ``len()`` and ``reset()``.
        val_envs: Environments kept on the trainer for validation.
        epsilon: Stored as ``self.epsilon`` and passed to ``linear_schedule``
            as its first argument.
        epsilon_final: Second argument passed to ``linear_schedule``.
        epsilon_steps: Third argument passed to ``linear_schedule``.
        epsilon_test: Stored unchanged (presumably the exploration rate used
            during validation -- confirm against the validation code).
        returns: Return-estimation mode, stored unchanged.
        log_dir: Root log directory; scalar logs go to its ``train`` subdirectory.
        model_dir: Directory used for saved weights.
        total_steps: Stored after coercion to ``int``.
        nsteps: Stored unchanged (presumably the rollout length -- confirm).
        gamma: Stored unchanged (presumably the discount factor -- confirm).
        lambda_: Stored unchanged.
        validate_freq: Stored after coercion to ``int``.
        save_freq: Stored after coercion to ``int``.
        render_freq: Stored unchanged.
        update_target_freq: Stored as ``self.target_freq`` after coercion to ``int``.
        num_val_episodes: Stored unchanged.
        log_scalars: When truthy, a ``SummaryWriter`` is created for training logs.
    """
    self.agent = agent
    self.env = envs
    self.num_envs = len(envs)
    self.val_envs = val_envs
    # Normalise once up front; float inputs such as 5e7 become plain ints.
    self.total_steps = int(total_steps)
    self.action_size = self.agent.action_size
    self.epsilon = epsilon
    self.epsilon_test = epsilon_test

    # The environments are reset before get_locs() is called; keep this order.
    self.states = self.env.reset()
    self.loc = self.get_locs()
    print('locs', self.loc)

    self.nsteps = nsteps
    self.returns = returns
    self.gamma = gamma
    self.lambda_ = lambda_
    self.validate_freq = int(validate_freq)
    self.num_val_episodes = num_val_episodes
    self.save_freq = int(save_freq)
    self.render_freq = render_freq
    self.target_freq = int(update_target_freq)
    self.t = 1
    self.validate_rewards = []
    self.lock = threading.Lock()
    self.scheduler = self.linear_schedule(epsilon, epsilon_final, epsilon_steps)
    self.log_scalars = log_scalars
    self.log_dir = log_dir
    self.model_dir = model_dir
    self.s = 0  # number of saves made

    if log_scalars:
        # Tensorboard variables. os.path.join avoids the doubled separator
        # that plain concatenation produced for values like 'logs/'.
        train_log_dir = os.path.join(self.log_dir, 'train')
        self.train_writer = SummaryWriter(train_log_dir)
|
save
Save the agent weights to <model_dir>/<s>.pt.
Source code in rlib/VIN/trainer.py
def save(self, s: int) -> None:
    """Write the agent's ``state_dict`` to ``<model_dir>/<s>.pt``.

    The model directory is created first if it does not already exist.
    """
    target_dir = self.model_dir
    os.makedirs(target_dir, exist_ok=True)
    weights = self.agent.state_dict()
    torch.save(weights, f"{target_dir}/{s}.pt")
|
update_target
No-op: VIN has no target network. Defined so the training loop
can call it unconditionally when update_target_freq > 0.
Source code in rlib/VIN/trainer.py
def update_target(self) -> None:
    """Do nothing: VIN keeps no target network.

    The method exists so the training loop can call it unconditionally
    when ``update_target_freq > 0``.
    """
    return None
|