Skip to content

Unreal

Unreal

UNREAL-A2C agents.

UnrealTrainer

UnrealTrainer(envs, agent: UnrealA2C2, val_envs, config: UnrealTrainerConfig)

Bases: SyncMultiEnvTrainer

Trainer for the feed-forward UNREAL agent.

Source code in rlib/Unreal/feedforward.py
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
def __init__(
    self,
    envs,
    agent: UnrealA2C2,
    val_envs,
    config: UnrealTrainerConfig,
):
    """Initialise the feed-forward UNREAL trainer.

    Calls the parent constructor with ``envs``, ``agent``, ``val_envs`` and
    ``config``, then creates the replay buffer and, if observation
    normalisation is enabled, the running statistics it uses.

    Args:
        envs: Training environments, forwarded to the parent constructor.
        agent: Agent to train, forwarded to the parent constructor.
        val_envs: Validation environments, forwarded to the parent constructor.
        config: Trainer configuration; ``replay_length`` and
            ``normalise_obs`` are read here.
    """
    super().__init__(envs, agent, val_envs, config=config)

    # Bounded FIFO buffer: once full, appending drops the oldest entry.
    replay_capacity = config.replay_length  # replay length per actor
    self.replay = deque(maxlen=replay_capacity)

    # self.agent is read here, so it is presumably set by the parent
    # constructor -- TODO confirm.
    self.action_size = self.agent.action_size
    self.normalise_obs = config.normalise_obs

    if not self.normalise_obs:
        return

    # Normalisation state is only created when normalisation is enabled.
    self.obs_running = RunningMeanStd()
    # Mean/std placeholders shaped like self.states (presumably the current
    # batch of observations held by the parent class -- TODO confirm).
    # NOTE(review): norm_obs reads state_min/state_max, which are not set
    # here -- confirm where those are initialised.
    self.state_mean = np.zeros_like(self.states)
    self.state_std = np.ones_like(self.states)
    # NOTE(review): aux_reward_rolling only exists when normalise_obs is
    # true -- confirm no code path uses it with normalisation disabled.
    self.aux_reward_rolling = RunningMeanStd()

norm_obs

norm_obs(obs)

Normalise pixel intensities using the recorded minimum and maximum pixel observations. Per-pixel normalisation is not used because the expected image is a single greyscale frame.

Source code in rlib/Unreal/feedforward.py
261
262
263
264
265
def norm_obs(self, obs):
    """Rescale ``obs`` using the stored minimum and maximum observation values.

    Computes ``(obs - state_min) * (1 / (state_max - state_min))``. A single
    min/max pair is used rather than per-pixel statistics because the
    expected image is a single greyscale frame.

    Args:
        obs: Observation(s) to rescale.

    Returns:
        ``obs`` shifted by ``self.state_min`` and scaled by the reciprocal of
        ``self.state_max - self.state_min``.
    """
    # NOTE(review): no guard for state_max == state_min (zero range).
    value_range = self.state_max - self.state_min
    scale = 1 / value_range
    shifted = obs - self.state_min
    return shifted * scale

UnrealTrainerConfig dataclass

UnrealTrainerConfig(train_mode: TrainMode = TrainMode.NSTEP, returns: Returns = Returns.NSTEP, total_steps: int = 50000000, nsteps: int = 5, gamma: float = 0.99, lambda_: float = 0.95, validate_freq: int = 1000000, num_val_episodes: int = 50, max_val_steps: int = 10000, log_dir: str = 'logs/', model_dir: str = 'models/', save_freq: int = 0, log_scalars: bool = True, update_target_freq: int = 0, render_freq: int = 0, normalise_obs: bool = True, replay_length: int = 2000)

Bases: TrainerConfig

Hyperparameters for the feed-forward `UnrealTrainer`.

UnrealLSTMTrainer

UnrealLSTMTrainer(envs, agent: UnrealA2C, val_envs, config: TrainerConfig)

Bases: SyncMultiEnvTrainer

Trainer for the recurrent UNREAL agent (LSTM body, action+reward feed-in).

Source code in rlib/Unreal/lstm.py
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
def __init__(
    self,
    envs,
    agent: UnrealA2C,
    val_envs,
    config: TrainerConfig,
):
    """Initialise the recurrent UNREAL trainer.

    Calls the parent constructor, creates the replay buffer, and prepares
    the initial recurrent state and the initial previous-action/reward
    input for every environment.

    Args:
        envs: Training environments, forwarded to the parent constructor.
        agent: Recurrent agent to train, forwarded to the parent constructor.
        val_envs: Validation environments, forwarded to the parent constructor.
        config: Trainer configuration, forwarded to the parent constructor.
    """
    super().__init__(envs, agent, val_envs, config=config)

    # Bounded FIFO buffer with a fixed capacity of 2000 entries.
    self.replay = deque(maxlen=2000)
    self.action_size = self.agent.action_size

    # One initial hidden state per environment.
    self.prev_hidden = self.agent.get_initial_hidden(len(self.env))

    # start with action 0 and reward 0: the same all-zero int32 vector is
    # passed as both the previous actions and the previous rewards.
    initial = np.zeros(len(self.env), dtype=np.int32)
    feed_in_size = self.action_size + 1
    self.prev_actions_rewards = concat_action_reward(initial, initial, feed_in_size)