Source code for blackbox_mpc.policies.model_based_base_policy

class ModelBasedBasePolicy(object):
    """Base class for model based policies that control the agent.

    The base class only stores the trajectory evaluator; ``act`` and
    ``reset`` are placeholders that raise ``NotImplementedError`` until a
    subclass overrides them.
    """

    def __init__(self, trajectory_evaluator):
        """
        This is the model based policy base class for controlling the agent

        Parameters
        ----------
        trajectory_evaluator: EvaluatorBase
            Defines the trajectory evaluator to be used in the optimizer to
            evaluate trajectories.
        """
        self._trajectory_evaluator = trajectory_evaluator

    def act(self, observations, t, exploration_noise=False):
        """
        This is the act function for the model based policy base class,
        which should be called to provide the action to be executed at the
        current time step.

        Parameters
        ----------
        observations: tf.float32
            Defines the current observations received from the environment.
        t: tf.float32
            Defines the current timestep.
        exploration_noise: bool
            Defines if exploration noise should be added to the action to be
            executed.

        Returns
        -------
        action: tf.float32
            The action to be executed for each of the runner
            (dims = runner X dim_U)
        next_observations: tf.float32
            The next observations predicted using the dynamics function
            learned so far.
        rewards_of_next_state: tf.float32
            The predicted reward if the action was executed using the
            predicted observations.

        Raises
        ------
        NotImplementedError
            Always, in this base class; the method has no implementation
            here.
        """
        # NotImplementedError is a subclass of Exception, so callers that
        # previously caught the generic Exception keep working.
        raise NotImplementedError("act function is not implemented yet")

    def reset(self):
        """
        This is the reset function for the model based policy base class,
        which should be called at the beginning of the episode.

        Raises
        ------
        NotImplementedError
            Always, in this base class; the method has no implementation
            here.
        """
        raise NotImplementedError("reset function is not implemented yet")