Source code for blackbox_mpc.policies.random_policy

from blackbox_mpc.policies.model_free_base_policy import ModelFreeBasePolicy
import tensorflow as tf


class RandomPolicy(ModelFreeBasePolicy):
    """Policy that samples actions uniformly at random from the action space."""

    def __init__(self, number_of_agents, env_action_space):
        """
        This is the random policy for controlling the agent.

        Parameters
        ---------
        number_of_agents: tf.int32
            Defines the number of runner running in parallel.
        env_action_space: gym.ActionSpace
            Defines the action space of the gym environment. Its ``low`` and
            ``high`` attributes are used as the per-dimension sampling bounds.
        """
        super(RandomPolicy, self).__init__()
        self._num_of_agents = number_of_agents
        # The lower bound must come from `low` and the upper bound from
        # `high`; they are handed to tf.random.uniform as minval / maxval.
        self._action_lower_bound = tf.constant(env_action_space.low,
                                               dtype=tf.float32)
        self._action_upper_bound = tf.constant(env_action_space.high,
                                               dtype=tf.float32)

    def act(self, observations, t, exploration_noise=False):
        """
        This is the act function for the random policy, which should be
        called to provide the action to be executed at the current time step.

        The action is drawn independently of all the arguments: they are
        accepted only to satisfy the policy interface and are not read.

        Parameters
        ---------
        observations: tf.float32
            Defines the current observations received from the environment
            (unused).
        t: tf.float32
            Defines the current timestep (unused).
        exploration_noise: bool
            Defines if exploration noise should be added to the action to be
            executed (unused).

        Returns
        -------
        action: tf.float32
            The action to be executed for each of the runner
            (dims = runner X dim_U), sampled uniformly between the lower and
            upper action bounds.
        """
        # One row per agent; the trailing dims mirror the action-bound shape
        # so that the bound tensors broadcast against the sampled tensor.
        sample_shape = [self._num_of_agents, *self._action_lower_bound.shape]
        return tf.random.uniform(sample_shape,
                                 self._action_lower_bound,
                                 self._action_upper_bound,
                                 dtype=tf.float32)

    def reset(self):
        """
        This is the reset function for the random policy, which should be
        called at the beginning of the episode. The policy keeps no
        per-episode state, so this is a no-op.
        """
        return