Source code for blackbox_mpc.optimizers.random_search

import tensorflow as tf
import numpy as np
from blackbox_mpc.optimizers.optimizer_base import OptimizerBase


[docs]class RandomSearchOptimizer(OptimizerBase):
[docs] def __init__(self, env_action_space, env_observation_space, planning_horizon=50, population_size=1024, num_agents=5): """ This class is responsible for performing random shooting and choosing the best possible predicted trajectory and returning the first action of this trajectory. Parameters --------- env_action_space: gym.ActionSpace Defines the action space of the gym environment. env_observation_space: gym.ObservationSpace Defines the observation space of the gym environment. planning_horizon: Int Defines the planning horizon for the optimizer (how many steps to lookahead and optimize for). population_size: tf.int32 Defines the population size of the particles evaluated at each iteration. num_agents: tf.int32 Defines the number of runner running in parallel """ super(RandomSearchOptimizer, self).__init__(name=None, planning_horizon=planning_horizon, max_iterations=None, num_agents=num_agents, env_action_space=env_action_space, env_observation_space= env_observation_space) self._solution_dim = [self._num_agents, self._planning_horizon, self._dim_U] self._population_size = population_size return
@tf.function def _optimize(self, current_state, time_step): samples = tf.random.uniform([self._population_size, *self._solution_dim], self._action_lower_bound_horizon, self._action_upper_bound_horizon, dtype=tf.float32) rewards = self._trajectory_evaluator(current_state, samples, time_step) best_particle_index = tf.cast(tf.math.argmax(rewards), dtype=tf.int32) samples = tf.transpose(samples, [1, 0, 2, 3]) best_particle_index = best_particle_index + tf.range(0, samples.shape[0], dtype=tf.int32)*samples.shape[1] samples = tf.reshape(samples, [-1, *samples.shape[2:]]) resulting_action = tf.gather(samples, best_particle_index)[:, 0] return resulting_action
[docs] def reset(self): """ This method resets the optimizer to its default state at the beginning of the trajectory/episode. """ return