# Source code for blackbox_mpc.optimizers.random_search

import tensorflow as tf
import numpy as np
from blackbox_mpc.optimizers.optimizer_base import OptimizerBase


class RandomSearchOptimizer(OptimizerBase):
    """Random-shooting optimizer that samples action sequences uniformly and keeps the best one."""

    def __init__(self, env_action_space, env_observation_space,
                 planning_horizon=50, population_size=1024, num_agents=5):
        """
        This class is responsible for performing random shooting and choosing the best
        possible predicted trajectory and returning the first action of this trajectory.


        Parameters
        ----------
        env_action_space: gym.ActionSpace
            Defines the action space of the gym environment.
        env_observation_space: gym.ObservationSpace
            Defines the observation space of the gym environment.
        planning_horizon: int
            Defines the planning horizon for the optimizer (how many steps to lookahead and optimize for).
        population_size: int
            Defines the population size of the particles evaluated at each iteration.
        num_agents: int
            Defines the number of runners running in parallel.
        """
        super().__init__(name=None,
                         planning_horizon=planning_horizon,
                         max_iterations=None,
                         num_agents=num_agents,
                         env_action_space=env_action_space,
                         env_observation_space=env_observation_space)
        # Shape of a single candidate solution (one particle):
        # [num_agents, planning_horizon, action_dim]. The attributes read here
        # are not set in this class; presumably OptimizerBase.__init__ sets them.
        self._solution_dim = [self._num_agents, self._planning_horizon, self._dim_U]
        self._population_size = population_size

    @tf.function
    def _optimize(self, current_state, time_step):
        """
        Samples a population of random action sequences, scores them with the
        trajectory evaluator and returns the first action of the best sequence.

        Parameters
        ----------
        current_state:
            Passed unchanged to the trajectory evaluator
            (NOTE(review): type/shape not visible here -- confirm against OptimizerBase).
        time_step:
            Passed unchanged to the trajectory evaluator.

        Returns
        -------
        resulting_action: tf.float32
            First planning step of the selected action sequence(s); presumably
            of shape [num_agents, action_dim] -- TODO confirm against the
            evaluator's reward shape.
        """
        # One uniform draw per particle, shaped
        # [population_size, num_agents, planning_horizon, action_dim], bounded
        # by the per-horizon action limits (attributes not defined in this class).
        samples = tf.random.uniform([self._population_size, *self._solution_dim],
                                    self._action_lower_bound_horizon,
                                    self._action_upper_bound_horizon, dtype=tf.float32)
        rewards = self._trajectory_evaluator(current_state, samples, time_step)
        # argmax reduces over axis 0 by default, i.e. over the population axis;
        # assumes rewards is [population_size, num_agents] -- TODO confirm.
        best_particle_index = tf.cast(tf.math.argmax(rewards), dtype=tf.int32)
        # Put the agent axis first: [num_agents, population_size, horizon, action_dim].
        samples = tf.transpose(samples, [1, 0, 2, 3])
        # Convert each per-agent particle index into an index into the flattened
        # (agent, particle) axis: flat = agent * population_size + particle.
        best_particle_index = best_particle_index + tf.range(0, samples.shape[0], dtype=tf.int32)*samples.shape[1]
        # Merge the agent and population axes so one gather picks every agent's winner.
        samples = tf.reshape(samples, [-1, *samples.shape[2:]])
        # Keep only the first step of each selected sequence.
        resulting_action = tf.gather(samples, best_particle_index)[:, 0]
        return resulting_action

    def reset(self):
        """
        This method resets the optimizer to its default state at the beginning of the trajectory/episode.

        This optimizer performs no work on reset.
        """