Source code for blackbox_mpc.trajectory_evaluators.evaluator_base

import tensorflow as tf


[docs]class EvaluatorBase(tf.Module): """This is the base class of the trajectory evaluators"""
[docs] def __init__(self, reward_function, system_dynamics_handler, name=None): """ This is the initializer function for the Evaluator Base Class. Parameters --------- name: String Defines the name of the block of the evaluator. """ super(EvaluatorBase, self).__init__(name=name) self._reward_function = reward_function self._system_dynamics_handler = system_dynamics_handler
[docs] @tf.function def __call__(self, current_states, action_sequences, time_step): """ This is the call function for the Evaluator Base Class. It is used to calculate the rewards corresponding to each of the action sequences starting from the current state. Parameters --------- current_states: tf.float32 Defines the current state of the system, (dims=num_of_agents X dim_S) action_sequences: tf.float32 Defines the action sequences to be evaluated, (dims = population X num_of_agents X planning_horizon X dim_U) time_step: tf.float32 Defines the current timestep of the episode. Returns ------- rewards: tf.float32 The rewards corresponding to each action sequence (dims = 1 X population) """ raise Exception("__call__ function is not implemented yet")
[docs] def predict_next_state(self, current_state, current_action): """ This is the function used to predict the next state using the internal dynamics handler. Parameters --------- current_state: tf.float32 Defines the current state of the system, (dims=num_of_agents X dim_S) current_action: tf.float32 Defines the current action to be applied, (dims = num_of_agents X dim_U) Returns ------- next_state: tf.float32 Defines the next state of the system, (dims=num_of_agents X dim_S) """ raise Exception("predict_next_state function is not implemented yet")
[docs] def evaluate_next_reward(self, current_state, next_state, current_action): """ This is the function used to predict the next reward using the internal dynamics handler. Parameters --------- current_state: tf.float32 Defines the current state of the system, (dims=num_of_agents X dim_S) next_state: tf.float32 Defines the next state of the system, (dims=num_of_agents X dim_S) current_action: tf.float32 Defines the current action to be applied, (dims = num_of_agents X dim_U) Returns ------- reward: tf.float32 returns the predicted reward using the action, current state and the next one, (dims=num_of_agents X 1) """ raise Exception("evaluate_next_reward function is not implemented yet")