# Source code for blackbox_mpc.utils.pendulum

import tensorflow as tf
import numpy as np


@tf.function
def _pendulum_angle_normalize(x):
    """Wrap an angle given in radians around the circle.

    The input is shifted by pi, reduced modulo one full turn and shifted
    back, so the result lies in [-pi, pi) (up to floating-point rounding).
    """
    full_turn = 2 * np.pi
    shifted = x + np.pi
    wrapped = shifted % full_turn
    return wrapped - np.pi


@tf.function
def pendulum_reward_function(current_state, next_state, actions):
    """
    The pendulum state reward function.

    The reward is the negated sum of three quadratic costs evaluated on
    ``current_state`` and ``actions``::

        -(theta ** 2 + 0.1 * theta_dot ** 2) - 0.001 * sum(actions ** 2)

    where ``theta`` is recovered with ``atan2`` from columns 1 and 0 of the
    state (read as sin(theta) and cos(theta)) and ``theta_dot`` is column 2.

    Parameters
    ----------
    current_state: tf.float32
        represents the current state of the system (Bxdim_S)
    next_state: tf.float32
        represents the next state of the system (Bxdim_S). It is not used
        by this reward; it is only part of the signature.
    actions: tf.float32
        represents the actions applied to the system (Bxdim_U); the squared
        actions are summed over axis 1.

    Returns
    -------
    rewards: tf.float32
        The reward for each row of the batch (B).
    """
    # Recover the angle from its (cos, sin) encoding and wrap it.
    theta = _pendulum_angle_normalize(
        tf.math.atan2(current_state[:, 1], current_state[:, 0]))
    theta_dot = current_state[:, 2]

    angle_cost = tf.square(theta)
    velocity_cost = tf.constant(0.1, dtype=tf.float32) * tf.square(theta_dot)
    action_cost = tf.constant(0.001, dtype=tf.float32) * tf.reduce_sum(
        tf.square(actions), axis=1)

    return -(angle_cost + velocity_cost) - action_cost


class PendulumTrueModel(tf.Module):
    """Closed-form dynamics model of the gym pendulum environment."""

    def __init__(self, name=None):
        """
        This is the pendulum true model for the gym environment


        Parameters
        ----------
        name: String
            Defines the name of the block of the pendulum true model.
        """
        super().__init__(name=name)
        # Physical constants and limits, stored as float32 tensors so they
        # can be used directly inside the traced __call__.
        self.g = tf.constant(10, dtype=tf.float32)
        self.max_torque = tf.constant(2.0, dtype=tf.float32)
        self.max_speed = tf.constant(8.0, dtype=tf.float32)
        self.m = tf.constant(1., dtype=tf.float32)
        self.l = tf.constant(1., dtype=tf.float32)
        self.dt = tf.constant(.05, dtype=tf.float32)
        self.pi = tf.constant(float(np.pi), dtype=tf.float32)

    @tf.function
    def __call__(self, x, train):  # cos(theta), sin(theta), dtheta, u
        """
        This is the call function for the pendulum true model.


        Parameters
        ----------
        x: tf.float32
            Defines the (s_t, a_t) which is the state and action stacked on top of each other,
            (dims = Batch X (dim_S + dim_U)) [cos(theta), sin(theta), dtheta, u]
        train: tf.bool
            Placeholder to conform with the base class; it is not used here.


        Returns
        -------
        output: tf.float32
            Defines the change in state (s_t+1 - s_t) with (dims = Batch X dim_S),
            ordered as [cos(theta), sin(theta), dtheta]. Add it to x[:, :3]
            to obtain the next state.
        """
        theta_cos = x[:, 0]
        theta_sin = x[:, 1]
        thdot = x[:, 2]
        # Saturate the commanded torque at max_torque, as the gym pendulum
        # environment does before integrating; previously max_torque was
        # defined but never applied.
        u = tf.clip_by_value(x[:, 3], -self.max_torque, self.max_torque)

        theta = tf.math.atan2(theta_sin, theta_cos)

        # Angular acceleration contributions from gravity and from the torque.
        gravity_term = (-3.0 * self.g / (2.0 * self.l)
                        * tf.math.sin(theta + self.pi))
        torque_term = 3.0 / (self.m * tf.square(self.l)) * u

        newthdot = thdot + (gravity_term + torque_term) * self.dt
        # The angle is integrated with the velocity *before* the speed limit
        # is applied; this ordering is kept from the original implementation.
        newth = theta + newthdot * self.dt
        newthdot = tf.clip_by_value(newthdot, -self.max_speed, self.max_speed)

        new_state = tf.stack(
            [tf.math.cos(newth), tf.math.sin(newth), newthdot], axis=1)
        # The model reports the state delta, not the absolute next state.
        return new_state - x[:, :3]