Source code for irlc.ex04.model_pendulum

# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
import sympy as sym
from irlc.ex03.control_model import ControlModel
from irlc.ex03.control_cost import SymbolicQRCost
from irlc.ex04.discrete_control_model import DiscreteControlModel
import gymnasium as gym
from gymnasium.spaces.box import Box
from irlc.ex04.control_environment import ControlEnvironment
import numpy as np

class PendulumModel(ControlModel):
    state_labels= [r"$\theta$", r"$\frac{d \theta}{dt}$"]
    action_labels = ['Torque $u$']
    x_upright, x_down = np.asarray([0.0, 0.0]), np.asarray([np.pi, 0.0])

    def __init__(self, l=1., m=.8, friction=0.0, max_torque=6.0, transform_coordinates=False): 
        self.l, self.m, self.max_torque = l, m, max_torque
        assert not transform_coordinates
        super().__init__() 
        self.friction = friction
        self._u_prev = None                        # For rendering
        self.cp_render = {}
        assert friction == 0.0

    def sym_f(self, x, u, t=None): 
        l, m = self.l, self.m
        g = 9.82
        theta_dot = x[1]  # Parameterization: x = [theta, theta']
        theta_dot_dot =  g/l * sym.sin(x[0]) + 1/(m*l**2) * u[0]
        return [theta_dot, theta_dot_dot]

    def get_cost(self) -> SymbolicQRCost:
        return SymbolicQRCost(R=np.ones((1, 1)), Q=np.eye(2))  

    def tF_bound(self) -> Box: 
        return Box(0.5, 4, shape=(1,), dtype=float)

    def t0_bound(self) -> Box:
        return Box(0, 0, shape=(1,), dtype=float)

    def x_bound(self) -> Box:
        return Box(np.asarray( [-2 * np.pi, -np.inf]), np.asarray( [2 * np.pi, np.inf]), dtype=float)

    def u_bound(self) -> Box:
        return Box(np.asarray([-self.max_torque]), np.asarray([self.max_torque]), dtype=float)

    def x0_bound(self) -> Box:
        return Box(np.asarray( [np.pi, 0] ), np.asarray( [np.pi, 0]), dtype=float)

    def xF_bound(self) -> Box:
        return Box(np.asarray([0, 0]), np.asarray([0, 0]), dtype=float)         

    def close(self):
        for r in self.cp_render.values():
            r.close()

    def render(self, x, render_mode="human"):
        if render_mode not in self.cp_render: # is None or self.cp_render[1] != render_mode:
            self.cp_render[render_mode] = gym.make("Pendulum-v1", render_mode=render_mode)  # environment only used for rendering. Change to v1 in gym 0.26.
            self.cp_render[render_mode].max_time_limit = 10000
            self.cp_render[render_mode].reset()
        self.cp_render[render_mode].unwrapped.state = np.asarray(x)  # environment is wrapped
        self.cp_render[render_mode].unwrapped.last_u = self._u_prev[0] if self._u_prev is not None else None
        return self.cp_render[render_mode].render()


[docs]
class SinCosPendulumModel(PendulumModel):

[docs]
    def phi_x(self, x):
        theta, theta_dot = x[0], x[1]
        return [sym.sin(theta), sym.cos(theta), theta_dot]



[docs]
    def phi_x_inv(self, x):
        sin_theta, cos_theta, theta_dot = x[0], x[1], x[2]
        theta = sym.atan2(sin_theta, cos_theta)  # Obtain angle theta from sin(theta),cos(theta)
        return [theta, theta_dot]



[docs]
    def phi_u(self, u):
        return [sym.atanh(u[0] / self.max_torque)]



[docs]
    def phi_u_inv(self, u):
        return [sym.tanh(u[0]) * self.max_torque]



[docs]
    def u_bound(self) -> Box:
        return Box(np.asarray([-np.inf]), np.asarray([np.inf]), dtype=float)




class DiscreteSinCosPendulumModel(DiscreteControlModel): 
    state_labels =  [r'$\sin(\theta)$', r'$\cos(\theta)$', r'$\dot{\theta}$'] # Check if this escape character works.
    action_labels = ['Torque $u$']

    def __init__(self, dt=0.02, cost=None, **kwargs): 
        model = SinCosPendulumModel(**kwargs) 
        self.max_torque = model.max_torque
        # self.transform_actions = transform_actions  
        super().__init__(model=model, dt=dt, cost=cost) 
        self.x_upright = np.asarray(self.phi_x(model.x_upright))
        self.l = model.l # Pendulum length
        if cost is None:  
            cost = _pendulum_cost(self)
        self.cost = cost  

def _pendulum_cost(model : DiscreteSinCosPendulumModel): # Helper method to set a discrete cost.
    from irlc.ex04.discrete_control_cost import DiscreteQRCost
    Q = np.eye(model.state_size)
    Q[0, 1] = Q[1, 0] = model.l
    Q[0, 0] = Q[1, 1] = model.l ** 2
    Q[2, 2] = 0.0
    R = np.array([[0.1]]) * 10
    c0 = DiscreteQRCost(Q=np.zeros((model.state_size,model.state_size)), R=R)
    c0 = c0 + c0.goal_seeking_cost(Q=Q, x_target=model.x_upright)
    c0 = c0 + c0.goal_seeking_terminal_cost(xN_target=model.x_upright) * 1000
    return c0 * 2

class ThetaPendulumEnvironment(ControlEnvironment):
    def __init__(self, Tmax=5, render_mode=None):
        dt = 0.02
        discrete_model = DiscreteControlModel(PendulumModel(), dt=dt)
        super().__init__(discrete_model, Tmax=Tmax, render_mode=render_mode)

class GymSinCosPendulumEnvironment(ControlEnvironment): 
    def __init__(self, *args, Tmax=5, supersample_trajectory=False, render_mode=None, **kwargs): 
        discrete_model = DiscreteSinCosPendulumModel(*args, **kwargs) 
        self.action_space = Box(low=-np.inf, high=np.inf, shape=(discrete_model.action_size,), dtype=float)
        self.observation_space = Box(low=-np.inf, high=np.inf, shape=(discrete_model.state_size,), dtype=float)
        super().__init__(discrete_model, Tmax=Tmax, supersample_trajectory=supersample_trajectory, render_mode=render_mode) 

if __name__ == "__main__":
    model = SinCosPendulumModel(l=1, m=1)
    print(str(model))
    print(f"Pendulum with l={model.l}, m={model.m}") 
    x = [1,2]
    u = [0] # Input state/action.
    # x_dot = ...
    # TODO: 1 lines missing.
    raise NotImplementedError("Compute dx/dt = f(x, u, t=0) here using the model-class defined above")
    # x_dot_numpy = ...
    # TODO: 1 lines missing.
    raise NotImplementedError("Compute dx/dt = f(x, u, t=0) here using numpy-expressions you write manually.")

    print(f"Using model-class: dx/dt = f(x, u, t) = {x_dot}")
    print(f"Using numpy: dx/dt = f(x, u, t) = {x_dot_numpy}")