Source code for irlc.ex04.discrete_control_model

# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
"""

References:
  [Her25] Tue Herlau. Sequential decision making. (Freely available online), 2025.
"""
from irlc.ex03.control_model import ControlModel
import sympy as sym
import numpy as np
import sys
# Add a few numpy functions to sympy.
sympy_modules_ = ['numpy', {'atan': np.arctan, 'atan2': np.arctan2, 'atanh': np.arctanh}, 'sympy']
sympy_modules = ['sympy']


[docs]
class DiscreteControlModel: 
    """
    A discretized model. To create a model of this type, first specify a symbolic model, then pass it along to the constructor.
    Since the symbolic model will specify the dynamics as a symbolic function, the discretized model can automatically discretize it
    and create functions for computing derivatives.

    The class will also discretize the cost. Note that it is possible to specify coordinate transformations.
    """
    state_labels = None
    action_labels = None

    "This field represents the :class:`~irlc.ex04.continuous_time_model.ContinuousSymbolicModel` the discrete model is derived from."
    continuous_model = None


[docs]
    def __init__(self, model: ControlModel, dt: float, cost=None, discretization_method=None): 
        r"""
        Create the discretized model.

        :param model: The continuous-time model to discretize.
        :param dt: Discretization timestep :math:`\Delta`
        :param cost: If this parameter is not specified, the cost will be derived (discretized) automatically from ``model``
        :param discretization_method: Can be either ``'Euler'`` (default) or ``'ei'`` (exponential integration). The later will assume that the model is a linear.
        """
        self.dt = dt  
        self.continuous_model = model   
        if discretization_method is None:
            from irlc.ex04.model_linear_quadratic import LinearQuadraticModel
            if isinstance(model, LinearQuadraticModel):
                discretization_method = 'Ei'
            else:
                discretization_method = 'Euler'
        self.discretization_method = discretization_method.lower()

        """ Initialize symbolic variables representing inputs and actions. """

        uc = sym.symbols(f"uc:{model.action_size}") 
        xc = sym.symbols(f"xc:{model.state_size}")

        xd, ud = model.phi_x(xc), model.phi_u(uc) # Compute discrete symbolic variables x_k, u_k.

        x = sym.symbols(f"x:{len(xd)}") 
        u = sym.symbols(f"u:{len(ud)}")

        """ x_next is a symbolic variable representing x_{k+1} = f_k(x_k, u_k) """
        x_next = self._f_discrete_sym(x, u, dt=dt) 
        """ compute the symbolic derivate of x_next wrt. z = (x,u): d x_{k+1}/dz """
        dy_dz = sym.Matrix([[sym.diff(f, zi) for zi in list(x) + list(u)] for f in x_next])
        """ Define (numpy) functions giving next state and the derivatives """
        self._f_z_np = sym.lambdify((tuple(x), tuple(u)), dy_dz, modules=sympy_modules_) 
        # Create a numpy function corresponding to the discretized model x_{k+1} = f_discrete(x_k, u_k) 
        self._f_np = sym.lambdify((tuple(x), tuple(u)), x_next, modules=sympy_modules_)  
        self._n = len(x)
        self._d = len(u)

        # Make action/state transformation

        self.phi_x_inv = sym.lambdify( (x,), model.phi_x_inv(x), modules=sympy_modules_)
        self.phi_u_inv = sym.lambdify( (u,), model.phi_u_inv(u), modules=sympy_modules_)

        self.phi_x = sym.lambdify((xc,), model.phi_x(xc), modules=sympy_modules_)
        self.phi_u = sym.lambdify((uc,), model.phi_u(uc), modules=sympy_modules_)

        # set labels (only used for plotting)
        if self.state_labels is None:
            self.state_labels = self.continuous_model.state_labels

        if self.action_labels is None:
            self.action_labels = self.continuous_model.action_labels

        if cost is None: # Discretize the cost function from the continuous-time model.
            self.cost = model.get_cost().discretize(dt=dt) 
        else:
            self.cost = cost


    @property
    def state_size(self):
        """
        The dimension of the state vector :math:`x`, i.e. :math:`n`
        :return: Dimension of the state vector :math:`n`
        """
        return self._n

    @property
    def action_size(self):
        """
        The dimension of the action vector :math:`u`, i.e. :math:`d`
        :return: Dimension of the action vector :math:`d`
        """
        return self._d

    def _f_discrete_sym(self, xs, us, dt):
        """
        This is a helper function. It computes the discretized dynamics as a symbolic object:

        .. math::
            x_{k+1}  = f_k(x_k, u_k, t_k)

        The parameters corresponds to states and actions and are lists of the form ``[x0, x1, ..]`` and ``[u0, u1, ..]``
        where each element is a symbolic expression. The function returns a list of the form ``[f0, f1, ..]`` where
        each element is a symbolic expression corresponding to a coordinate of :math:`f_k`.

        :param xs: List of symbolic expressions corresponding to the coordinates of :math:`x_k`
        :param us: List of symbolic expressions corresponding to the coordinates of :math:`x_u`
        :param dt: A symbolic expressions corresponding to :math:`t_k`
        :return: A list of symbolic expressions corresponding to the coordinates of :math:`f_k`
        """
        # xc, uc = self.sym_discrete_xu2continious_xu(xs, us)
        xc, uc = self.continuous_model.phi_x_inv(xs), self.continuous_model.phi_u_inv(us)
        if self.discretization_method == 'euler':
            xdot = self.continuous_model.sym_f(x=xc, u=uc)
            xnext = [x_ + xdot_ * dt for x_, xdot_ in zip(xc, xdot)]
        elif self.discretization_method == 'ei':  # Assume the continuous model is linear; a bit hacky, but use exact Exponential integration in that case
            A = self.continuous_model.A
            B = self.continuous_model.B
            d = self.continuous_model.d
            """ These are the matrices of the continuous-time problem.
            > dx/dt = Ax + Bu + d
            and should be discretized using the exact integration technique (see (Her25, Subsection 13.1.3) and (Her25, Subsection 13.1.6)); 
            the precise formula you should implement is given in (Her25, eq. (13.19))
            
            Remember the output matrix should be symbolic (see Euler integration for examples) but you can assume there are no variable transformations for simplicity.            
            """
            from scipy.linalg import expm
            """
            expm computes the matrix exponential: 
            > expm(A) = exp(A) 
            """
            Ad = expm(A * dt)
            n = Ad.shape[0]
            d =  d.reshape( (len(B),1) ) if d is not None else np.zeros( (n, 1) )
            Bud = B @ sym.Matrix(uc) + (sym.zeros(len(B),1) if d is None else d)
            x_next = sym.Matrix(Ad) @ sym.Matrix(xc) + dt * phi1(A * dt) @ Bud
            xnext = list(x_next)
        else:
            raise Exception("Unknown discreetization method", self.discretization_method)
        xnext = self.continuous_model.phi_x(xnext)
        return xnext


[docs]
    def f(self, x, u, k=0): 
        r"""
        This function implements the dynamics :math:`f_k(x_k, u_k)` of the model. They can be evaluated as:

        .. runblock:: pycon

            >>> from irlc.ex04.model_pendulum import DiscreteSinCosPendulumModel
            >>> model = DiscreteSinCosPendulumModel()
            >>> x = [0, 1, 0.4]
            >>> u = [1]
            >>> print(model.f(x,u) )           # Computes x_{k+1} = f_k(x_k, u_k)

        The model will by default be Euler discretized:

        .. math::

            x_{k+1} = f_k(x_k, u_k) = x_k + \Delta f(x_k, u_k)

        except :python:`LinearQuadraticModel` which will be discretized using Exponential Integration by default.


        :param x: The state as a numpy array
        :param u: The action as a numpy array
        :param k: The time step as an integer (currently this has no effect)
        :return: The next state :math:`x_{x+1}` as a numpy array.
        """
        fx = np.asarray( self._f_np(x, u) ) 
        return fx 



[docs]
    def f_jacobian(self, x, u, k=0):
        r"""Compute the Jacobians of the discretized dynamics.

        The function will compute the two Jacobian derives of the discrete dynamics :math:`f_k` with respect to :math:`x` and :math:`u`:

        .. math::
            J_x f_k(x,u), \quad J_u f_k(x, u)

        .. runblock:: pycon

            >>> from irlc.ex04.model_pendulum import DiscreteSinCosPendulumModel
            >>> model = DiscreteSinCosPendulumModel()
            >>> x = [0, 1, 0.4]
            >>> u = [0]
            >>> f, Jx, Ju = model.f(x,u)
            >>> Jx, Ju = model.f_jacobian(x,u)
            >>> print("Jacobian Jx is\\n", Jx)
            >>> print("Jacobian Ju is\\n", Ju)


        :param x: The state as a numpy array
        :param u: The action as a numpy array
        :param k: The time step as an integer (currently this has no effect)
        :return: The two Jacobians computed wrt. :math:`x` and :math:`u`.
        """
        J = self._f_z_np(x, u)
        return J[:, :self.state_size], J[:, self.state_size:]



    def render(self, x=None, render_mode="human"):
        return self.continuous_model.render(x=self.phi_x_inv(x), render_mode=render_mode)

    def close(self):
        self.continuous_model.close()

    def __str__(self):
        """
        Return a string representation of the model. This is a potentially helpful way to summarize the content of the
        model. You can use it as:

        .. runblock:: pycon

            >>> from irlc.ex04.model_pendulum import DiscreteSinCosPendulumModel
            >>> model = DiscreteSinCosPendulumModel()
            >>> print(model)

        :return: A string containing the details of the model.
        """
        split = "-"*20
        s = [f"{self.__class__}"] + ['='*50]
        s += [f"Dynamics (after discretization with Delta = {self.dt}):", split]
        # t = sym.symbols("t")
        x = sym.symbols(f"x0:{self.state_size}")
        u = sym.symbols(f"u0:{self.action_size}")

        f = self._f_discrete_sym(x, u, self.dt)
        from irlc.ex03.control_model import typeset_eq
        s += [typeset_eq(x, u, f)]

        s += ["Continuous-time dynamics:", split]
        xc = sym.symbols(f"x:{self.continuous_model.state_size}")
        uc = sym.symbols(f"u:{self.continuous_model.action_size}")

        s += [f"f_k({x}, {u}) = {str(self.continuous_model.sym_f(xc, uc))}", '']
        s += ["Variable transformations:", split]
        xd, ud = self.continuous_model.phi_x(xc), self.continuous_model.phi_u(uc)
        s += [f' * phi_x( x(t) ) -> x_k = {xd}']
        s += [f' * phi_u( u(t) ) -> u_k = {ud}', '']
        s += ["Cost:", split, str(self.cost)]
        return "\n".join(s)



def phi1(A):
    """ This is a helper functions which computes

    .. math::
        A^{-1} (e^A - I)
        
    and importantly deals with potential numerical instability in the expression.
    """
    from scipy.linalg import expm
    from math import factorial
    if np.linalg.cond(A) < 1 / sys.float_info.epsilon:
        return np.linalg.solve(A, expm(A) - np.eye( len(A) ) )
    else:
        C = np.zeros_like(A)
        for k in range(1, 20):
            dC = np.linalg.matrix_power(A, k - 1) / factorial(k)
            C += dC
        assert sum( np.abs(dC.flat)) < 1e-10
        return C