# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text."""References: [Her25] Tue Herlau. Sequential decision making. (Freely available online), 2025."""fromirlc.ex03.control_modelimportControlModelimportsympyassymimportnumpyasnpimportsys# Add a few numpy functions to sympy.sympy_modules_=['numpy',{'atan':np.arctan,'atan2':np.arctan2,'atanh':np.arctanh},'sympy']sympy_modules=['sympy']
[docs]classDiscreteControlModel:""" A discretized model. To create a model of this type, first specify a symbolic model, then pass it along to the constructor. Since the symbolic model will specify the dynamics as a symbolic function, the discretized model can automatically discretize it and create functions for computing derivatives. The class will also discretize the cost. Note that it is possible to specify coordinate transformations. """state_labels=Noneaction_labels=None"This field represents the :class:`~irlc.ex04.continuous_time_model.ContinuousSymbolicModel` the discrete model is derived from."continuous_model=None
[docs]def__init__(self,model:ControlModel,dt:float,cost=None,discretization_method=None):r""" Create the discretized model. :param model: The continuous-time model to discretize. :param dt: Discretization timestep :math:`\Delta` :param cost: If this parameter is not specified, the cost will be derived (discretized) automatically from ``model`` :param discretization_method: Can be either ``'Euler'`` (default) or ``'ei'`` (exponential integration). The later will assume that the model is a linear. """self.dt=dtself.continuous_model=modelifdiscretization_methodisNone:fromirlc.ex04.model_linear_quadraticimportLinearQuadraticModelifisinstance(model,LinearQuadraticModel):discretization_method='Ei'else:discretization_method='Euler'self.discretization_method=discretization_method.lower()""" Initialize symbolic variables representing inputs and actions. """uc=sym.symbols(f"uc:{model.action_size}")xc=sym.symbols(f"xc:{model.state_size}")xd,ud=model.phi_x(xc),model.phi_u(uc)# Compute discrete symbolic variables x_k, u_k.x=sym.symbols(f"x:{len(xd)}")u=sym.symbols(f"u:{len(ud)}")""" x_next is a symbolic variable representing x_{k+1} = f_k(x_k, u_k) """x_next=self._f_discrete_sym(x,u,dt=dt)""" compute the symbolic derivate of x_next wrt. z = (x,u): d x_{k+1}/dz """dy_dz=sym.Matrix([[sym.diff(f,zi)forziinlist(x)+list(u)]forfinx_next])""" Define (numpy) functions giving next state and the derivatives """self._f_z_np=sym.lambdify((tuple(x),tuple(u)),dy_dz,modules=sympy_modules_)# Create a numpy function corresponding to the discretized model x_{k+1} = f_discrete(x_k, u_k) self._f_np=sym.lambdify((tuple(x),tuple(u)),x_next,modules=sympy_modules_)self._n=len(x)self._d=len(u)# Make action/state transformationself.phi_x_inv=sym.lambdify((x,),model.phi_x_inv(x),modules=sympy_modules_)self.phi_u_inv=sym.lambdify((u,),model.phi_u_inv(u),modules=sympy_modules_)self.phi_x=sym.lambdify((xc,),model.phi_x(xc),modules=sympy_modules_)self.phi_u=sym.lambdify((uc,),model.phi_u(uc),modules=sympy_modules_)# set labels (only used for plotting)ifself.state_labelsisNone:self.state_labels=self.continuous_model.state_labelsifself.action_labelsisNone:self.action_labels=self.continuous_model.action_labelsifcostisNone:# Discretize the cost function from the continuous-time model.self.cost=model.get_cost().discretize(dt=dt)else:self.cost=cost
@propertydefstate_size(self):""" The dimension of the state vector :math:`x`, i.e. :math:`n` :return: Dimension of the state vector :math:`n` """returnself._n@propertydefaction_size(self):""" The dimension of the action vector :math:`u`, i.e. :math:`d` :return: Dimension of the action vector :math:`d` """returnself._ddef_f_discrete_sym(self,xs,us,dt):""" This is a helper function. It computes the discretized dynamics as a symbolic object: .. math:: x_{k+1} = f_k(x_k, u_k, t_k) The parameters corresponds to states and actions and are lists of the form ``[x0, x1, ..]`` and ``[u0, u1, ..]`` where each element is a symbolic expression. The function returns a list of the form ``[f0, f1, ..]`` where each element is a symbolic expression corresponding to a coordinate of :math:`f_k`. :param xs: List of symbolic expressions corresponding to the coordinates of :math:`x_k` :param us: List of symbolic expressions corresponding to the coordinates of :math:`x_u` :param dt: A symbolic expressions corresponding to :math:`t_k` :return: A list of symbolic expressions corresponding to the coordinates of :math:`f_k` """# xc, uc = self.sym_discrete_xu2continious_xu(xs, us)xc,uc=self.continuous_model.phi_x_inv(xs),self.continuous_model.phi_u_inv(us)ifself.discretization_method=='euler':xdot=self.continuous_model.sym_f(x=xc,u=uc)xnext=[x_+xdot_*dtforx_,xdot_inzip(xc,xdot)]elifself.discretization_method=='ei':# Assume the continuous model is linear; a bit hacky, but use exact Exponential integration in that caseA=self.continuous_model.AB=self.continuous_model.Bd=self.continuous_model.d""" These are the matrices of the continuous-time problem. > dx/dt = Ax + Bu + d and should be discretized using the exact integration technique (see (Her25, Subsection 13.1.3) and (Her25, Subsection 13.1.6)); the precise formula you should implement is given in (Her25, eq. (13.19)) Remember the output matrix should be symbolic (see Euler integration for examples) but you can assume there are no variable transformations for simplicity. """fromscipy.linalgimportexpm""" expm computes the matrix exponential: > expm(A) = exp(A) """Ad=expm(A*dt)n=Ad.shape[0]d=d.reshape((len(B),1))ifdisnotNoneelsenp.zeros((n,1))Bud=B@sym.Matrix(uc)+(sym.zeros(len(B),1)ifdisNoneelsed)x_next=sym.Matrix(Ad)@sym.Matrix(xc)+dt*phi1(A*dt)@Budxnext=list(x_next)else:raiseException("Unknown discreetization method",self.discretization_method)xnext=self.continuous_model.phi_x(xnext)returnxnext
[docs]deff(self,x,u,k=0):r""" This function implements the dynamics :math:`f_k(x_k, u_k)` of the model. They can be evaluated as: .. runblock:: pycon >>> from irlc.ex04.model_pendulum import DiscreteSinCosPendulumModel >>> model = DiscreteSinCosPendulumModel() >>> x = [0, 1, 0.4] >>> u = [1] >>> print(model.f(x,u) ) # Computes x_{k+1} = f_k(x_k, u_k) The model will by default be Euler discretized: .. math:: x_{k+1} = f_k(x_k, u_k) = x_k + \Delta f(x_k, u_k) except :python:`LinearQuadraticModel` which will be discretized using Exponential Integration by default. :param x: The state as a numpy array :param u: The action as a numpy array :param k: The time step as an integer (currently this has no effect) :return: The next state :math:`x_{x+1}` as a numpy array. """fx=np.asarray(self._f_np(x,u))returnfx
[docs]deff_jacobian(self,x,u,k=0):r"""Compute the Jacobians of the discretized dynamics. The function will compute the two Jacobian derives of the discrete dynamics :math:`f_k` with respect to :math:`x` and :math:`u`: .. math:: J_x f_k(x,u), \quad J_u f_k(x, u) .. runblock:: pycon >>> from irlc.ex04.model_pendulum import DiscreteSinCosPendulumModel >>> model = DiscreteSinCosPendulumModel() >>> x = [0, 1, 0.4] >>> u = [0] >>> f, Jx, Ju = model.f(x,u) >>> Jx, Ju = model.f_jacobian(x,u) >>> print("Jacobian Jx is\\n", Jx) >>> print("Jacobian Ju is\\n", Ju) :param x: The state as a numpy array :param u: The action as a numpy array :param k: The time step as an integer (currently this has no effect) :return: The two Jacobians computed wrt. :math:`x` and :math:`u`. """J=self._f_z_np(x,u)returnJ[:,:self.state_size],J[:,self.state_size:]
defrender(self,x=None,render_mode="human"):returnself.continuous_model.render(x=self.phi_x_inv(x),render_mode=render_mode)defclose(self):self.continuous_model.close()def__str__(self):""" Return a string representation of the model. This is a potentially helpful way to summarize the content of the model. You can use it as: .. runblock:: pycon >>> from irlc.ex04.model_pendulum import DiscreteSinCosPendulumModel >>> model = DiscreteSinCosPendulumModel() >>> print(model) :return: A string containing the details of the model. """split="-"*20s=[f"{self.__class__}"]+['='*50]s+=[f"Dynamics (after discretization with Delta = {self.dt}):",split]# t = sym.symbols("t")x=sym.symbols(f"x0:{self.state_size}")u=sym.symbols(f"u0:{self.action_size}")f=self._f_discrete_sym(x,u,self.dt)fromirlc.ex03.control_modelimporttypeset_eqs+=[typeset_eq(x,u,f)]s+=["Continuous-time dynamics:",split]xc=sym.symbols(f"x:{self.continuous_model.state_size}")uc=sym.symbols(f"u:{self.continuous_model.action_size}")s+=[f"f_k({x}, {u}) = {str(self.continuous_model.sym_f(xc,uc))}",'']s+=["Variable transformations:",split]xd,ud=self.continuous_model.phi_x(xc),self.continuous_model.phi_u(uc)s+=[f' * phi_x( x(t) ) -> x_k = {xd}']s+=[f' * phi_u( u(t) ) -> u_k = {ud}','']s+=["Cost:",split,str(self.cost)]return"\n".join(s)
defphi1(A):""" This is a helper functions which computes .. math:: A^{-1} (e^A - I) and importantly deals with potential numerical instability in the expression. """fromscipy.linalgimportexpmfrommathimportfactorialifnp.linalg.cond(A)<1/sys.float_info.epsilon:returnnp.linalg.solve(A,expm(A)-np.eye(len(A)))else:C=np.zeros_like(A)forkinrange(1,20):dC=np.linalg.matrix_power(A,k-1)/factorial(k)C+=dCassertsum(np.abs(dC.flat))<1e-10returnC