Source code for irlc.ex06.dlqr

# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
"""

References:
  [Her24] Tue Herlau. Sequential decision making. (Freely available online), 2024.
"""
import numpy as np
import matplotlib.pyplot as plt
from irlc import bmatrix
from irlc import savepdf



[docs] def LQR(A : list, # Dynamic B : list, # Dynamics d : list =None, # Dynamics (optional) Q : list=None, R: list=None, H : list=None, q : list=None, r : list=None, qc : list=None, QN : np.ndarray =None, # Terminal cost term qN : np.ndarray=None, # Terminal cost term qcN : np.ndarray =None, # Terminal cost term. mu : float =0 # regularization parameter which will only be relevant next week. ): r""" Implement the LQR as defined in (Her24, Algorithm 22). I recommend viewing this documentation online (documentation for week 6). When you solve this exercise, look at the algorithm in the book. Since the LQR problem is on the form: .. math:: x_{k+1} = A_k x_k + B_k u_k + d_k For :math:`k=0,\dots,N-1` this means there are :math:`N` matrices :math:`A_k`. This is implemented by assuming that :python:`A` (i.e., the input argument) is a :python:`list` of length :math:`N` so that :python:`A[k]` corresponds to :math:`A_k`. Similar conventions are used for the cost term (please see the lecture notes or the online documentation for their meaning). Recall it has the form: .. math:: c(x_k, u_k) = \frac{1}{2} \mathbf x_k^\top Q_k \mathbf x_k + \frac{1}{2} \mathbf q_k^\top \mathbf x_k + q_k + \cdots When the function is called, the vector :math:`\textbf{q}_k` corresponds to :python:`q` and the constant :math:`q_k` correspond to :python:`qc` (q-constant). .. note:: Only the terms :python:`A` and :python:`B` are required. The rest of the terms will default to 0-matrices. The LQR algorithm will ultimately compute a control law of the form: .. math:: \mathbf u_k = L_k \mathbf x_k + \mathbf l_k And a cost-to-go function as: .. math:: J_k(x_k) = \frac{1}{2} \mathbf x_k^\top V_k \mathbf x_k + v_k^\top \mathbf x_k + v_k Again there are :math:`N-1` terms. The function then return :python:`return (L, l), (V, v, vc)` so that :python:`L[k]` corresponds to :math:`L_k`. :param A: A list of :python:`np.ndarray` containing all terms :math:`A_k` :param B: A list of :python:`np.ndarray` containing all terms :math:`B_k` :param d: A list of :python:`np.ndarray` containing all terms :math:`\mathbf d_k` (optional) :param Q: A list of :python:`np.ndarray` containing all terms :math:`Q_k` (optional) :param R: A list of :python:`np.ndarray` containing all terms :math:`R_k` (optional) :param H: A list of :python:`np.ndarray` containing all terms :math:`H_k` (optional) :param q: A list of :python:`np.ndarray` containing all terms :math:`\mathbf q_k` (optional) :param r: A list of :python:`np.ndarray` containing all terms :math:`\mathbf r_k` (optional) :param qc: A list of :python:`float` containing all terms :math:`q_k` (i.e., constant terms) (optional) :param QN: A :python:`np.ndarray` containing the terminal cost term :math:`Q_N` (optional) :param qN: A :python:`np.ndarray` containing the terminal cost term :math:`\mathbf q_N` (optional) :param qcN: A :python:`np.ndarray` containing the terminal cost term :math:`q_N` :param mu: A regularization term which is useful for iterative-LQR (next week). Default to 0. :return: A tuple of the form :python:`(L, l), (V, v, vc)` corresponding to the control and cost-matrices. """ N = len(A) n,m = B[0].shape # Initialize empty lists for control matrices and cost terms L, l = [None]*N, [None]*N V, v, vc = [None]*(N+1), [None]*(N+1), [None]*(N+1) # Initialize constant cost-function terms to zero if not specified. # They will be initialized to zero, meaning they have no effect on the update rules. QN = np.zeros((n,n)) if QN is None else QN qN = np.zeros((n,)) if qN is None else qN qcN = 0 if qcN is None else qcN H, q, qc, r = init_mat(H,m,n,N=N), init_mat(q,n,N=N), init_mat(qc,1,N=N), init_mat(r,m,N=N) d = init_mat(d,n, N=N) """ In the next line, you should initialize the last cost-term. This is similar to how we in DP had the initialization step > J_N(x_N) = g_N(x_N) Except that since x_N is no longer discrete, we store it as matrices/vectors representing a second-order polynomial, i.e. > J_N(X_N) = 1/2 * x_N' V[N] x_N + v[N]' x_N + vc[N] """ # TODO: 1 lines missing. raise NotImplementedError("Initialize V[N], v[N], vc[N] here") In = np.eye(n) for k in range(N-1,-1,-1): # When you update S_uu and S_ux remember to add regularization as the terms ... (V[k+1] + mu * In) ... # Note that that to find x such that # >>> x = A^{-1} y this # in a numerically stable manner this should be done as # >>> x = np.linalg.solve(A, y) # The terms you need to update will be, in turn: # Suu = ... # Sux = ... # Su = ... # L[k] = ... # l[k] = ... # V[k] = ... # V[k] = ... # v[k] = ... # vc[k] = ... ## TODO: Half of each line of code in the following 4 lines have been replaced by garbage. Make it work and remove the error. #---------------------------------------------------------------------------------------------------------------------------- # Suu = R[k] + B[k].T @ (???????????????????????? # Sux = H[k] + B[k].T @ (???????????????????????? # Su = r[k] + B[k].T @ v[k + 1????????????????????????????? # L[k] = -np.linal????????????????? raise NotImplementedError("Insert your solution and remove this error.") l[k] = -np.linalg.solve(Suu, Su) # You get this for free. Notice how we use np.lingalg.solve(A,x) to compute A^{-1} x V[k] = Q[k] + A[k].T @ V[k+1] @ A[k] - L[k].T @ Suu @ L[k] V[k] = 0.5 * (V[k] + V[k].T) # I recommend putting this here to keep V positive semidefinite # You get these for free: Compare to the code in the algorithm. v[k] = q[k] + A[k].T @ (v[k+1] + V[k+1] @ d[k]) + Sux.T @ l[k] vc[k] = vc[k+1] + qc[k] + d[k].T @ v[k+1] + 1/2*( d[k].T @ V[k+1] @ d[k] ) + 1/2*l[k].T @ Su return (L,l), (V,v,vc)
def init_mat(X, a, b=None, N=None): """ Helper function. Check if X is None, and if so return a list [A, A,....] which is N long and where each A is a (a x b) zero-matrix, else returns X repeated N times: [X, X, ...] """ M0 = np.zeros((a,) if b is None else (a, b)) if X is not None: return [m if m is not None else M0 for m in X] else: return [M0] * N def lqr_rollout(x0,A,B,d,L,l): """ Compute a rollout (states and actions) given solution from LQR controller function. x0 is a vector (starting state), and A, B, d and L, l are lists of system/control matrices. """ x, states,actions = x0, [x0], [] n,m = B[0].shape N = len(L) d = init_mat(d,n,1,N) # Initialize as a list of zero matrices [ np.zeros((n,1)), np.zeros((n,1)), ...] l = init_mat(l,m,1,N) # Initialize as a list of zero matrices [ np.zeros((m,1)), np.zeros((m,1)), ...] for k in range(N): u = L[k] @ x + l[k] x = A[k] @ x + B[k] @ u + d[k] actions.append(u) states.append(x) return states, actions if __name__ == "__main__": """ Solve this problem (see also lecture notes for the same example) http://cse.lab.imtlucca.it/~bemporad/teaching/ac/pdf/AC2-04-LQR-Kalman.pdf """ N = 20 A = np.ones((2,2)) A[1,0] = 0 B = np.asarray([[0], [1]]) Q = np.zeros((2,2)) R = np.ones((1,1)) print("System matrices A, B, Q, R") print(bmatrix(A)) print(bmatrix(B)) print(bmatrix(Q)) print(bmatrix(R)) for rho in [0.1, 10, 100]: Q[0,0] = 1/rho (L,l), (V,v,vc) = LQR(A=[A]*N, B=[B]*N, d=None, Q=[Q]*N, R=[R]*N, QN=Q) x0 = np.asarray( [[1],[0]]) trajectory, actions = lqr_rollout(x0,A=[A]*N, B=[B]*N, d=None,L=L,l=l) xs = np.concatenate(trajectory, axis=1)[0,:] plt.plot(xs, 'o-', label=f'rho={rho}') k = 10 print(f"Control matrix in u_k = L_k x_k + l_k at k={k}:", L[k]) for k in [N-1,N-2,0]: print(f"L[{k}] is:", L[k].round(4)) plt.title("Double integrator") plt.xlabel('Steps $k$') plt.ylabel('$x_1 = $ x[0]') plt.legend() plt.grid() savepdf("dlqr_double_integrator") plt.show()