-
Notifications
You must be signed in to change notification settings - Fork 0
/
time-domain-sls.tex
327 lines (296 loc) · 15.3 KB
/
time-domain-sls.tex
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
\documentclass[11pt]{article}
\usepackage{graphicx, amsmath,amsthm,amssymb,url}
\usepackage{empheq}
\usepackage[colorlinks=true,linkcolor=blue,urlcolor=black,citecolor=blue,breaklinks]{hyperref}
\usepackage[labelfont=bf]{subcaption}
\usepackage[labelfont=bf]{caption}
\usepackage{algorithm,algcompatible,mathtools}
\usepackage[multidot]{grffile}
\usepackage[normalem]{ulem} % just for strikeout
\usepackage{enumerate}
\usepackage{thmtools}
\usepackage{thm-restate}
\usepackage{fullpage,etoolbox}
\usepackage{color}
\usepackage[draft]{commands}
\newcommand*\widefbox[1]{\fbox{\hspace{1em}#1\hspace{1em}}}
\numberwithin{equation}{section}
\def \basefigwidth{0.4}
% Fix space breaking
\relpenalty=9999
\binoppenalty=9999
\title{Working Note: Time-Domain System Level Synthesis}
\author{Nikolai Matni}
\date{\today}
\begin{document}
\maketitle
\section{A time-domain derivation}
Consider the system
\begin{equation}
x_{k+1} = Ax_k + Bu_k + w_k.
\label{eq:dynamics}
\end{equation}
In what follows, we will use $z(t)$ to denote the signal $z$ over time, and $z_k$ to denote its instantaneous value at $t=k$, i.e., $z(t) = (z_0,z_1,\dots)$. For a matrix $X$, we use $X^j$ to denote its $j$th column; we also sometimes overload notation and use a superscript as a label for a vector, i.e., we will sometimes write $x^j(t)$ to denote a specific instantiation of a generic signal $x$.
Our goal is to characterize the closed loop maps from $w(t) \to x(t)$ and $w(t) \to u(t)$ under the assumption that $u(t) = (K\star x)(t)$ for some LTI controller $K(t)$. Here $(g\star h)(t)$ denotes the convolution of signals $g(t)$ and $h(t)$. Specifically, we seek filters $R(t)$ and $M(t)$ such that
\begin{equation}
\begin{array}{rcl}
x(t) &=& (R\star w)(t) \\
u(t) &=& (M\star w)(t),
\end{array}
\label{eq:responses}
\end{equation}
under the assumption that $u(t) = (K\star x)(t)$.
First note that for any disturbance signal $w(t)$ we can write $w(t) = \sum_k \sum_j w_{jk}e_j\delta_{k-t}$, and hence by linearity of our dynamics \eqref{eq:dynamics}, it suffices to characterize the impulse responses of the system due to $w^j(t):= e_j \delta_k$. To that end, we write $x^j(t)$ and $u^j(t)$ to denote the state and control responses due to the $j$th impulse disturbance $w^j(t)$; the dynamics \eqref{eq:dynamics} then simplify to
\begin{equation}
R^j_{k+1} = AR^j_k + BM^j_k, \ R^j_1 = e_j,
\end{equation}
where we have used the fact that the resulting state and control responses satisfy, by definition,
\[
\begin{array}{rclclcl}
x^j_k &=& \sum_{t=1}^k R_tw^j_{k-t} &=& \sum_{t=1}^k R_te_j \delta_{k-t} &=& R^j_k \\
u^j_k &=& \sum_{t=1}^k M_tw^j_{k-t} &=& \sum_{t=1}^k M_te_j \delta_{k-t} &=& M^j_k.
\end{array}
\]
Therefore, concatenating the respective responses, we have that the responses $R(t)$ and $M(t)$ must satsify
\begin{equation}
R_{k+1} = AR_k + BM_k, \ R_1 = I.
\label{eq:constraints}
\end{equation}
Our next claim is that since $R(t)$ and $M(t)$ satisfy $R_0 = 0$ and $M_0=0$, the controller can be implemented directly as $u(t) = (M\star w)(t)$ by noting that $u_k$ only depends on $w_1,\dots,w_{k-1}$, and that at time $k$, one can compute $w_{k-1} = x_k - (Ax_{k-1} + Bu_{k-1})$. It therefore follows from the constraints \eqref{eq:constraints} that the desired state response is also achieved. To make this explicit, notice that for $w(t) = w^j(t) = e_j \delta_k$ we have from the dynamics \eqref{eq:dynamics} that
\[
x^j_1 = e_j = R^j_1.
\]
Similarly, we have that
\[
x^j_2 = Ax^j_1 + Bu^j_1 = AR^j_1 + BM^j_1 = R^j_2
\]
where the final equality follows from the constraints \eqref{eq:constraints}. This argument is easily extended to any $k\geq 2$ by induction.
Note that this argument is valid over finite or infinite horizons -- note that in the infinite horizon case, additional stability constraints must be imposed on $R(t)$ and $M(t)$ to ensure that the resulting closed loop system is itself stable.
\section{Finite-horizon LQR}
\subsection{Zero IC, Gaussian Noise}
Here we consider the finite-horizon LQR problem with zero initial condition, driven by Gaussian noise:
\begin{equation}
\begin{array}{rl}
\min_{x(t),u(t)} & \sum_{k=1}^T \E\left[x_k^\tp Q x_k + u_k^\tp Su_k\right] + \E\left[x_{T+1}^\tp Q_F x_{T+1}\right] \\
\st & x_{k+1} = Ax_k + Bu_k + w_k, \ x_0 = 0,
\end{array}
\label{eq:lqr_noise}
\end{equation}
for $w_k {\iid}\mathcal{N}(0,I)$ (note that this is wlog assuming a non-degenerate distribution for the process noise). We now show how this problem can be recast in terms of the system responses $R(t)$ and $M(t)$. First note that given equations \eqref{eq:responses}, it follows that
\begin{equation}
\begin{array}{rcl}
x_k &=& \sum_{t=1}^k R_t w_{k-t} \\
u_k &=& \sum_{t=1}^k M_t w_{k-t}.
\end{array}
\label{eq:dist}
\end{equation}
Exploiting the i.i.d. nature of the disturbance, we can then write
\begin{equation}
\begin{array}{rcl}
\E\left[x_k^\tp Q x_k\right] &=& \sum_{t=1}^k \Tr R_t^\tp Q R_t \\
\E\left[u_k^\tp S u_k\right] &=& \sum_{t=1}^k \Tr M_t^\tp S M_t \\
\E\left[x_{T+1}^\tp Q_F x_{T+1}\right] &=& \sum_{t=1}^{T+1} \Tr R_t^\tp Q_F R_t.
\end{array}
\label{eq:new_expectations}
\end{equation}
Plugging equations \eqref{eq:new_expectations} into the objective function of optimization problem \eqref{eq:lqr_noise}, collecting like terms and optimizing over system responses as opposed to state and input trajectories, we may rewrite the standard LQR problem as
\begin{equation}
\begin{array}{rl}
\min_{R(t),M(t)} & \sum_{\tau=1}^T (T+1-\tau)\left[\Tr R_\tau^\tp Q R_\tau + \Tr M_\tau^\tp S M_\tau\right] + \sum_{s=1}^{T+1} \Tr R_s^\tp Q_F R_s \\
\st & R_{k+1} = AR_k + BM_k, \ R_1 = I.
\end{array}
\end{equation}
\subsection{Nonzero IC, no noise}
Here we consider the finite-horizon LQR problem with nonzero initial condition and no noise:
\begin{equation}
\begin{array}{rl}
\min_{x(t),u(t)} & \sum_{k=1}^T \left[x_k^\tp Q x_k + u_k^\tp Su_k\right]+ x_{T+1}^\tp Q_F x_{T+1}\\
\st & x_{k+1} = Ax_k + Bu_k, \ x_0 = \xi.
\end{array}
\label{eq:lqr_IC}
\end{equation}
In this case, we can define the disturbance signal driving the system to be
\begin{equation}
w(t) := \xi \delta_k = (\xi,0,0,\dots).
\end{equation}
It then follows that
\begin{equation}
\begin{array}{rclclcl}
x_k &=& \sum_{t=1}^k R_t w_{k-t} &=& \sum_{t=1}^k R_t \xi \delta_{k-t} &=& R_k \xi \\
u_k &=& \sum_{t=1}^k M_t w_{k-t} &=& \sum_{t=1}^k M_t \xi \delta_{k-t} &=& M_k \xi,
\end{array}
\end{equation}
meaning that the the LQR problem \eqref{eq:lqr_IC} can be rewritten as
\begin{equation}
\begin{array}{rl}
\min_{R(t),M(t)} & \sum_{k=1}^T\left[\Tr R_k^\tp Q R_k\Xi + \Tr M_k^\tp S M_k\Xi\right] + \Tr R_{T+1}^\tp Q_F R_{T+1}\Xi \\
\st & R_{k+1} = AR_k + BM_k, \ R_1 = I,
\end{array}
\end{equation}
where $\Xi := \xi \xi^\tp$.
\subsection{Putting them together}
We now consider the standard LQR problem with nonzero initial condition and driving process noise
\begin{equation}
\begin{array}{rl}
\min_{x(t),u(t)} & \sum_{k=1}^T \E\left[x_k^\tp Qx_k + u_k^\tp Su_k\right]+ \E\left[x_{T+1}^\tp Q_F x_{T+1}\right]\\
\st & x_{k+1} = Ax_k + Bu_k + w_k, \ x_0 = \xi,
\end{array}
\label{eq:lqr_standard}
\end{equation}
for $w_k {\iid}\mathcal{N}(0,I)$. Exploiting linearity and the fact that $\E\left[ w_k\xi^\tp\right] = 0$ for all $k$ we can simply combine the results of the previous subsections, allowing us to rewrite the standard LQR problem \eqref{eq:lqr_standard} as
\begin{equation}
\begin{array}{rl}
\min_{R(t),M(t)} & J_\mathrm{stoch}(R(t),M(t)) + J_\mathrm{det}(R(t),M(t),\Xi) \\
\st & R_{k+1} = AR_k + BM_k, \ R_1 = I,
\end{array}
\label{eq:sls_LQR}
\end{equation}
where
\begin{equation}
\begin{array}{rcl}
J_\mathrm{stoch}(R(t),M(t)) &:=& \sum_{\tau=1}^T (T+1-\tau)\left[\Tr R_\tau^\tp Q R_\tau + \Tr M_\tau^\tp S M_\tau\right] + \sum_{s=1}^{T+1} \Tr R_s^\tp Q_F R_s \\
J_\mathrm{det}(R(t),M(t),\Xi) &:=& \sum_{k=1}^T\left[\Tr R_k^\tp Q R_k\Xi + \Tr M_k^\tp S M_k\Xi\right] + \Tr R_{T+1}^\tp Q_F R_{T+1}\Xi.
\end{array}
\end{equation}
\section{A redo of the above using block-Toeplitz operators}
In the following we work with the signals and block-Toeplitz lower-triangular operators, and assume $x_0 = 0$, but allow $w_0 \neq 0$\footnote{This is wlog, as $x_1 = w_0$ can be taken as the nonzero initial condition.}:
\begin{equation}
\tf x = \begin{bmatrix} x_1 \\ x_2 \\ \vdots \\ x_{T+1} \end{bmatrix} \ \tf u = \begin{bmatrix} u_1 \\ u_2 \\ \vdots \\ u_{T+1} \end{bmatrix} \ \tf w = \begin{bmatrix} w_0 \\ w_1 \\ \vdots \\ w_T\end{bmatrix} \ \tf R = \begin{bmatrix} R_1 & & & \\ R_2 & R_1 & & \\ \vdots & \ddots & \ddots & \\ R_{T+1} & \cdots & R_2 & R_1 \end{bmatrix} \ \tf M = \begin{bmatrix} M_1 & & & \\ M_2 & M_1 & & \\ \vdots & \ddots & \ddots & \\ M_{T+1} & \cdots & M_2 & M_1 \end{bmatrix} \label{eq:definitions}.
\end{equation}
Further let $Z$ be the block-downshift operator, i.e., a matrix with identity matrices along its first block sub-diagonal and zeros elsewhere. We can then equivalently write equation \eqref{eq:responses} as
\begin{equation}
\begin{array}{rcl}
\tf x &=& \tf R \tf w \\
\tf u &=& \tf M \tf w.
\label{eq:toep_resp}
\end{array}
\end{equation}
Following the same reasoning as above, it is straightforward to show that the responses $\{\tf R, \tf M\}$ must satisfy the equation
\begin{equation}
\begin{bmatrix} I - Z\A & -Z\B \end{bmatrix}\begin{bmatrix} \tf R \\ \tf M \end{bmatrix} = I,
\label{eq:subspace}
\end{equation}
where
\begin{equation}
\A := \mathrm{blkdiag}\left(A_1,A_2,\dots,A_T\right), \ \B := \mathrm{blkdiag}\left(B_1,B_2,\dots,B_T\right),
\end{equation}
\begin{theorem}
For the dynamics \eqref{eq:dynamics} with block-Toeplitz lower-triangular state feedback law $\tf K$ defining the control action as $\tf u = \tf K \tf x$, the following are true
\begin{enumerate}
\item the affine subspace defined by \eqref{eq:subspace} parameterizes all possible responses \eqref{eq:toep_resp} of the system.
\item for any block-Toeplitz matrices $\{\tf R, \tf M\}$ satisfying \eqref{eq:subspace}, the controller $\tf K = \tf M \tf R^{-1}$ achieves the desired response.
\end{enumerate}
\label{thm:param}
\end{theorem}
\begin{proof}
First note that we can equivalently write the dynamics \eqref{eq:dynamics} as
\begin{equation}
\tf x = Z\A \tf x+ Z\B \tf u + \tf w
\end{equation}
Proof of 1.: Let $\tf K$ be any block-Toeplitz lower-triangular operator, and $\tf u = \tf K \tf x$. Then
\begin{equation}
\tf x = Z(\A+\B\tf K)\tf x + \tf w,
\end{equation}
which implies that
\begin{equation}
\begin{array}{rcl}
\tf x &=& (I-Z(\A+\B\tf K))^{-1}\tf w \\
\tf u &=& \tf K(I-Z(\A+\B\tf K))^{-1}\tf w.
\end{array}
\end{equation}
It is then easily seen that
\begin{equation}
\begin{bmatrix} I - Z\A \ -Z\B\end{bmatrix}\begin{bmatrix}(I-Z(\A+\B\tf K))^{-1} \\ \tf K (I-Z(\A+\B\tf K))^{-1} \end{bmatrix} = (I - Z\A -Z\B\tf K)(I-Z(\A+\B\tf K))^{-1} = I.
\end{equation}
Proof of 2.: Let $\tf K = \tf M \tf R^{-1}$. Then
\begin{equation}
\tf x = (I-Z(\A+\B\tf M \tf R^{-1}))^{-1}\tf w.
\end{equation}
But we then have that
\begin{equation}
(I-Z(\A+\B\tf M \tf R^{-1}))^{-1} = ((I-Z\A)\tf R - Z\B \tf M)\tf R^{-1})^{-1} = \tf R ((I-Z\A)\tf R - Z\B \tf M)^{-1} = \tf R
\end{equation}
where the last equality follows form the fact that $\{\tf R, \tf M\}$ satisfy \eqref{eq:subspace}. Similarly we have that
\begin{equation}
\tf u = \tf M \tf R^{-1}\tf x = \tf M \tf R^{-1} \tf R \tf w = \tf M \tf w,
\end{equation}
where the second equality follows from the fact that $\tf x = \tf R \tf w$.
\end{proof}
Next we consider a robust variant of the above theorem. Before stating the result, we let
\begin{equation}
\tf \Delta = \begin{bmatrix} \Delta_1 & & & \\ \Delta_2 & \Delta_1 & & \\ \vdots & \ddots & \ddots & \\ \Delta_{T+1} & \cdots & \Delta_2 & \Delta_1 \end{bmatrix}
\label{eq:delta}
\end{equation}
be an arbitrary block-Toeplitz lower-triangular matrix.
\begin{theorem}
Let $\tf \Delta$ be defined as in equation \eqref{eq:delta}, and suppose that $\{\tf R, \tf M\}$ satisfy
\begin{equation}
\begin{bmatrix}I - Z\A & -Z\B \end{bmatrix}\begin{bmatrix} \tf R \\ \tf M \end{bmatrix} = I + \tf \Delta.
\label{eq:robust}
\end{equation}
If $(I+\Delta_1)^{-1}$ exists, then the controller $\tf K = \tf M \tf R^{-1}$ achieves the system response
\begin{equation}
\begin{bmatrix}
\tf R \\ \tf M \end{bmatrix} (I + \tf \Delta)^{-1}
\label{eq:robust_response}
\end{equation}
\end{theorem}
\begin{proof}
If $(I+\Delta_1)^{-1}$ exists, then so does $(I + \tf \Delta)^{-1}$ does too, and therefore the constraint \eqref{eq:robust} is equivalent to
\begin{equation}
\begin{bmatrix}I - Z\A & -Z\B \end{bmatrix}\begin{bmatrix} \tf R \\ \tf M \end{bmatrix}(I + \tf \Delta)^{-1} = I.
\end{equation}
Then noting that $\tf K = \tf M \tf R^{-1} = \tf M (I + \tf \Delta)^{-1} (\tf R (I + \tf \Delta)^{-1})^{-1} $, we then have, by Theorem \ref{thm:param}, that the controller $\tf K = \tf M \tf R^{-1} $ achieves the system responses \eqref{eq:robust_response}.
\end{proof}
\section{LQR optimal control problems}
Going forward, we assume that $\tf w \iid \mathcal{N}(0,I)$. For simplicity of notation, we assume that $u_{T+1}$ is included in the cost functional, i.e., that the cost we seek to minimize is
\begin{equation}
\begin{array}{rl}
\min_{x(t),u(t)} & \sum_{k=1}^{T+1} \E\left[x_k^\tp Q_k x_k + u_k^\tp S_ku_k\right] \\
\st & x_{k+1} = Ax_k + Bu_k + w_k, \ x_0 = 0.
\end{array}
\label{eq:lqr_toep}
\end{equation}
This problem can be rewritten as
\begin{equation}
\begin{array}{rl}
\min_{\tf R,\tf M} & \bignorm{\begin{bmatrix} \Qq^{\frac{1}{2}} & 0 \\ 0 & \Ss^{\frac{1}{2}}\end{bmatrix}\begin{bmatrix}\tf R \\ \tf M\end{bmatrix}}_F^2 \\
\st & \begin{bmatrix} I - Z\A & -Z\B \end{bmatrix}\begin{bmatrix} \tf R \\ \tf M \end{bmatrix} = I,
\end{array}
\label{eq:lqr_compact}
\end{equation}
where
\begin{equation}
\Qq := \mathrm{blkdiag}(Q_1,Q_2, \dots, Q_{T+1}), \ \Ss := \mathrm{blkdiag}(S_1,S_2, \dots, S_{T+1}).
\end{equation}
\section{A statement of the Learning-LQR result}
The above conditions/results should look extremely similar to what we have seen in the infinite horizon setting. Letting $\Ahh = \A + \DA$ and $\Bhh = \B + \DB$, with the assumption that $\|\DA\| \leq \epsilon_A$ and $\|\DB\|\leq \epsilon_B$, we can step through the same proof procedure as that in the infinite horizon setting, replacing $\hinf$ norms with operator norms, and $\mathcal{H}_2$ norms with Frobenius norms, to obtain an analogous result.
We define the finite-time resolvent $\Res{\tf M}$ of an operator $\tf M$ to be
\begin{equation}
\Res{\tf M} := (I - Z\tf M)^{-1},
\end{equation}
and let
\begin{equation}
\tf \Delta := - Z(\DA + \DB \tf K_\star)\Res{\A + \B\tf K_\star},
\end{equation}
for $\tf K_\star$ the optimal LQR controller. Further let
\begin{equation}
\zeta := (\epsilon_A + \epsilon_B \| \tf K_\star\|)\Res{\A + \B\tf K_\star}.
\end{equation}
Then, if $\zeta \leq \frac{1}{5}$, the controller $\tf K=\tf M \tf R^{-1}$ computed by solving the optimization problem
\begin{equation}
\begin{array}{rl}
\min_{\gamma \in [0,1)} \frac{1}{1-\gamma} \min_{\tf R, \tf M} & \bignorm{\begin{bmatrix} \Qq^{\frac{1}{2}} & 0 \\ 0 & \Ss^{\frac{1}{2}}\end{bmatrix}\begin{bmatrix}\tf R \\ \tf M\end{bmatrix}}_F \\
\st & \begin{bmatrix} I - Z\Ahh & -Z\Bhh \end{bmatrix}\begin{bmatrix} \tf R \\ \tf M \end{bmatrix} = I, \ \sqrt{2}\bignorm{\begin{matrix} \epsilon_A\tf R \\ \epsilon_B\tf M\end{matrix}} \leq \gamma
\end{array}
\end{equation}
achieves the relative performance bound
\begin{equation}
\frac{J(\A,\B,\tf K) - J_\star}{J_\star} \leq 5 \zeta,
\end{equation}
where $J_\star$ is the optimal performance achievable if $\A$ and $\B$ are known.
\end{document}
%%% Local Variables:
%%% mode: latex
%%% TeX-master: t
%%% End: