From 02c2bc15dcdeae8dfb4cf4da6fadbb3d7517b3b7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?St=C3=A9phane=20Adjemian=20=28Ry=C3=BBk=29?=
Date: Fri, 3 Nov 2023 23:00:23 +0100
Subject: [PATCH] Add notes.

---
 notes.tex | 340 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 340 insertions(+)
 create mode 100644 notes.tex

diff --git a/notes.tex b/notes.tex
new file mode 100644
index 0000000..49d3dd2
--- /dev/null
+++ b/notes.tex
@@ -0,0 +1,340 @@
+\synctex=1
+
+\documentclass[oneside]{amsart}
+\usepackage{graphicx,float,caption,longtable,booktabs,dcolumn,geometry,changepage,nopageno,hyperref,fancyvrb}
+
+\usepackage{pgfplots}
+
+\newcolumntype{d}[1]{D{.}{.}{#1}}
+
+\pgfplotsset{compat=newest}
+\usetikzlibrary{plotmarks}
+\usetikzlibrary{arrows.meta}
+\usepgfplotslibrary{patchplots}
+% \usepackage{grffile}
+% \usepackage{amsmath}
+\pgfplotsset{every axis/.append style={
+    scaled y ticks = false,
+    scaled x ticks = true,
+    xticklabel style={/pgf/number format/fixed},
+    yticklabel style={/pgf/number format/fixed, /pgf/number format/1000 sep={}},
+  }}
+
+\hypersetup{
+  colorlinks=true,
+  linkcolor=blue,
+  filecolor=magenta,
+  urlcolor=cyan,
+  pdftitle={Overleaf Example},
+  pdfpagemode=FullScreen,
+  }
+
+\urlstyle{same}
+\VerbatimFootnotes
+
+
+\setlength{\parindent}{0pt}
+
+\begin{document}
+
+
+The evaluation of the likelihood of a linear DSGE model relies on the Kalman
+filter. If the steady state of the DSGE model is provided in closed form, the
+Kalman filter is the part of the estimation where most of the time is spent. In
+this note we evaluate the gain we can obtain by considering an alternative
+initialisation of the Kalman filter.\newline
+
+The reduced form DSGE model is given by:
+\[
+y_t = \bar y + A \left(y_{t-1}-\bar y\right) + B \varepsilon_t
+\]
+where $y_t$ is an $n\times 1$ vector of the endogenous variables defined by the
+DSGE model, $\bar y$ the steady state of these endogenous variables, and
+$\varepsilon_t$ a $q\times 1$ vector of Gaussian innovations with zero mean and
+covariance matrix $\Sigma_{\varepsilon}$. Matrices $A$ and $B$, respectively of
+dimensions $n\times n$ and $n\times q$, as well as the steady state $\bar y$,
+are nonlinear functions of the deep parameters we aim to estimate (along with
+the covariance matrix $\Sigma_{\varepsilon}$). We only observe $p$ endogenous
+variables, with $p\leq q$ to avoid stochastic singularity; the vector of
+observed endogenous variables is:
+\[
+y^{\star}_t = Z y_t
+\]
+where $Z$ is a $p\times n$ selection matrix\footnote{Here we implicitly exclude,
+  without loss of generality, measurement errors.}.
+The likelihood, the density of the sample, can be expressed as the product of
+the conditional (Gaussian) densities of the prediction errors $v_t$, which are
+determined recursively with the Kalman filter:
+\begin{equation}
+  \label{eq:kalman:1}
+  v_t = y_t^{\star} - \bar y^{\star}- Z \hat y_t
+\end{equation}
+
+\begin{equation}
+  \label{eq:kalman:2}
+  F_t = Z P_t Z'
+\end{equation}
+
+\begin{equation}
+  \label{eq:kalman:3}
+  K_t = A P_t Z' F_t^{-1}
+\end{equation}
+
+\begin{equation}
+  \label{eq:kalman:4}
+  \hat y_{t+1} = A \hat y_t + K_t v_t
+\end{equation}
+
+\begin{equation}
+  \label{eq:kalman:5}
+  P_{t+1} = A P_t \left( A - K_t Z\right)' + B \Sigma_{\varepsilon}B'
+\end{equation}
+
+where $F_t$ is the conditional covariance matrix of the prediction errors,
+$K_t$ is the Kalman gain matrix (which determines how much we learn from
+observation $t$), $\hat y_t$ is the vector of endogenous variables in deviation
+from the steady state, and $P_t$ is the conditional covariance matrix of
+$\hat y_t$. Substituting (\ref{eq:kalman:2}) and (\ref{eq:kalman:3}) into
+(\ref{eq:kalman:5}), we obtain the following recurrence for the conditional
+covariance matrix of the endogenous variables:
+\begin{equation}
+  \label{eq:ricatti}
+  P_{t+1} = A P_t A' - A P_t Z' \left(Z P_t Z' \right)^{-1}Z P_t A' + B \Sigma_{\varepsilon}B'
+\end{equation}
+This is the Riccati equation; it is where most of the time is spent when
+evaluating the Kalman filter, and where round-off errors can accumulate
+dramatically. When iterating over these equations, the Riccati recursion
+eventually converges (provided the sample is large enough) to a fixed point
+$\bar P$ satisfying:
+\begin{equation}
+  \label{eq:ricatti:fp}
+A \bar P A' - \bar P - A \bar P Z' \left(Z \bar P Z' \right)^{-1}Z \bar P A' + B \Sigma_{\varepsilon}B' = 0
+\end{equation}
+and the Kalman filter equations collapse into the following system:
+\[
+  \begin{cases}
+    v_t &= y_t^{\star} - \bar y^{\star}- Z \hat y_t\\
+    \hat y_{t+1} &= A \hat y_t + \bar K v_t
+  \end{cases}
+\]
+referred to as the steady state Kalman filter, with $\bar F = Z \bar P Z'$ and
+$\bar K = A \bar P Z' \bar F^{-1}$, on which iterating obviously takes much
+less time than with system (\ref{eq:kalman:1})--(\ref{eq:kalman:5}). The number
+of iterations required to (approximately) converge to the steady state does not
+depend on the data but only on the persistence in the model (through matrix
+$A$).\newline
+
+Due to its recursive nature, the Kalman filter requires initial conditions
+$\hat y_0$ and $P_0$. Assuming that the model is stationary, a usual choice for
+the initial condition is the (stochastic) fixed point of the reduced form model:
+$\hat y_0 = 0$ and $P_0=P^{\star}$ solving:
+\[
+P^{\star} - A P^{\star} A' - B \Sigma_{\varepsilon} B' = 0
+\]
+which is a discrete Lyapunov equation that can easily be solved.\newline
+
+In this note we consider instead the fixed point of the Riccati equation,
+$\bar P$, as the initial condition for the covariance matrix of the endogenous
+variables. The computational advantage is obvious: we jump directly to the
+steady state Kalman filter and never go through
+(\ref{eq:kalman:1})--(\ref{eq:kalman:5}). Clearly this will reduce the time
+required to compute the likelihood, but it will also change the evaluation of
+the likelihood. Using a medium-scale model, we quantify the savings in
+computing time and evaluate the consequences for the estimation of the
+parameters.\newline
+
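+To make the two initialisations concrete, the following sketch (in
+Python/NumPy, outside Dynare) computes $P^{\star}$ from the Lyapunov equation
+and $\bar P$ by brute-force iteration on (\ref{eq:ricatti}), and then evaluates
+the Gaussian log-likelihood with the steady state Kalman filter started at
+$\bar P$. The matrices \verb+A+, \verb+B+, \verb+Z+ and \verb+Sigma+ are toy
+placeholders rather than values from any actual model, the helper name
+\verb+steady_state_kf_loglik+ is purely illustrative, and the naive iteration
+merely stands in for a dedicated Riccati solver.
+\begin{Verbatim}[fontsize=\small]
+import numpy as np
+from scipy.linalg import solve_discrete_lyapunov
+
+# Toy state space (placeholder values): y_t = A y_{t-1} + B e_t, y*_t = Z y_t
+A = np.array([[0.90, 0.10],
+              [0.00, 0.50]])
+B = np.eye(2)
+Sigma = np.diag([0.01, 0.04])
+Z = np.array([[1.0, 0.0]])
+Q = B @ Sigma @ B.T
+
+# P*: fixed point of the reduced form model (discrete Lyapunov equation)
+P_star = solve_discrete_lyapunov(A, Q)
+
+# P bar: fixed point of the Riccati equation, here by brute-force iteration
+P = P_star.copy()
+for _ in range(100000):
+    F = Z @ P @ Z.T
+    Pn = A @ P @ A.T - A @ P @ Z.T @ np.linalg.solve(F, Z @ P @ A.T) + Q
+    if np.max(np.abs(Pn - P)) < 1e-12:
+        break
+    P = Pn
+P_bar = Pn
+
+def steady_state_kf_loglik(ystar):
+    """Log-likelihood of demeaned data ystar (T x p), filter started at P bar."""
+    F = Z @ P_bar @ Z.T
+    iF = np.linalg.inv(F)
+    K = A @ P_bar @ Z.T @ iF                 # steady state Kalman gain
+    logdetF = np.log(np.linalg.det(F))
+    yhat, llik = np.zeros(A.shape[0]), 0.0
+    for yobs in ystar:
+        v = yobs - Z @ yhat                  # prediction error
+        llik += -0.5 * (len(v) * np.log(2 * np.pi) + logdetF + v @ iF @ v)
+        yhat = A @ yhat + K @ v
+    return llik
+\end{Verbatim}
+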
+Dynare provides a MEX file, linked to the subroutine \verb+sb02od+ from the
+\href{http://www.slicot.org/}{Slicot library} (version 5.0), to solve
+(\ref{eq:ricatti:fp}) for $\bar{P}$. The use of $\bar P$ as an initial condition
+for the Kalman filter is triggered by option \verb+lik_init=4+ in the
+\verb+estimation+ command.\newline
+
+We consider the estimation of the Quest-III model (113 endogenous variables, 19
+innovations, 17 observed variables) as an example. All the codes needed to
+reproduce the results are available in a
+\href{https://git.dynare.org/stepan-a/quest-iii-estimation}{Git repository}. The
+model is estimated under both settings (initialisation with the Riccati fixed
+point $\bar P$, or with $P^{\star}$)\footnote{To reproduce the results run
+  \verb+Q3.mod+, provided in the root of the Git repository, with macro options
+  \verb+-DUSE_RICATTI_FIXED_POINT=true+ and
+  \verb+-DUSE_RICATTI_FIXED_POINT=false+.} with \verb+mode_compute=5+. We find
+that the estimation of the model with the fixed point of the reduced form model
+($P^{\star}$) is 33\% slower than the estimation of the same model with the
+fixed point of the Riccati equation ($\bar P$). Because the objective functions
+are different, the optimiser probably does not behave identically in both
+cases. If we just evaluate the likelihood at the same point (the prior mean),
+instead of running \verb+mode_compute=5+, we find that the evaluation with
+$P^{\star}$ is only 21\% slower than the evaluation with $\bar P$; the
+additional slowdown observed during estimation therefore suggests that
+initialising with the fixed point of the Riccati equation makes the objective
+easier to optimise. This is expected, since it is well known that iterations on
+(\ref{eq:ricatti}) are not well behaved numerically. The values of the log
+posterior kernel at the estimated posterior mode are close (7235.92 with
+$\bar P$ and 7237.68 with $P^{\star}$), but much larger differences can be
+observed elsewhere. For instance, at the prior mean the values are $-275.58$
+with $\bar P$ and 5.13 with $P^{\star}$. Do these differences in the likelihood
+functions matter? Table \ref{tab:1} shows the estimated values of the
+parameters. Both likelihood functions deliver similar estimates in terms of
+point estimates and variances\footnote{Note however that the estimates of the
+  posterior variance should be taken with care, since they rely on an
+  asymptotic Gaussian approximation. For some parameters the estimated
+  posterior variance is larger than the prior variance, which should not be the
+  case.}. Figures \ref{fig:1}--\ref{fig:7} display cross sections of the
+posterior kernel (solid black curves for the initialisation with $P^{\star}$,
+dashed red curves for the initialisation with $\bar P$). The conclusion seems
+to be that even if the likelihood functions are different, inference on the
+parameters is only marginally affected. At least as a first attempt, we can
+safely consider the alternative initialisation of the Kalman filter. We should
+also compare posterior densities, running samplers under both settings; this
+would bring a more definitive answer (at least in the case of Quest-III).
+Ideally, we should also build a Monte Carlo exercise on a smaller model. All
+this is left for future research. Note that the gain in terms of computing time
+is model \textit{and parameter} dependent: if the standard Kalman filter
+converges quickly to the steady state, the gain is small (this depends on the
+persistence in the model).
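+To illustrate this last point, the sketch below (same language and same caveats
+as above: the state space is a toy signal-extraction problem, not an actual
+model) counts the iterations on (\ref{eq:ricatti}) needed to reach the fixed
+point when an AR(1) component with root \verb+rho+ is observed together with an
+iid shock; the count increases with the persistence of the signal.
+\begin{Verbatim}[fontsize=\small]
+import numpy as np
+
+def riccati_iterations(rho, tol=1e-9, maxit=100000):
+    # State: x1 is an AR(1) with root rho and unit unconditional variance,
+    # x2 is an iid shock; we observe the sum x1 + x2 (so p = 1 < q = 2).
+    A = np.array([[rho, 0.0], [0.0, 0.0]])
+    Z = np.array([[1.0, 1.0]])
+    Q = np.diag([1.0 - rho**2, 1.0])         # B = I, Sigma = Q
+    P = np.eye(2)                            # P*, the reduced form fixed point
+    for it in range(1, maxit + 1):
+        F = Z @ P @ Z.T
+        Pn = A @ P @ A.T - A @ P @ Z.T @ np.linalg.solve(F, Z @ P @ A.T) + Q
+        if np.max(np.abs(Pn - P)) < tol:
+            return it
+        P = Pn
+    return maxit
+
+for rho in (0.5, 0.9, 0.99):
+    print(rho, riccati_iterations(rho))
+\end{Verbatim}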
+
+\newpage
+
+\begin{longtable}[l]{ l l d{4} d{4} d{4} d{4} d{4} d{4} }
+\toprule
+\multicolumn{1}{c}{} & \multicolumn{3}{c}{\textbf{Prior}} & \multicolumn{2}{c}{\textbf{Posterior with $P^{\star}$}} & \multicolumn{2}{c}{\textbf{Posterior with $\bar P$}}\\
+\cmidrule(rl){2-4}
+\cmidrule(rl){5-6}
+\cmidrule(rl){7-8}
+\textbf{Parameters} & Shape & \multicolumn{1}{c}{Mode} & \multicolumn{1}{c}{SD} & \multicolumn{1}{c}{Mode} & \multicolumn{1}{c}{SD} & \multicolumn{1}{c}{Mode} & \multicolumn{1}{c}{SD} \\
+\midrule
+\endhead
+
+\\
+\midrule
+\multicolumn{8}{c}{\tiny{\textbf{Continued on next page}}}\\
+\endfoot
+
+\bottomrule
+\\
+\caption{\textbf{Estimation results of Quest-III.} Without and with the fixed point of the Riccati equation as an initial condition of the Kalman filter. Standard deviation estimates rely on the inverse of the Hessian matrix at the estimated posterior mode.}
+\endlastfoot
+
+E\_EPS\_C & Gamma & 0.0320 & 0.0300 & 0.0528 & 0.0109 & 0.0521 & 0.0111\\
+E\_EPS\_ETA & Gamma & 0.0640 & 0.0600 & 0.1163 & 0.0340 & 0.1225 & 0.0347\\
+E\_EPS\_ETAM & Gamma & 0.0088 & 0.0150 & 0.0183 & 0.0022 & 0.0183 & 0.0023\\
+E\_EPS\_ETAX & Gamma & 0.0640 & 0.0600 & 0.0274 & 0.0152 & 0.0279 & 0.0159\\
+E\_EPS\_EX & Gamma & 0.0032 & 0.0030 & 0.0044 & 0.0004 & 0.0043 & 0.0004\\
+E\_EPS\_G & Gamma & 0.0320 & 0.0300 & 0.0047 & 0.0003 & 0.0047 & 0.0003\\
+E\_EPS\_IG & Gamma & 0.0320 & 0.0300 & 0.0054 & 0.0004 & 0.0054 & 0.0004\\
+E\_EPS\_L & Gamma & 0.0320 & 0.0300 & 0.0220 & 0.0052 & 0.0213 & 0.0049\\
+E\_EPS\_LOL & Gamma & 0.0032 & 0.0030 & 0.0040 & 0.0009 & 0.0045 & 0.0008\\
+E\_EPS\_M & Gamma & 0.0016 & 0.0015 & 0.0012 & 0.0001 & 0.0012 & 0.0001\\
+E\_EPS\_RPREME & Gamma & 0.0032 & 0.0030 & 0.0015 & 0.0002 & 0.0014 & 0.0002\\
+E\_EPS\_RPREMK & Gamma & 0.0032 & 0.0030 & 0.0050 & 0.0016 & 0.0054 & 0.0018\\
+E\_EPS\_TR & Gamma & 0.0320 & 0.0300 & 0.0022 & 0.0001 & 0.0022 & 0.0001\\
+E\_EPS\_W & Gamma & 0.0320 & 0.0300 & 0.0396 & 0.0129 & 0.0410 & 0.0136\\
+E\_EPS\_Y & Gamma & 0.0320 & 0.0300 & 0.0118 & 0.0012 & 0.0120 & 0.0012\\
+A2E & Beta & 0.0500 & 0.0240 & 0.0378 & 0.0132 & 0.0370 & 0.0130\\
+G1E & Beta & 0.0000 & 0.6000 & -0.0456 & 0.0977 & -0.0455 & 0.0963\\
+GAMIE & Gamma & 16.6667 & 20.0000 & 62.7825 & 18.9969 & 66.3819 & 19.6627\\
+GAMI2E & Gamma & 8.3333 & 10.0000 & 0.6044 & 0.3537 & 0.6161 & 0.3627\\
+GAMLE & Gamma & 16.6667 & 20.0000 & 54.0067 & 10.8176 & 55.4743 & 11.2277\\
+GAMPE & Gamma & 16.6667 & 20.0000 & 45.8370 & 13.5341 & 48.2540 & 13.8116\\
+GAMPME & Gamma & 16.6667 & 20.0000 & 1.3257 & 0.4455 & 1.3501 & 0.4538\\
+GAMPXE & Gamma & 16.6667 & 20.0000 & 8.9856 & 7.5122 & 9.3195 & 7.9101\\
+GAMWE & Gamma & 16.6667 & 20.0000 & 0.5913 & 0.3666 & 0.5815 & 0.3625\\
+GSLAG & Beta & 0.0000 & 0.4000 & -0.4336 & 0.1045 & -0.4321 & 0.1046\\
+GVECM & Beta & -0.5000 & 0.2000 & -0.1552 & 0.0432 & -0.1591 & 0.0439\\
+HABE & Beta & 0.7222 & 0.1000 & 0.5381 & 0.0400 & 0.5369 & 0.0398\\
+HABLE & Beta & 0.7222 & 0.1000 & 0.8731 & 0.0560 & 0.8637 & 0.0591\\
+IG1E & Beta & 0.0000 & 0.6000 & 0.1802 & 0.0943 & 0.1824 & 0.0924\\
+IGSLAG & Beta & 0.5000 & 0.2000 & 0.4384 & 0.0819 & 0.4401 & 0.0819\\
+IGVECM & Beta & -0.5000 & 0.2000 & -0.1330 & 0.0586 & -0.1354 & 0.0582\\
+ILAGE & Beta & 0.8856 & 0.0750 & 0.8887 & 0.0156 & 0.8856 & 0.0160\\
+KAPPAE & Gamma & 1.0500 & 0.5000 & 1.6617 & 0.3660 & 1.6443 & 0.3596\\
+RHOCE & Beta & 0.8856 & 0.0750 & 0.9305 & 0.0242 & 0.9382 & 0.0221\\
+RHOETA & Beta & 0.5000 & 0.2000 & 0.0771 & 0.0596 & 0.0733 & 0.0563\\
+RHOETAM & Beta & 0.8856 & 0.0750 & 0.9599 & 0.0138 &
0.9602 & 0.0138\\ +RHOETAX & Beta & 0.8856 & 0.0750 & 0.8841 & 0.0592 & 0.8824 & 0.0610\\ +RHOGE & Beta & 0.5000 & 0.2000 & 0.2930 & 0.1116 & 0.2978 & 0.1130\\ +RHOIG & Beta & 0.8856 & 0.0750 & 0.8789 & 0.0660 & 0.8806 & 0.0645\\ +RHOLE & Beta & 0.8856 & 0.0750 & 0.9844 & 0.0081 & 0.9865 & 0.0076\\ +RHOL0 & Beta & 0.9578 & 0.0200 & 0.9360 & 0.0233 & 0.9320 & 0.0214\\ +RHOPCPM & Beta & 0.5000 & 0.2000 & 0.6987 & 0.1411 & 0.7006 & 0.1403\\ +RHOPWPX & Beta & 0.5000 & 0.2000 & 0.1889 & 0.0667 & 0.1895 & 0.0671\\ +RHORPE & Beta & 0.8856 & 0.0750 & 0.9908 & 0.0065 & 0.9943 & 0.0037\\ +RHORPK & Beta & 0.8856 & 0.0750 & 0.9254 & 0.0270 & 0.9175 & 0.0271\\ +RHOUCAP0 & Beta & 0.9578 & 0.0200 & 0.9602 & 0.0160 & 0.9560 & 0.0160\\ +RPREME & Beta & 0.0200 & 0.0080 & 0.0205 & 0.0071 & 0.0204 & 0.0068\\ +RPREMK & Beta & 0.0200 & 0.0080 & 0.0243 & 0.0030 & 0.0256 & 0.0025\\ +SE & Beta & 0.8000 & 0.0800 & 0.8540 & 0.0208 & 0.8561 & 0.0206\\ +SFPE & Beta & 0.5000 & 0.2000 & 0.8831 & 0.0672 & 0.8878 & 0.0656\\ +SFPME & Beta & 0.5000 & 0.2000 & 0.7211 & 0.1288 & 0.7239 & 0.1277\\ +SFPXE & Beta & 0.5000 & 0.2000 & 0.9293 & 0.0508 & 0.9299 & 0.0503\\ +SFWE & Beta & 0.5000 & 0.2000 & 0.7997 & 0.1439 & 0.8045 & 0.1413\\ +SIGC & Gamma & 1.5000 & 1.0000 & 4.3516 & 1.2195 & 4.3758 & 1.2276\\ +SIGEXE & Gamma & 1.0500 & 0.5000 & 2.4811 & 0.3204 & 2.4645 & 0.3212\\ +SIGIME & Gamma & 1.0500 & 0.5000 & 1.1520 & 0.1930 & 1.1564 & 0.1931\\ +SLC & Beta & 0.5000 & 0.1000 & 0.3293 & 0.0668 & 0.3225 & 0.0668\\ +TINFE & Beta & 2.0000 & 0.4000 & 1.9073 & 0.1996 & 1.7677 & 0.1378\\ +TR1E & Beta & 0.0000 & 0.6000 & 0.9235 & 0.0913 & 0.9246 & 0.0908\\ +RHOTR & Beta & 0.8856 & 0.0750 & 0.8588 & 0.0488 & 0.8576 & 0.0486\\ +TYE1 & Beta & 0.1500 & 0.2000 & 0.3427 & 0.0920 & 0.3198 & 0.0893\\ +TYE2 & Beta & 0.1500 & 0.2000 & 0.0720 & 0.0258 & 0.0680 & 0.0253\\ +WRLAG & Beta & 0.5000 & 0.2000 & 0.2274 & 0.1364 & 0.2258 & 0.1352\\ +\label{tab:1} +\end{longtable} + +\begin{figure}[H] + \centering + \scalebox{.75}{ + \input{cross-posterior-kernel-1.tex}} + \caption{Cross sections of the posterior kernel.} + \label{fig:1} +\end{figure} + +\begin{figure}[H] + \centering + \scalebox{.75}{ + \input{cross-posterior-kernel-2.tex}} + \caption{Cross sections of the posterior kernel.} + \label{fig:2} +\end{figure} + +\begin{figure}[H] + \centering + \scalebox{.75}{ + \input{cross-posterior-kernel-3.tex}} + \caption{Cross sections of the posterior kernel.} + \label{fig:3} +\end{figure} + +\begin{figure}[H] + \centering + \scalebox{.75}{ + \input{cross-posterior-kernel-4.tex}} + \caption{Cross sections of the posterior kernel.} + \label{fig:4} +\end{figure} + +\begin{figure}[H] + \centering + \scalebox{.75}{ + \input{cross-posterior-kernel-5.tex}} + \caption{Cross sections of the posterior kernel.} + \label{fig:5} +\end{figure} + +\begin{figure}[H] + \centering + \scalebox{.75}{ + \input{cross-posterior-kernel-6.tex}} + \caption{Cross sections of the posterior kernel.} + \label{fig:6} +\end{figure} + +\begin{figure}[H] + \centering + \scalebox{.75}{ + \input{cross-posterior-kernel-7.tex}} + \caption{Cross sections of the posterior kernel.} + \label{fig:7} +\end{figure} + +\end{document} + + +%%% Local Variables: +%%% mode: latex +%%% TeX-master: t +%%% End: