% ispra-2023/slides.tex
% © Stéphane Adjemian, 2014-2017
\documentclass[10pt,slidestop]{beamer}
\synctex=1
\usepackage{etex}
\usepackage{fourier-orns}
\usepackage{ccicons}
\usepackage{amssymb}
\usepackage{amstext}
\usepackage{amsbsy}
\usepackage{amsopn}
\usepackage{amscd}
\usepackage{amsxtra}
\usepackage{amsthm}
\usepackage{float}
\usepackage{color}
\usepackage{mathrsfs}
\usepackage{bm}
\usepackage{lastpage}
\usepackage[nice]{nicefrac}
\usepackage{setspace}
\usepackage{hyperref}
\usepackage{ragged2e}
\usepackage{listings}
\usepackage{fontawesome}
\usepackage[utf8]{inputenc}
\usepackage{array}
\usepackage[super]{nth}
\usepackage{algorithm}
\usepackage{algpseudocode}
\newcolumntype{x}[1]{>{\centering\arraybackslash\hspace{0pt}}m{#1}}
\setlength{\parindent}{0pt}
\pdfcompresslevel=9
\newcommand{\trace}{\mathrm{tr}}
\newcommand{\vect}{\mathrm{vec}}
\newcommand{\tracarg}[1]{\mathrm{tr}\left\{#1\right\}}
\newcommand{\vectarg}[1]{\mathrm{vec}\left(#1\right)}
\newcommand{\vecth}[1]{\mathrm{vech}\left(#1\right)}
\newcommand{\iid}[2]{\mathrm{iid}\left(#1,#2\right)}
\newcommand{\normal}[2]{\mathcal N\left(#1,#2\right)}
\newcommand{\dynare}{\href{http://www.dynare.org}{\color{blue}Dynare}}
\newcommand{\AllSample}{ \mathcal Y_T }
\newcommand{\sample}{\mathcal Y_T}
\newcommand{\samplet}[1]{\mathcal Y_{#1}}
\newcommand{\slidetitle}[1]{\fancyhead[L]{\textsc{#1}}}
\definecolor{gray}{gray}{0.4}
\setbeamertemplate{footline}{
{\hfill\vspace*{1pt}\href{https://creativecommons.org/publicdomain/zero/1.0/}{\cczero}
\href{https://git.ithaca.fr/stepan/ispra-2023}{\faCodeFork}
}\hspace{1cm}}
\setbeamertemplate{navigation symbols}{}
\setbeamertemplate{blocks}[rounded][shadow=true]
\newenvironment{notes}
{\bgroup \justifying\bgroup\tiny\begin{spacing}{1.0}}
{\end{spacing}\egroup\egroup}
\begin{document}
\title{Estimation of nonlinear models with Dynare}
\author[]{}
\institute{\texttt{stephane.adjemian@univ-lemans.fr}}
\date{September, 2023}
\begin{frame}
\titlepage{}
\end{frame}
\begin{frame}
\frametitle{Introduction}
\bigskip
\begin{itemize}
\item Full information estimation of models approximated at higher order...\newline
\item Or, in principle, full information estimation of nonlinear models.\newline
\item Cannot use the (linear) Kalman filter anymore.\newline
\item Dynare provides routines to evaluate the likelihood of models approximated at order $k\geq 1$.
\end{itemize}
\end{frame}
\begin{frame}
\frametitle{The reduced form model}
\bigskip
\[
\begin{aligned}
& s_t = f( s_{t-1}, \varepsilon_t; \bm\theta ) \\
& y_t = g( s_t; \bm\theta) + e_t \\
\end{aligned}
\]
\bigskip
\begin{itemize}
\item $f(.)$ and $g(.)$ are the state and measurement equations.\newline
\item $\bm\theta\in\Theta\subseteq \mathbb R^m$ a vector of $m$ parameters.\newline
\item Cannot use the (linear) Kalman filter anymore.\newline
\item $s_t$ and $y_t$ are the vectors of state variables and observed variables.\newline
\item Innovations $\varepsilon_t$ and $e_t$ are the structural shocks and measurement errors.\newline
\item $\#(y_t) = \#(e_t)$
\end{itemize}
\end{frame}
\begin{frame}
\frametitle{Reduced form with second order approximation}
\bigskip
\begin{itemize}
\item The ``state'' equations:
\[
\begin{split}
s_t = \bar{s}(\bm\theta) & + g_y(\bm\theta) \hat{s}_{t-1} + g_u(\bm\theta) \varepsilon_t \\
& + 0.5 g_{\sigma\sigma}(\bm\theta) \\
& + 0.5 g_{yy}(\bm\theta)\left(\hat{s}_{t-1} \otimes \hat{s}_{t-1}\right) \\
& + 0.5 g_{uu}(\bm\theta) \left(\varepsilon_t \otimes \varepsilon_t\right) \\
& + g_{yu}(\bm\theta) \left(\hat{s}_{t-1} \otimes \varepsilon_t\right)
\end{split}
\]
\medskip
\item The measurement equations:
\[
y_t = Z s_t + e_t
\]
\bigskip
where $Z$ is a selection matrix.
\end{itemize}
\end{frame}
\begin{frame}[c]
\frametitle{Properties of the state space model}
\begin{itemize}
\item $s_t \sim $ first order Markov process:\newline
\[
p\left(s_t | s_{0:t-1} \right) = p\left(s_t | s_{t-1} \right)
\]
\bigskip\bigskip
\item Observations are conditionally independent:\newline
\[
p\left( y_t | y_{1:t-1}, s_{0:t} \right) = p\left(y_t | s_t \right)
\]
\bigskip\bigskip
\item[$\Rightarrow$] We cannot evaluate the likelihood, product of $p(y_t|y_{1:t-1})$ densities, without tracking the state variables.
\end{itemize}
\end{frame}
\begin{frame}[c]
\frametitle{Nonlinear filter (I)}
\begin{itemize}
\item Suppose $p(s_{t-1}|y_{1:t-1})$ is known $\longrightarrow$ How to compute $p(s_{t}|y_{1:t})$?\newline
\bigskip
\item First we can predict the states in $t$ given information in $t-1$:
\begin{equation}\label{prediction}
p\left(s_t|y_{1:t-1}\right) = \int p(s_t|s_{t-1})p\left(s_{t-1}|y_{1:t-1}\right)\mathrm d s_{t-1}
\end{equation}
where $p(s_t|s_{t-1})$ is defined by the state equations:
\[
\begin{split}
p\left(s_t|s_{t-1}\right) &= \int p\left(s_t|s_{t-1},\varepsilon_t\right)p\left(\varepsilon_t|s_{t-1}\right)\mathrm d\varepsilon_t\\
&= \int p\left(s_t|s_{t-1},\varepsilon_t\right)p\left(\varepsilon_t\right)\mathrm d\varepsilon_t
\end{split}
\]
\end{itemize}
\end{frame}
\begin{frame}[c]
\frametitle{Nonlinear filter (II)}
\begin{itemize}
\item $p\left(s_t|y_{1:t-1}\right)$ can be interpreted as our prior belief about the state variables.\newline
\item Use Bayes theorem to update our beliefs:
\begin{equation}\label{update}
p\left(s_t|y_{1:t}\right) = \frac{p\left(y_t|s_t\right)p\left(s_t|y_{1:t-1}\right)}{p\left(y_t|y_{1:t-1}\right)}
\end{equation}
where:
\[
p\left(y_t|y_{1:t-1}\right) = \int p\left(y_t|s_t\right)p\left(s_t|y_{1:t-1}\right) \mathrm d s_t
\]
which is the conditional density required to evaluate the likelihood of the model.\newline
\item $p\left(y_t|s_t\right)$, likelihood of $s_t$, is defined by the measurement equations.\newline
\item[$\Rightarrow$] Iterate over (\ref{prediction}) and (\ref{update}).
\end{itemize}
\end{frame}
\begin{frame}[c]
\frametitle{Approximated nonlinear filter (I)}
\framesubtitle{Approximate the distribution of $s_t| y_{1:t}$}
\begin{itemize}
\item Suppose the distribution of $s_t| y_{1:t}$ can be accurately approximated by
a set of nodes and weights
$\left\{\left(s_t^i,\omega_t^i\right)\right\}_{i=1}^N$, with non-negative weights
summing up to one.\newline
\item[$\Rightarrow$] When $N\rightarrow\infty$, $\sum_{i=1}^N\omega_t^i h(s_t^i)$ converges to $\mathbb E_{p(s_t|y_{1:t})}\left[h(s)\right]$.\newline
\item Can be used to approximate the predictive densities:
\[
\widehat{p}\left(s_t|y_{1:t-1}\right) =
\sum_{i=1}^N p(s_t | s_{t-1}^i)\omega_{t-1}^i
\]
\item Particles $\leftarrow$ random deviates from $s_t| y_{1:t}$.\newline
\item How to sample from $s_t| y_{1:t}$?
\end{itemize}
\end{frame}
\begin{frame}[c]
\frametitle{Approximated nonlinear filter (II)}
\framesubtitle{Importance sampling (a)}
\begin{itemize}
\item Suppose we can sample from $q\left(s_t| y_{1:t}\right)$ but not from $p\left(s_t| y_{1:t}\right)$.\newline
\item Then:
\[
\begin{split}
\mathbb E_{p(s_t|y_{1:t})}\left[h(s)\right] &= \int
\underbrace{\frac{p\left(s_t| y_{1:t}\right)}{q\left(s_t| y_{1:t}\right)}}_{\widetilde \omega_t(s_t)}q\left(s_t| y_{1:t}\right) h(s_t)\mathrm d s_t\\
&= \mathbb E_{q(s_t|y_{1:t})}\left[\widetilde \omega_t(s_t)h(s_t)\right]
\end{split}
\]
\item By the Bayes theorem:
\[
p\left(s_t | y_{1:t} \right) = \frac{p\left(y_{1:t}|s_t\right)p\left(s_t\right)}{p\left(y_{1:t}\right)}
\]
\item Unormalized weights:
\[
\widehat \omega_t(s_t) = \frac{p\left(y_{1:t}|s_t\right)p\left(s_t\right)}{q\left(s_t| y_{1:t}\right)}
= p\left(y_{1:t}\right)\widetilde \omega_t(s_t)
\]
\end{itemize}
\end{frame}
\begin{frame}[c]
\frametitle{Approximated nonlinear filter (II)}
\framesubtitle{Importance sampling (b)}
With particles:
\[
\hat \omega_t^i = \frac{p\left(y_{1:t}|s_{t}^i\right)p\left(s_t^i\right)}{q\left(s_t^i| y_{1:t}\right)}
\]
\bigskip
Then
\[
\widehat{\mathbb E}_{p(s_t|y_{1:t})}\left[h(s)\right] = \sum_{i=1}^N \widetilde{\omega}_t^ih(s_t^i)
\]
where
\[
\widetilde{\omega}_t^i = \frac{\widehat \omega_t^i}{\sum_{j=1}^N\widehat \omega_t^j}
\]
\end{frame}
\begin{frame}[c]
\frametitle{Approximated nonlinear filter (III)}
\framesubtitle{Sequential importance sampling}
Let's pick a proposal $q$ such that:
\[
q\left(s_t|y_{1:t}\right) = q\left(s_t| s_{t-1},y_t\right)q(s_{t-1}| y_{1:t-1})
\]
\bigskip
Then, one can show that:
\[
\hat w_t(s_t) = \hat w_{t-1}(s_{t-1})\frac{p\left(y_t|s_t\right)p\left(s_t|s_{t-1}\right)}{q\left(s_t|s_{t-1}, y_t\right)}
\]
\bigskip\bigskip
A particle's current weight depends on its previous level and an incremental weight. This increment puts more weight on a particle if it is more likely with respect to its history and the current observation.
\end{frame}
\begin{frame}[fragile,c]
\frametitle{Approximated nonlinear filter (III)}
\framesubtitle{Generic particle filter algorithm}
\begin{algorithmic}[1]
\State $\left\{s_{0}^i, w_{0}^i\right\}_{i=1}^N \gets$ initial sample of particles.
\For{$t \gets 1,T$}
\ForAll{$i \gets 1,N$}
\State $\tilde s_t^i \gets q\left(s_t|s_{t-1}^i, y_t\right)$
\State $\hat w_t^i \gets w_{t-1}^i\frac{p\left(y_t|\tilde s_t^i\right)p\left(\tilde s_t^i| s_{t-1}^i\right)}{q\left(\tilde s_t^i|s_{t-1}^i, y_t\right)}$
\EndFor
\State $\tilde w_t^i \gets \frac{\hat w_t^i}{\sum_{j=1}^N\hat w_t^j}$
\State $\left\{s_t^i, w_t^i\right\}_{i=1}^N \gets \left\{\tilde s_t^i, \tilde w_t^i\right\}_{i=1}^N$
\EndFor
\end{algorithmic}
\end{frame}
\begin{frame}[c]
\frametitle{Issue 1}
\framesubtitle{Degeneracy}
\begin{itemize}
\item Weights degenerate as $t$ goes to infinity...\newline
\item Up to the point where all but one particles have zero weights.\newline
\item We don't want to sum-up a distribution with a single point.\newline
\item[$\Rightarrow$] Resampling (kill particles with low weights and replicate good particles).
\end{itemize}
\end{frame}
\begin{frame}[fragile,c]
\frametitle{Approximated nonlinear filter (IV)}
\framesubtitle{Particle filter algorithm with resampling}
\begin{algorithmic}[1]
\State $\left\{s_{0}^i, w_{0}^i\right\}_{i=1}^N \gets$ initial sample of particles.
\For{$t \gets 1,T$}
\ForAll{$i \gets 1,N$}
\State $\tilde s_t^i \gets q\left(s_t|s_{t-1}^i, y_t\right)$
\State $\hat w_t^i \gets w_{t-1}^i\frac{p\left(y_t|\tilde s_t^i\right)p\left(\tilde s_t^i| s_{t-1}^i\right)}{q\left(\tilde s_t^i|s_{t-1}^i, y_t\right)}$
\EndFor
\State $\tilde w_t^i \gets \frac{\hat w_t^i}{\sum_{j=1}^N\hat w_t^j}$
\State $N_t^{\star} \gets \left(\sum_{i=1}^N \left(\tilde w_t^i\right)^2\right)^{-1}$
\If{$N_t^{\star}<\alpha N$}
\State $\left\{s_t^i, w_t^i\right\}_{i=1}^N \gets $ Resampling step, with $w_t^i=\nicefrac{1}{N}$
\Else
\State $\left\{s_t^i, w_t^i\right\}_{i=1}^N \gets \left\{\tilde s_t^i, \tilde w_t^i\right\}_{i=1}^N$
\EndIf
\EndFor
\end{algorithmic}
\end{frame}
\begin{frame}[c]
\frametitle{Set the proposal distribution}
\framesubtitle{The blind proposal}
\[
q\left(s_t| s_{t-1}, y_t\right) = p\left(s_t | s_{t-1}\right)
\]
\bigskip
\begin{itemize}
\item Does not use current observation.\newline
\item Weights recursive expression simplifies into:
\[
\widehat \omega_t\left(s_t\right) \propto \widetilde \omega_{t-1}\left(s_{t-1}\right) p\left(y_t | s_t\right)
\]
\item Conditional density of observation is:
\[
p\left(y_t | \tilde s_t^i\right) = \left(2\pi\right)^{-\frac{n}{2}}~|R|^{-\frac{1}{2}}e^{-\frac{1}{2}\left(y_t-g(\tilde s_t^i;\bm\theta)\right)' R^{-1} \left(y_t-g(\tilde s_t^i;\bm\theta)\right)}
\]
\bigskip
\item[$\Rightarrow$] Measurement errors are mandatory!
\end{itemize}
\end{frame}
\begin{frame}[c]
\frametitle{Issues 2 and 3}
\framesubtitle{Likelihood}
\[
\begin{split}
p\left(y_{1:T} | \bm\theta \right) &= p\left(y_1 | s_0;\bm\theta \right)p\left(s_0 |\bm\theta \right) \prod_{t=2}^{T}{p\left(y_t | y_{1:t-1};\bm\theta \right)}\\
&\approx \sum_{i=1}^N{ \widetilde\omega_{t-1}^ip\left(y_t | s_t^i ; \bm\theta\right)}
\end{split}
\]
\bigskip\bigskip
\begin{enumerate}
\item How to choose the initial distribution for the states ($s_0$)?\newline
\item Non-differentiability of the likelihood because of the resampling step.\newline
\end{enumerate}
\end{frame}
\begin{frame}[c]
\frametitle{Estimation with particle filter}
\bigskip\bigskip
\begin{itemize}
\item Do not use gradient based methods to estimate the mode...\newline
\medskip
\item Simplex ``works'', but still issues with the hessian at the estimated mode.\newline
\medskip
\item Better to run directly the MCMC.\newline
\medskip
\item Slice? SMC...\newline
\medskip
\item Do we really need particle (or nonlinear) filters?
\end{itemize}
\end{frame}
\end{document}
% Local Variables:
% ispell-check-comments: exclusive
% ispell-local-dictionary: "american-insane"
% TeX-master: t
% End: