commit 5dd671835541eaa8ee61956ffb726a8ce4388192
Author: Stéphane Adjemian (Argos)
Date:   Wed Nov 15 15:58:39 2023 +0100

    Initial commit.

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..f08278d
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+*.pdf
\ No newline at end of file
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..d34e5ea
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,21 @@
+LATEX = pdflatex
+
+all: slides.pdf clean
+
+slides.pdf: slides.tex
+	while ($(LATEX) slides.tex ; \
+	grep -q "Rerun to get cross" slides.log ) do true ; \
+	done
+
+clean:
+	rm -f *.aux *.log *.out *.nav *.rel *.toc *.snm *.synctex.gz *.vrb
+	rm -rf auto
+
+clean-all:
+	rm -f *.pdf
+
+push: slides.pdf
+	scp slides.pdf ryuk:~/home/www/dynare.adjemian.eu/prose/ispra-2023.pdf
+
+
+.PHONY: push
diff --git a/slides.tex b/slides.tex
new file mode 100644
index 0000000..235b018
--- /dev/null
+++ b/slides.tex
@@ -0,0 +1,520 @@
+% © Stéphane Adjemian, 2014-2017
+
+\documentclass[10pt,slidestop]{beamer}
+\synctex=1
+
+\usepackage{etex}
+\usepackage{fourier-orns}
+\usepackage{ccicons}
+\usepackage{amssymb}
+\usepackage{amstext}
+\usepackage{amsbsy}
+\usepackage{amsopn}
+\usepackage{amscd}
+\usepackage{amsxtra}
+\usepackage{amsthm}
+\usepackage{float}
+\usepackage{color}
+\usepackage{mathrsfs}
+\usepackage{bm}
+\usepackage{lastpage}
+\usepackage[nice]{nicefrac}
+\usepackage{setspace}
+\usepackage{hyperref}
+\usepackage{ragged2e}
+\usepackage{listings}
+\usepackage{fontawesome}
+
+\usepackage[utf8x]{inputenc}
+
+\usepackage{array}
+\usepackage[super]{nth}
+\usepackage{algorithm}
+\usepackage{algpseudocode}
+\newcolumntype{x}[1]{>{\centering\arraybackslash\hspace{0pt}}m{#1}}
+\setlength{\parindent}{0pt}
+\pdfcompresslevel=9
+
+
+\newcommand{\trace}{\mathrm{tr}}
+\newcommand{\vect}{\mathrm{vec}}
+\newcommand{\tracarg}[1]{\mathrm{tr}\left\{#1\right\}}
+\newcommand{\vectarg}[1]{\mathrm{vec}\left(#1\right)}
+\newcommand{\vecth}[1]{\mathrm{vech}\left(#1\right)}
+\newcommand{\iid}[2]{\mathrm{iid}\left(#1,#2\right)}
+\newcommand{\normal}[2]{\mathcal N\left(#1,#2\right)}
+\newcommand{\dynare}{\href{http://www.dynare.org}{\color{blue}Dynare}}
+\newcommand{\AllSample}{ \mathcal Y_T }
+\newcommand{\sample}{\mathcal Y_T}
+\newcommand{\samplet}[1]{\mathcal Y_{#1}}
+\newcommand{\slidetitle}[1]{\fancyhead[L]{\textsc{#1}}}
+
+\definecolor{gray}{gray}{0.4}
+
+\setbeamertemplate{footline}{
+{\hfill\vspace*{1pt}\href{https://creativecommons.org/publicdomain/zero/1.0/}{\cczero}
+\href{https://git.ithaca.fr/stepan/ispra-2023}{\faCodeFork}
+}\hspace{1cm}}
+
+\setbeamertemplate{navigation symbols}{}
+\setbeamertemplate{blocks}[rounded][shadow=true]
+\newenvironment{notes}
+{\bgroup \justifying\bgroup\tiny\begin{spacing}{1.0}}
+{\end{spacing}\egroup\egroup}
+
+\begin{document}
+
+\title{Estimation of nonlinear models with Dynare}
+\author[]{}
+\institute{\texttt{stephane.adjemian@univ-lemans.fr}}
+\date{September, 2023}
+
+\begin{frame}
+  \titlepage{}
+\end{frame}
+
+\begin{frame}
+  \frametitle{Introduction}
+
+  \bigskip
+
+  \begin{itemize}
+
+  \item Full information estimation of models approximated at higher order...\newline
+
+  \item Or, in principle, full information estimation of nonlinear models.\newline
+
+  \item Cannot use the (linear) Kalman filter anymore.\newline
+
+  \item Dynare provides routines to evaluate the likelihood of models approximated at order $k\geq 1$.
+
+  \end{itemize}
+
+\end{frame}
+
+
+\begin{frame}
+  \frametitle{The reduced form model}
+
+  \bigskip
+
+  \[
+    \begin{aligned}
+      & s_t = f( s_{t-1}, \varepsilon_t; \bm\theta ) \\
+      & y_t = g( s_t; \bm\theta) + e_t \\
+    \end{aligned}
+  \]
+
+  \bigskip
+
+  \begin{itemize}
+
+  \item $f(.)$ and $g(.)$ are the state and measurement equations.\newline
+
+  \item $\bm\theta\in\Theta\subseteq \mathbb R^m$ is a vector of $m$ parameters.\newline
+
+  \item $s_t$ and $y_t$ are the vectors of state variables and observed variables.\newline
+
+  \item The innovations $\varepsilon_t$ and $e_t$ are the structural shocks and the measurement errors.\newline
+
+  \item $\#(y_t) = \#(e_t)$
+
+  \end{itemize}
+
+\end{frame}
+
+
+\begin{frame}
+  \frametitle{Reduced form with second order approximation}
+
+  \bigskip
+
+  \begin{itemize}
+
+  \item The ``state'' equations, with $\hat{s}_{t-1} = s_{t-1} - \bar{s}(\bm\theta)$:
+
+    \[
+      \begin{split}
+        s_t = \bar{s}(\bm\theta) & + g_y(\bm\theta) \hat{s}_{t-1} + g_u(\bm\theta) \varepsilon_t \\
+        & + 0.5 g_{\sigma\sigma}(\bm\theta) \\
+        & + 0.5 g_{yy}(\bm\theta)\left(\hat{s}_{t-1} \otimes \hat{s}_{t-1}\right) \\
+        & + 0.5 g_{uu}(\bm\theta) \left(\varepsilon_t \otimes \varepsilon_t\right) \\
+        & + g_{yu}(\bm\theta) \left(\hat{s}_{t-1} \otimes \varepsilon_t\right)
+      \end{split}
+    \]
+
+    \medskip
+
+  \item The measurement equations:
+
+    \[
+      y_t = Z s_t + e_t
+    \]
+
+    \bigskip
+
+    where $Z$ is a selection matrix.\newline
+
+  \item[$\Rightarrow$] The next slide sketches how such a reduced form can be simulated.
+
+  \end{itemize}
+
+\end{frame}
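+
+
+\begin{frame}[fragile]
+  \frametitle{Simulating the reduced form (sketch)}
+
+  A minimal Python sketch of the state space form above; the functions
+  \texttt{f} and \texttt{g} and all parameter values below are hypothetical
+  toy choices, not a decision rule produced by Dynare.
+
+\begin{lstlisting}[language=Python, basicstyle=\tiny\ttfamily]
+# Simulate s_t = f(s_{t-1}, eps_t) and y_t = g(s_t) + e_t
+# for a toy scalar model.
+import numpy as np
+
+rng = np.random.default_rng(0)
+
+def f(s, eps, rho=0.9):   # hypothetical nonlinear state equation
+    return rho * s + 0.05 * s**2 + eps
+
+def g(s):                 # hypothetical measurement equation
+    return np.exp(0.1 * s)
+
+T, sigma_eps, sigma_e = 200, 0.01, 0.005
+s, y = np.zeros(T + 1), np.zeros(T)
+for t in range(T):
+    s[t + 1] = f(s[t], sigma_eps * rng.standard_normal())
+    y[t] = g(s[t + 1]) + sigma_e * rng.standard_normal()
+\end{lstlisting}
+
+\end{frame}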
+
+
+\begin{frame}[c]
+  \frametitle{Properties of the state space model}

+  \begin{itemize}
+
+  \item $s_t$ follows a first order Markov process:\newline
+
+    \[
+      p\left(s_t | s_{0:t-1} \right) = p\left(s_t | s_{t-1} \right)
+    \]
+
+    \bigskip\bigskip
+
+  \item Observations are conditionally independent:\newline
+
+    \[
+      p\left( y_t | y_{1:t-1}, s_{0:t} \right) = p\left(y_t | s_t \right)
+    \]
+
+    \bigskip\bigskip
+
+  \item[$\Rightarrow$] We cannot evaluate the likelihood, the product of the $p(y_t|y_{1:t-1})$ densities, without tracking the state variables.
+
+  \end{itemize}
+
+\end{frame}
+
+
+\begin{frame}[c]
+  \frametitle{Nonlinear filter (I)}
+
+  \begin{itemize}
+
+  \item Suppose $p(s_{t-1}|y_{1:t-1})$ is known $\longrightarrow$ How to compute $p(s_{t}|y_{1:t})$?\newline
+
+    \bigskip
+
+  \item First we can predict the states in $t$ given information in $t-1$:
+    \begin{equation}\label{prediction}
+      p\left(s_t|y_{1:t-1}\right) = \int p\left(s_t|s_{t-1}\right)p\left(s_{t-1}|y_{1:t-1}\right)\mathrm d s_{t-1}
+    \end{equation}
+    where $p(s_t|s_{t-1})$ is defined by the state equations:
+    \[
+      \begin{split}
+        p\left(s_t|s_{t-1}\right) &= \int p\left(s_t|s_{t-1},\varepsilon_t\right)p\left(\varepsilon_t|s_{t-1}\right)\mathrm d\varepsilon_t\\
+        &= \int p\left(s_t|s_{t-1},\varepsilon_t\right)p\left(\varepsilon_t\right)\mathrm d\varepsilon_t
+      \end{split}
+    \]
+
+  \end{itemize}
+
+\end{frame}
+
+
+\begin{frame}[c]
+  \frametitle{Nonlinear filter (II)}
+
+  \begin{itemize}
+
+  \item $p\left(s_t|y_{1:t-1}\right)$ can be interpreted as our prior belief about the state variables.\newline
+
+  \item Use Bayes' theorem to update our beliefs:
+    \begin{equation}\label{update}
+      p\left(s_t|y_{1:t}\right) = \frac{p\left(y_t|s_t\right)p\left(s_t|y_{1:t-1}\right)}{p\left(y_t|y_{1:t-1}\right)}
+    \end{equation}
+    where:
+    \[
+      p\left(y_t|y_{1:t-1}\right) = \int p\left(y_t|s_t\right)p\left(s_t|y_{1:t-1}\right) \mathrm d s_t
+    \]
+    which is the conditional density required to evaluate the likelihood of the model.\newline
+
+  \item $p\left(y_t|s_t\right)$, the likelihood of $s_t$, is defined by the measurement equations.\newline
+
+  \item[$\Rightarrow$] Iterate over (\ref{prediction}) and (\ref{update}).
+
+  \end{itemize}
+
+\end{frame}
+
+
+\begin{frame}[c]
+  \frametitle{Approximated nonlinear filter (I)}
+  \framesubtitle{Approximate the distribution of $s_t| y_{1:t}$}
+
+  \begin{itemize}
+
+  \item Suppose the distribution of $s_t| y_{1:t}$ can be accurately approximated by
+    a set of nodes and weights
+    $\left\{\left(s_t^i,\omega_t^i\right)\right\}_{i=1}^N$, with non-negative weights
+    summing to one.\newline
+
+  \item[$\Rightarrow$] When $N\rightarrow\infty$, $\sum_{i=1}^N\omega_t^i h(s_t^i)$ converges to $\mathbb E_{p(s_t|y_{1:t})}\left[h(s)\right]$ (see the numerical sketch on the next slide).\newline
+
+  \item Can be used to approximate the predictive densities:
+    \[
+      \widehat{p}\left(s_t|y_{1:t-1}\right) =
+      \sum_{i=1}^N p(s_t | s_{t-1}^i)\omega_{t-1}^i
+    \]
+
+  \item Particles $\leftarrow$ random deviates from $s_t| y_{1:t}$.\newline
+
+  \item How to sample from $s_t| y_{1:t}$?
+
+  \end{itemize}
+
+\end{frame}
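+
+
+\begin{frame}[fragile]
+  \frametitle{Weighted particles: a numerical sketch}
+
+  An illustration of approximating $\mathbb E_p[h(s)]$ with weighted draws
+  from a proposal $q$ (importance sampling, formalized on the next slides);
+  the target and proposal below are arbitrary Gaussian toy choices.
+
+\begin{lstlisting}[language=Python, basicstyle=\tiny\ttfamily]
+# Self-normalized importance sampling: E_p[h(s)] with draws from q.
+# Target p = N(1,1), proposal q = N(0,2), so E_p[s^2] = 1 + 1 = 2.
+import numpy as np
+
+rng = np.random.default_rng(1)
+N = 100_000
+
+def npdf(x, m, sd):   # univariate Gaussian density
+    return np.exp(-0.5 * ((x - m) / sd) ** 2) / (sd * np.sqrt(2.0 * np.pi))
+
+s = rng.normal(0.0, 2.0, size=N)                # draws from the proposal q
+w_hat = npdf(s, 1.0, 1.0) / npdf(s, 0.0, 2.0)   # unnormalized weights
+w_tilde = w_hat / w_hat.sum()                   # normalized weights
+
+print(np.sum(w_tilde * s**2))                   # close to 2 for large N
+\end{lstlisting}
+
+\end{frame}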
+
+
+\begin{frame}[c]
+  \frametitle{Approximated nonlinear filter (II)}
+  \framesubtitle{Importance sampling (a)}
+
+  \begin{itemize}
+
+  \item Suppose we can sample from $q\left(s_t| y_{1:t}\right)$ but not from $p\left(s_t| y_{1:t}\right)$.\newline
+
+  \item Then:
+    \[
+      \begin{split}
+        \mathbb E_{p(s_t|y_{1:t})}\left[h(s)\right] &= \int
+        \underbrace{\frac{p\left(s_t| y_{1:t}\right)}{q\left(s_t| y_{1:t}\right)}}_{\widetilde \omega_t(s_t)}q\left(s_t| y_{1:t}\right) h(s_t)\mathrm d s_t\\
+        &= \mathbb E_{q(s_t|y_{1:t})}\left[\widetilde \omega_t(s_t)h(s_t)\right]
+      \end{split}
+    \]
+
+  \item By Bayes' theorem:
+    \[
+      p\left(s_t | y_{1:t} \right) = \frac{p\left(y_{1:t}|s_t\right)p\left(s_t\right)}{p\left(y_{1:t}\right)}
+    \]
+
+  \item Unnormalized weights:
+    \[
+      \widehat \omega_t(s_t) = \frac{p\left(y_{1:t}|s_t\right)p\left(s_t\right)}{q\left(s_t| y_{1:t}\right)}
+      = p\left(y_{1:t}\right)\widetilde \omega_t(s_t)
+    \]
+
+  \end{itemize}
+
+\end{frame}
+
+
+\begin{frame}[c]
+  \frametitle{Approximated nonlinear filter (II)}
+  \framesubtitle{Importance sampling (b)}
+
+  With particles:
+  \[
+    \hat \omega_t^i = \frac{p\left(y_{1:t}|s_{t}^i\right)p\left(s_t^i\right)}{q\left(s_t^i| y_{1:t}\right)}
+  \]
+
+  \bigskip
+
+  Then
+
+  \[
+    \widehat{\mathbb E}_{p(s_t|y_{1:t})}\left[h(s)\right] = \sum_{i=1}^N \widetilde{\omega}_t^i h(s_t^i)
+  \]
+
+  where
+
+  \[
+    \widetilde{\omega}_t^i = \frac{\widehat \omega_t^i}{\sum_{j=1}^N\widehat \omega_t^j}
+  \]
+
+\end{frame}
+
+
+\begin{frame}[c]
+  \frametitle{Approximated nonlinear filter (III)}
+  \framesubtitle{Sequential importance sampling}
+
+  Let's pick a proposal $q$ such that:
+  \[
+    q\left(s_t|y_{1:t}\right) = q\left(s_t| s_{t-1},y_t\right)q(s_{t-1}| y_{1:t-1})
+  \]
+
+  \bigskip
+
+  Then, one can show that:
+
+  \[
+    \widehat \omega_t(s_t) = \widehat \omega_{t-1}(s_{t-1})\frac{p\left(y_t|s_t\right)p\left(s_t|s_{t-1}\right)}{q\left(s_t|s_{t-1}, y_t\right)}
+  \]
+
+  \bigskip\bigskip
+
+  A particle's current weight depends on its previous level and an incremental
+  weight. The increment puts more weight on a particle if it is more likely
+  given its history and the current observation.
+
+\end{frame}
+
+
+\begin{frame}[fragile,c]
+  \frametitle{Approximated nonlinear filter (III)}
+  \framesubtitle{Generic particle filter algorithm}
+
+  \begin{algorithmic}[1]
+    \State $\left\{s_{0}^i, w_{0}^i\right\}_{i=1}^N \gets$ initial sample of particles.
+    \For{$t \gets 1,T$}
+    \ForAll{$i \gets 1,N$}
+    \State $\tilde s_t^i \sim q\left(s_t|s_{t-1}^i, y_t\right)$
+    \State $\hat w_t^i \gets w_{t-1}^i\frac{p\left(y_t|\tilde s_t^i\right)p\left(\tilde s_t^i| s_{t-1}^i\right)}{q\left(\tilde s_t^i|s_{t-1}^i, y_t\right)}$
+    \EndFor
+    \State $\tilde w_t^i \gets \frac{\hat w_t^i}{\sum_{j=1}^N\hat w_t^j}$, $i=1,\dots,N$
+    \State $\left\{s_t^i, w_t^i\right\}_{i=1}^N \gets \left\{\tilde s_t^i, \tilde w_t^i\right\}_{i=1}^N$
+    \EndFor
+  \end{algorithmic}
+
+\end{frame}
+
+
+\begin{frame}[c]
+  \frametitle{Issue 1}
+  \framesubtitle{Degeneracy}
+
+  \begin{itemize}
+
+  \item Weights degenerate as $t$ goes to infinity...\newline
+
+  \item Up to the point where all but one of the particles have zero weight.\newline
+
+  \item We do not want to summarize a distribution with a single point.\newline
+
+  \item[$\Rightarrow$] Resampling (kill particles with low weights and replicate good particles).
+
+  \end{itemize}
+
+\end{frame}
+
+
+\begin{frame}[fragile,c]
+  \frametitle{Approximated nonlinear filter (IV)}
+  \framesubtitle{Particle filter algorithm with resampling}
+
+  \begin{algorithmic}[1]
+    \State $\left\{s_{0}^i, w_{0}^i\right\}_{i=1}^N \gets$ initial sample of particles.
+    \For{$t \gets 1,T$}
+    \ForAll{$i \gets 1,N$}
+    \State $\tilde s_t^i \sim q\left(s_t|s_{t-1}^i, y_t\right)$
+    \State $\hat w_t^i \gets w_{t-1}^i\frac{p\left(y_t|\tilde s_t^i\right)p\left(\tilde s_t^i| s_{t-1}^i\right)}{q\left(\tilde s_t^i|s_{t-1}^i, y_t\right)}$
+    \EndFor
+    \State $\tilde w_t^i \gets \frac{\hat w_t^i}{\sum_{j=1}^N\hat w_t^j}$, $i=1,\dots,N$
+    \State $N_t^{\star} \gets \left(\sum_{i=1}^N \left(\tilde w_t^i\right)^2\right)^{-1}$
+    \If{$N_t^{\star}<\alpha N$}
+    \State $\left\{s_t^i, w_t^i\right\}_{i=1}^N \gets $ Resampling step, with $w_t^i=\nicefrac{1}{N}$
+    \Else
+    \State $\left\{s_t^i, w_t^i\right\}_{i=1}^N \gets \left\{\tilde s_t^i, \tilde w_t^i\right\}_{i=1}^N$
+    \EndIf
+    \EndFor
+  \end{algorithmic}
+
+  \medskip
+
+  {\footnotesize A Python sketch of this algorithm follows.}
+
+\end{frame}
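+
+
+\begin{frame}[fragile]
+  \frametitle{Particle filter with resampling (sketch)}
+
+  A minimal Python sketch of the algorithm above, using systematic resampling
+  and the blind proposal $q(s_t|s_{t-1},y_t)=p(s_t|s_{t-1})$ of the next
+  slide; \texttt{f}, \texttt{g}, the initial distribution and all tuning
+  values are hypothetical toy choices.
+
+\begin{lstlisting}[language=Python, basicstyle=\tiny\ttfamily]
+# Bootstrap particle filter: with the blind proposal the incremental
+# weight reduces to the measurement density p(y_t | s_t).
+import numpy as np
+
+rng = np.random.default_rng(2)
+
+def f(s, eps):   # hypothetical state equation
+    return 0.9 * s + 0.05 * s**2 + eps
+
+def g(s):        # hypothetical measurement equation
+    return np.exp(0.1 * s)
+
+def systematic_resample(w):
+    N = len(w)
+    u = (rng.random() + np.arange(N)) / N    # stratified uniforms on [0,1)
+    c = np.cumsum(w)
+    c[-1] = 1.0                              # guard against rounding
+    return np.searchsorted(c, u)             # ancestor indices
+
+def pf_loglik(y, N=1000, se=0.01, sm=0.005, alpha=0.5):
+    s = rng.normal(0.0, 0.1, size=N)         # initial particles (arbitrary here)
+    w = np.full(N, 1.0 / N)
+    loglik = 0.0
+    for yt in y:
+        s = f(s, se * rng.standard_normal(N))              # blind proposal
+        inc = np.exp(-0.5 * ((yt - g(s)) / sm) ** 2) \
+              / (sm * np.sqrt(2.0 * np.pi))                # p(y_t | s_t^i)
+        w_hat = w * inc                                    # unnormalized weights
+        loglik += np.log(w_hat.sum())                      # log p(y_t | y_{1:t-1})
+        w = w_hat / w_hat.sum()
+        if 1.0 / np.sum(w**2) < alpha * N:                 # effective sample size
+            s = s[systematic_resample(w)]
+            w = np.full(N, 1.0 / N)
+    return loglik
+\end{lstlisting}
+
+  \medskip
+
+  {\footnotesize For fixed parameters, \texttt{pf\_loglik(y)} returns a Monte
+  Carlo estimate of the log-likelihood of the sample.}
+
+\end{frame}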
+
+
+\begin{frame}[c]
+  \frametitle{Set the proposal distribution}
+  \framesubtitle{The blind proposal}
+
+  \[
+    q\left(s_t| s_{t-1}, y_t\right) = p\left(s_t | s_{t-1}\right)
+  \]
+
+  \bigskip
+
+  \begin{itemize}
+
+  \item Does not use the current observation.\newline
+
+  \item The recursive expression for the weights simplifies to:
+    \[
+      \widehat \omega_t\left(s_t\right) \propto \widetilde \omega_{t-1}\left(s_{t-1}\right) p\left(y_t | s_t\right)
+    \]
+
+  \item The conditional density of the observation is:
+    \[
+      p\left(y_t | \tilde s_t^i\right) = \left(2\pi\right)^{-\frac{n}{2}}~|R|^{-\frac{1}{2}}e^{-\frac{1}{2}\left(y_t-g(\tilde s_t^i;\bm\theta)\right)' R^{-1} \left(y_t-g(\tilde s_t^i;\bm\theta)\right)}
+    \]
+    where $n = \#(y_t)$ and $R$ is the covariance matrix of the measurement errors.
+
+    \bigskip
+
+  \item[$\Rightarrow$] Measurement errors are mandatory!
+
+  \end{itemize}
+
+\end{frame}
+
+
+\begin{frame}[c]
+  \frametitle{Issues 2 and 3}
+  \framesubtitle{Likelihood}
+
+  \[
+    \begin{split}
+      p\left(y_{1:T} | \bm\theta \right) &= \prod_{t=1}^{T}{p\left(y_t | y_{1:t-1};\bm\theta \right)}\\
+      p\left(y_t | y_{1:t-1};\bm\theta \right) &\approx \sum_{i=1}^N{ \widetilde\omega_{t-1}^i\,p\left(y_t | \tilde s_t^i ; \bm\theta\right)}
+    \end{split}
+  \]
+
+  where $y_{1:0}=\emptyset$, so the first factor depends on the distribution
+  assumed for the initial states.
+
+  \bigskip\bigskip
+
+  \begin{enumerate}
+
+  \item How to choose the initial distribution for the states ($s_0$)?\newline
+
+  \item Non-differentiability of the likelihood because of the resampling step.\newline
+
+  \end{enumerate}
+
+\end{frame}
+
+
+\begin{frame}[c]
+  \frametitle{Estimation with particle filter}
+
+  \bigskip\bigskip
+
+  \begin{itemize}
+
+  \item Do not use gradient based methods to estimate the mode...\newline
+
+    \medskip
+
+  \item Simplex ``works'', but there are still issues with the Hessian at the estimated mode.\newline
+
+    \medskip
+
+  \item Better to run the MCMC directly.\newline
+
+    \medskip
+
+  \item Slice? SMC...\newline
+
+    \medskip
+
+  \item Do we really need particle (or nonlinear) filters?
+
+  \end{itemize}
+
+\end{frame}
+
+\end{document}
+
+
+% Local Variables:
+% ispell-check-comments: exclusive
+% ispell-local-dictionary: "american-insane"
+% TeX-master: t
+% End: