Initial commit.

master
Stéphane Adjemian (Argos) 2023-11-15 15:58:39 +01:00
commit 5dd6718355
Signed by: stepan
GPG Key ID: A6D44CB9C64CE77B
3 changed files with 542 additions and 0 deletions

1
.gitignore vendored Normal file
View File

@ -0,0 +1 @@
*.pdf

21
Makefile Normal file
View File

@ -0,0 +1,21 @@
# Build driver for the beamer slides.
LATEX = pdflatex

# Default: build the PDF, then drop auxiliary files.
all: slides.pdf clean

# Re-run pdflatex until cross-references are stable
# (the loop exits once the "Rerun" hint leaves slides.log).
slides.pdf: slides.tex
	while ($(LATEX) slides.tex ; \
	grep -q "Rerun to get cross" slides.log ) do true ; \
	done

# Remove LaTeX auxiliary files (keeps the PDF).
clean:
	rm -f *.aux *.log *.out *.nav *.rel *.toc *.snm *.synctex.gz *.vrb
	rm -rf auto

# Remove everything generated, including the PDF.
clean-all: clean
	rm -f *.pdf

# Upload the slides to the web server.
push: slides.pdf
	scp slides.pdf ryuk:~/home/www/dynare.adjemian.eu/prose/ispra-2023.pdf

# None of these targets name a real output file; declare them phony so
# a stray file called e.g. "clean" cannot shadow the rule.
.PHONY: all clean clean-all push

520
slides.tex Normal file
View File

@ -0,0 +1,520 @@
% © Stéphane Adjemian, 2014-2017
\documentclass[10pt,slidestop]{beamer}
\synctex=1
\usepackage{etex}
\usepackage{fourier-orns}
\usepackage{ccicons}
\usepackage{amssymb}
\usepackage{amstext}
\usepackage{amsbsy}
\usepackage{amsopn}
\usepackage{amscd}
\usepackage{amsxtra}
\usepackage{amsthm}
\usepackage{float}
\usepackage{color}
\usepackage{mathrsfs}
\usepackage{bm}
\usepackage{lastpage}
\usepackage[nice]{nicefrac}
\usepackage{setspace}
\usepackage{hyperref}
\usepackage{ragged2e}
\usepackage{listings}
\usepackage{fontawesome}
\usepackage[T1]{fontenc}
\usepackage[utf8]{inputenc}
\usepackage{array}
\usepackage[super]{nth}
\usepackage{algorithm}
\usepackage{algpseudocode}
\newcolumntype{x}[1]{>{\centering\arraybackslash\hspace{0pt}}m{#1}}
\setlength{\parindent}{0pt}
\pdfcompresslevel=9
\newcommand{\trace}{\mathrm{tr}}
\newcommand{\vect}{\mathrm{vec}}
\newcommand{\tracarg}[1]{\mathrm{tr}\left\{#1\right\}}
\newcommand{\vectarg}[1]{\mathrm{vec}\left(#1\right)}
\newcommand{\vecth}[1]{\mathrm{vech}\left(#1\right)}
\newcommand{\iid}[2]{\mathrm{iid}\left(#1,#2\right)}
\newcommand{\normal}[2]{\mathcal N\left(#1,#2\right)}
\newcommand{\dynare}{\href{http://www.dynare.org}{\color{blue}Dynare}}
\newcommand{\AllSample}{ \mathcal Y_T }
\newcommand{\sample}{\mathcal Y_T}
\newcommand{\samplet}[1]{\mathcal Y_{#1}}
\newcommand{\slidetitle}[1]{\fancyhead[L]{\textsc{#1}}}
\definecolor{gray}{gray}{0.4}
\setbeamertemplate{footline}{
{\hfill\vspace*{1pt}\href{https://creativecommons.org/publicdomain/zero/1.0/}{\cczero}
\href{https://git.ithaca.fr/stepan/ispra-2023}{\faCodeFork}
}\hspace{1cm}}
\setbeamertemplate{navigation symbols}{}
\setbeamertemplate{blocks}[rounded][shadow=true]
\newenvironment{notes}
{\bgroup \justifying\bgroup\tiny\begin{spacing}{1.0}}
{\end{spacing}\egroup\egroup}
\begin{document}
\title{Estimation of nonlinear models with Dynare}
\author[]{Stéphane Adjemian}
\institute{\texttt{stephane.adjemian@univ-lemans.fr}}
\date{September, 2023}
\begin{frame}
\titlepage{}
\end{frame}
\begin{frame}
\frametitle{Introduction}
\bigskip
\begin{itemize}
\item Full information estimation of models approximated at higher order...\newline
\item Or, in principle, full information estimation of nonlinear models.\newline
\item Cannot use the (linear) Kalman filter anymore.\newline
\item Dynare provides routines to evaluate the likelihood of models approximated at order $k\geq 1$.
\end{itemize}
\end{frame}
\begin{frame}
\frametitle{The reduced form model}
\bigskip
\[
\begin{aligned}
& s_t = f( s_{t-1}, \varepsilon_t; \bm\theta ) \\
& y_t = g( s_t; \bm\theta) + e_t \\
\end{aligned}
\]
\bigskip
\begin{itemize}
\item $f(.)$ and $g(.)$ are the state and measurement equations.\newline
\item $\bm\theta\in\Theta\subseteq \mathbb R^m$ a vector of $m$ parameters.\newline
\item $s_t$ and $y_t$ are the vectors of state variables and observed variables.\newline
\item Innovations $\varepsilon_t$ and $e_t$ are the structural shocks and measurement errors.\newline
\item $\#(y_t) = \#(e_t)$
\end{itemize}
\end{frame}
\begin{frame}
\frametitle{Reduced form with second order approximation}
\bigskip
\begin{itemize}
\item The ``state'' equations:
\[
\begin{split}
s_t = \bar{s}(\bm\theta) & + g_y(\bm\theta) \hat{s}_{t-1} + g_u(\bm\theta) \varepsilon_t \\
& + 0.5 g_{\sigma\sigma}(\bm\theta) \\
& + 0.5 g_{yy}(\bm\theta)\left(\hat{s}_{t-1} \otimes \hat{s}_{t-1}\right) \\
& + 0.5 g_{uu}(\bm\theta) \left(\varepsilon_t \otimes \varepsilon_t\right) \\
& + g_{uy}(\bm\theta) \left(\hat{s}_{t-1} \otimes \varepsilon_t\right)
\end{split}
\]
\medskip
\item The measurement equations:
\[
y_t = Z s_t + e_t
\]
\bigskip
where $Z$ is a selection matrix.
\end{itemize}
\end{frame}
\begin{frame}[c]
\frametitle{Properties of the state space model}
\begin{itemize}
\item $s_t \sim $ first order Markov process:\newline
\[
p\left(s_t | s_{0:t-1} \right) = p\left(s_t | s_{t-1} \right)
\]
\bigskip\bigskip
\item Observations are conditionally independent:\newline
\[
p\left( y_t | y_{1:t-1}, s_{0:t} \right) = p\left(y_t | s_t \right)
\]
\bigskip\bigskip
\item[$\Rightarrow$] We cannot evaluate the likelihood, product of $p(y_t|y_{1:t-1})$ densities, without tracking the state variables.
\end{itemize}
\end{frame}
\begin{frame}[c]
\frametitle{Nonlinear filter (I)}
\begin{itemize}
\item Suppose $p(s_{t-1}|y_{1:t-1})$ is known $\longrightarrow$ How to compute $p(s_{t}|y_{1:t})$?\newline
\bigskip
\item First we can predict the states in $t$ given information in $t-1$:
\begin{equation}\label{prediction}
p\left(s_t|y_{1:t-1}\right) = \int p(s_t|s_{t-1})p\left(s_{t-1}|y_{1:t-1}\right)\mathrm d s_{t-1}
\end{equation}
where $p(s_t|s_{t-1})$ is defined by the state equations:
\[
\begin{split}
p\left(s_t|s_{t-1}\right) &= \int p\left(s_t|s_{t-1},\varepsilon_t\right)p\left(\varepsilon_t|s_{t-1}\right)\mathrm d\varepsilon_t\\
&= \int p\left(s_t|s_{t-1},\varepsilon_t\right)p\left(\varepsilon_t\right)\mathrm d\varepsilon_t
\end{split}
\]
\end{itemize}
\end{frame}
\begin{frame}[c]
\frametitle{Nonlinear filter (II)}
\begin{itemize}
\item $p\left(s_t|y_{1:t-1}\right)$ can be interpreted as our prior belief about the state variables.\newline
\item Use Bayes theorem to update our beliefs:
\begin{equation}\label{update}
p\left(s_t|y_{1:t}\right) = \frac{p\left(y_t|s_t\right)p\left(s_t|y_{1:t-1}\right)}{p\left(y_t|y_{1:t-1}\right)}
\end{equation}
where:
\[
p\left(y_t|y_{1:t-1}\right) = \int p\left(y_t|s_t\right)p\left(s_t|y_{1:t-1}\right) \mathrm d s_t
\]
which is the conditional density required to evaluate the likelihood of the model.\newline
\item $p\left(y_t|s_t\right)$, likelihood of $s_t$, is defined by the measurement equations.\newline
\item[$\Rightarrow$] Iterate over (\ref{prediction}) and (\ref{update}).
\end{itemize}
\end{frame}
\begin{frame}[c]
\frametitle{Approximated nonlinear filter (I)}
\framesubtitle{Approximate the distribution of $s_t| y_{1:t}$}
\begin{itemize}
\item Suppose the distribution of $s_t| y_{1:t}$ can be accurately approximated by
a set of nodes and weights
$\left\{\left(s_t^i,\omega_t^i\right)\right\}_{i=1}^N$, with non negative weights
summing up to one.\newline
\item[$\Rightarrow$] When $N\rightarrow\infty$ $\sum_{i=1}^N\omega_t^i h(s_t^i)$ converges to $\mathbb E_{p(s_t|y_{1:t})}\left[h(s)\right]$.\newline
\item Can be used to approximate the predictive densities:
\[
\widehat{p}\left(s_t|y_{1:t-1}\right) =
\sum_{i=1}^N p(s_t | s_{t-1}^i)\omega_{t-1}^i
\]
\item Particles $\leftarrow$ random deviates from $s_t| y_{1:t}$.\newline
\item How to sample from $s_t| y_{1:t}$?
\end{itemize}
\end{frame}
\begin{frame}[c]
\frametitle{Approximated nonlinear filter (II)}
\framesubtitle{Importance sampling (a)}
\begin{itemize}
\item Suppose we can sample from $q\left(s_t| y_{1:t}\right)$ but not from $p\left(s_t| y_{1:t}\right)$.\newline
\item Then:
\[
\begin{split}
\mathbb E_{p(s_t|y_{1:t})}\left[h(s)\right] &= \int
\underbrace{\frac{p\left(s_t| y_{1:t}\right)}{q\left(s_t| y_{1:t}\right)}}_{\widetilde w_t(s_t)}q\left(s_t| y_{1:t}\right) h(s_t)\mathrm d s_t\\
&= \mathbb E_{q(s_t|y_{1:t})}\left[\widetilde \omega_t(s_t)h(s_t)\right]
\end{split}
\]
\item By the Bayes theorem:
\[
p\left(s_t | y_{1:t} \right) = \frac{p\left(y_{1:t}|s_t\right)p\left(s_t\right)}{p\left(y_{1:t}\right)}
\]
\item Unormalized weights:
\[
\widehat \omega_t(s_t) = \frac{p\left(y_{1:t}|s_t\right)p\left(s_t\right)}{q\left(s_t| y_{1:t}\right)}
= p\left(y_{1:t}\right)\widetilde \omega_t(s_t)
\]
\end{itemize}
\end{frame}
\begin{frame}[c]
\frametitle{Approximated nonlinear filter (II)}
\framesubtitle{Importance sampling (b)}
With particles:
\[
\hat \omega_t^i = \frac{p\left(y_{1:t}|s_{t}^i\right)p\left(s_t^i\right)}{q\left(s_t^i| y_{1:t}\right)}
\]
\bigskip
Then
\[
\widehat{\mathbb E}_{p(s_t|y_{1:t})}\left[h(s)\right] = \sum_{i=1}^N \widetilde{\omega}_ih(s_t^i)
\]
where
\[
\widetilde{\omega}_i = \frac{\widehat \omega_i}{\sum_{j=1}^N\widehat \omega_j}
\]
\end{frame}
\begin{frame}[c]
\frametitle{Approximated nonlinear filter (III)}
\framesubtitle{Sequential importance sampling}
Let's pick a proposal $q$ such that:
\[
q\left(s_t|y_{1:t}\right) = q\left(s_t| s_{t-1},y_t\right)q(s_{t-1}| y_{1:t-1})
\]
\bigskip
Then, one can show that:
\[
\hat w_t(s_t) = \hat w_{t-1}(s_{t-1})\frac{p\left(y_t|s_t\right)p\left(s_t|s_{t-1}\right)}{q\left(s_t|s_{t-1}, y_t\right)}
\]
\bigskip\bigskip
A particle's current weight depends on its previous level and an incremental weight. This increment will put more weight on a particle if it is more likely with respect to its history and the current observation.
\end{frame}
\begin{frame}[fragile,c]
\frametitle{Approximated nonlinear filter (III)}
\framesubtitle{Generic particle filter algorithm}
\begin{algorithmic}[1]
\State $\left\{s_{0}^i, w_{0}^i\right\}_{i=1}^N \gets$ initial sample of particles.
\For{$t \gets 1,T$}
\ForAll{$i \gets 1,N$}
\State $\tilde s_t^i \gets q\left(s_t|s_{t-1}^i, y_t\right)$
\State $\hat w_t^i \gets w_{t-1}^i\frac{p\left(y_t|\tilde s_t^i\right)p\left(\tilde s_t^i| s_{t-1}^i\right)}{q\left(\tilde s_t^i|s_{t-1}^i, y_t\right)}$
\EndFor
\State $\tilde w_t^i \gets \frac{\hat w_t^i}{\sum_{j=1}^N\hat w_t^j}$
\State $\left\{s_t^i, w_t^i\right\}_{i=1}^N \gets \left\{\tilde s_t^i, \tilde w_t^i\right\}_{i=1}^N$
\EndFor
\end{algorithmic}
\end{frame}
\begin{frame}[c]
\frametitle{Issue 1}
\framesubtitle{Degeneracy}
\begin{itemize}
\item Weights degenerate as $t$ goes to infinity...\newline
\item Up to the point where all but one particles have zero weights.\newline
\item We don't want to sum-up a distribution with a single point.\newline
\item[$\Rightarrow$] Resampling (kill particles with low weights and replicate good particles).
\end{itemize}
\end{frame}
\begin{frame}[fragile,c]
\frametitle{Approximated nonlinear filter (IV)}
\framesubtitle{Particle filter algorithm with resampling}
\begin{algorithmic}[1]
\State $\left\{s_{0}^i, w_{0}^i\right\}_{i=1}^N \gets$ initial sample of particles.
\For{$t \gets 1,T$}
\ForAll{$i \gets 1,N$}
\State $\tilde s_t^i \gets q\left(s_t|s_{t-1}^i, y_t\right)$
\State $\hat w_t^i \gets w_{t-1}^i\frac{p\left(y_t|\tilde s_t^i\right)p\left(\tilde s_t^i| s_{t-1}^i\right)}{q\left(\tilde s_t^i|s_{t-1}^i, y_t\right)}$
\EndFor
\State $\tilde w_t^i \gets \frac{\hat w_t^i}{\sum_{j=1}^N\hat w_t^j}$
\State $N_t^{\star} \gets \left(\sum_{i=1}^N \left(\tilde w_t^i\right)^2\right)^{-1}$
\If{$N_t^{\star}<\alpha N$}
\State $\left\{s_t^i, w_t^i\right\}_{i=1}^N \gets $ Resampling step, with $w_t^i=\nicefrac{1}{N}$
\Else
\State $\left\{s_t^i, w_t^i\right\}_{i=1}^N \gets \left\{\tilde s_t^i, \tilde w_t^i\right\}_{i=1}^N$
\EndIf
\EndFor
\end{algorithmic}
\end{frame}
\begin{frame}[c]
\frametitle{Set the proposal distribution}
\framesubtitle{The blind proposal}
\[
q\left(s_t| s_{t-1}, y_t\right) = p\left(s_t | s_{t-1}\right)
\]
\bigskip
\begin{itemize}
\item Does not use current observation.\newline
\item Weights recursive expression simplifies into:
\[
\widehat \omega_t\left(s_t\right) \propto \widetilde \omega_{t-1}\left(s_{t-1}\right) p\left(y_t | s_t\right)
\]
\item Conditional density of observation is:
\[
p\left(y_t | \tilde s_t^i\right) = \left(2\pi\right)^{-\frac{n}{2}}~|R|^{-\frac{1}{2}}e^{-\frac{1}{2}\left(y_t-g(\tilde s_t^i;\bm\theta)\right)' R^{-1} \left(y_t-g(\tilde s_t^i;\bm\theta)\right)}
\]
\bigskip
\item[$\Rightarrow$] Measurement errors are mandatory!
\end{itemize}
\end{frame}
\begin{frame}[c]
\frametitle{Issues 2 and 3}
\framesubtitle{Likelihood}
\[
\begin{split}
p\left(y_{1:T} | \bm\theta \right) &= p\left(y_1 | s_0;\bm\theta \right)p\left(s_0 |\bm\theta \right) \prod_{t=2}^{T}{p\left(y_t | y_{1:t-1};\bm\theta \right)}\\
\text{where}\quad p\left(y_t | y_{1:t-1};\bm\theta \right) &\approx \sum_{i=1}^N{ \widetilde\omega_{t-1}^ip\left(y_t | s_t^i ; \bm\theta\right)}
\end{split}
\]
\bigskip\bigskip
\begin{enumerate}
\item How to choose the initial distribution for the states ($s_0$)?\newline
\item Non-differentiability of the likelihood because of the resampling step.\newline
\end{enumerate}
\end{frame}
\begin{frame}[c]
\frametitle{Estimation with particle filter}
\bigskip\bigskip
\begin{itemize}
\item Do not use gradient based methods to estimate the mode...\newline
\medskip
\item Simplex ``works'', but still issues with the hessian at the estimated mode.\newline
\medskip
\item Better to run directly the MCMC.\newline
\medskip
\item Slice? SMC...\newline
\medskip
\item Do we really need particle (or nonlinear) filters?
\end{itemize}
\end{frame}
\end{document}
% Local Variables:
% ispell-check-comments: exclusive
% ispell-local-dictionary: "american-insane"
% TeX-master: t
% End: