From 241fd074246269e94f6c0f8cfb3f04bf1fce0881 Mon Sep 17 00:00:00 2001
From: Johannes Pfeifer <jpfeifer@gmx,de>
Date: Mon, 16 Sep 2013 17:19:56 +0200
Subject: [PATCH] Add Geweke 1992 convergence diagnostics

---
 matlab/McMCDiagnostics.m       |  59 ++++++++++++++++--
 matlab/dynare_estimation_1.m   |   4 +-
 matlab/geweke_chi2_test.m      |  74 ++++++++++++++++++++++
 matlab/geweke_moments.m        | 109 +++++++++++++++++++++++++++++++++
 matlab/global_initialization.m |   4 ++
 5 files changed, 242 insertions(+), 8 deletions(-)
 create mode 100644 matlab/geweke_chi2_test.m
 create mode 100644 matlab/geweke_moments.m

diff --git a/matlab/McMCDiagnostics.m b/matlab/McMCDiagnostics.m
index d4a26cb8c..6491be936 100644
--- a/matlab/McMCDiagnostics.m
+++ b/matlab/McMCDiagnostics.m
@@ -1,4 +1,4 @@
-function McMCDiagnostics(options_, estim_params_, M_)
+function oo_ = McMCDiagnostics(options_, estim_params_, M_, oo_)
 % function McMCDiagnostics
 % Computes convergence tests 
 % 
@@ -8,7 +8,7 @@ function McMCDiagnostics(options_, estim_params_, M_)
 %   M_               [structure]
 %
 % OUTPUTS 
-%   none  
+%   oo_              [structure] 
 %
 % SPECIAL REQUIREMENTS
 %   none
@@ -38,10 +38,6 @@ MhDirectoryName = CheckPath('metropolis',M_.dname);
 
 TeX = options_.TeX;
 nblck = options_.mh_nblck;
-% Brooks and Gelman tests need more than one block 
-if nblck == 1
-    return;
-end
 npar = estim_params_.nvx;
 npar = npar + estim_params_.nvn;
 npar = npar + estim_params_.ncx;
@@ -56,6 +52,57 @@ NumberOfMcFilesPerBlock = size(dir([MhDirectoryName ,filesep, M_.fname '_mh*_blc
 % check if all previous files are there for block 1
 check_presence_consecutive_MC_files(MhDirectoryName,M_.fname,1)
 
+if nblck == 1 % Brooks and Gelman tests need more than one block 
+    convergence_diagnostics_geweke=zeros(npar,4+2*length(options_.convergence.geweke.taper_steps));
+    if any(options_.convergence.geweke.geweke_interval<0) || any(options_.convergence.geweke.geweke_interval>1) || length(any(options_.convergence.geweke.geweke_interval<0))~=2 ...
+        || (options_.convergence.geweke.geweke_interval(2)-options_.convergence.geweke.geweke_interval(1)<0)
+        fprintf('\nCONVERGENCE DIAGNOSTICS: Invalid option for geweke_interval. Using the default of [0.2 0.5].\n')
+        options_.convergence.geweke.geweke_interval=[0.2 0.5];
+    end
+    first_obs_begin_sample = max(1,ceil(options_.mh_drop*options_.mh_replic));
+    last_obs_begin_sample = first_obs_begin_sample+round(options_.convergence.geweke.geweke_interval(1)*options_.mh_replic*options_.mh_drop);
+    first_obs_end_sample = first_obs_begin_sample+round(options_.convergence.geweke.geweke_interval(2)*options_.mh_replic*options_.mh_drop);
+    param_name=[];
+    for jj=1:npar
+        param_name = strvcat(param_name,get_the_name(jj,options_.TeX,M_,estim_params_,options_));
+    end
+    fprintf('\nGeweke (1992) Convergence Tests, based on means of draws %d to %d vs %d to %d.\n',first_obs_begin_sample,last_obs_begin_sample,first_obs_end_sample,options_.mh_replic);
+    fprintf('p-values are for Chi2-test for equality of means.\n');    
+    Geweke_header={'Parameter', 'Post. Mean', 'Post. Std', 'p-val No Taper'};
+    print_string=['%',num2str(size(param_name,2)+3),'s \t %12.3f \t %12.3f \t %12.3f'];
+    print_string_header=['%',num2str(size(param_name,2)+3),'s \t %12s \t %12s \t %12s'];    
+    for ii=1:length(options_.convergence.geweke.taper_steps)
+        Geweke_header=[Geweke_header, ['p-val ' num2str(options_.convergence.geweke.taper_steps(ii)),'% Taper']];
+        print_string=[print_string,'\t %12.3f'];
+        print_string_header=[print_string_header,'\t %12s'];
+    end
+    print_string=[print_string,'\n'];
+    print_string_header=[print_string_header,'\n'];
+    fprintf(print_string_header,Geweke_header{1,:});
+    for jj=1:npar
+        startline=0;
+        for n = 1:NumberOfMcFilesPerBlock
+            load([MhDirectoryName '/' M_.fname '_mh',int2str(n),'_blck1.mat'],'x2');
+            nx2 = size(x2,1);
+            param_draws(startline+(1:nx2),1) = x2(:,jj);
+            startline = startline + nx2;
+        end
+        [results_vec, results_struct] = geweke_moments(param_draws,options_);
+        convergence_diagnostics_geweke(jj,:)=results_vec;
+    
+        param_draws1 = param_draws(first_obs_begin_sample:last_obs_begin_sample,:);
+        param_draws2 = param_draws(first_obs_end_sample:end,:);
+        [results_vec1] = geweke_moments(param_draws1,options_);
+        [results_vec2] = geweke_moments(param_draws2,options_);
+        
+        results_struct = geweke_chi2_test(results_vec1,results_vec2,results_struct,options_);
+        eval(['oo_.convergence.geweke.',param_name(jj,:),'=results_struct;'])
+        fprintf(print_string,param_name(jj,:),results_struct.posteriormean,results_struct.posteriorstd,results_struct.prob_chi2_test)
+    end
+    skipline(2);
+    return;
+end
+
 for blck = 2:nblck
     tmp = size(dir([MhDirectoryName ,filesep, M_.fname '_mh*_blck' int2str(blck) '.mat']),1);
     if tmp~=NumberOfMcFilesPerBlock
diff --git a/matlab/dynare_estimation_1.m b/matlab/dynare_estimation_1.m
index 7e5071ded..73e24df17 100644
--- a/matlab/dynare_estimation_1.m
+++ b/matlab/dynare_estimation_1.m
@@ -611,8 +611,8 @@ if (any(bayestopt_.pshape  >0 ) && options_.mh_replic) || ...
         CutSample(M_, options_, estim_params_);
         return
     else
-        if ~options_.nodiagnostic && options_.mh_replic > 2000 && options_.mh_nblck > 1
-            McMCDiagnostics(options_, estim_params_, M_);
+        if ~options_.nodiagnostic && options_.mh_replic > 2000
+            oo_= McMCDiagnostics(options_, estim_params_, M_,oo_);
         end
         %% Here i discard first half of the draws:
         CutSample(M_, options_, estim_params_);
diff --git a/matlab/geweke_chi2_test.m b/matlab/geweke_chi2_test.m
new file mode 100644
index 000000000..7ed383139
--- /dev/null
+++ b/matlab/geweke_chi2_test.m
@@ -0,0 +1,74 @@
+function results_struct = geweke_chi2_test(results1,results2,results_struct,options)
+% results_struct = geweke_chi2_test(results1,results2,results_struct,options)
+% PURPOSE: computes Geweke's chi-squared test for two sets of MCMC sample draws
+%
+% INPUTS 
+%   results1         [1 by (4+n_taper*2) vector] vector with post. mean,
+%                           std, NSE_iid, RNE_iid, and tapered NSE and RNE
+%                           for chain part 1
+%   results2         [1 by (4+n_taper*2) vector] vector with post. mean,
+%                           std, NSE_iid, RNE_iid, and tapered NSE and RNE
+%                           for chain part 2
+%   results_struct   [structure] results structure generated by geweke_moments
+%   Dynareoptions    [structure]
+%
+% OUTPUTS 
+%   results_struct   [structure]  containing the following fields:
+%       pooled_mean               Pooled mean of the chain parts, weighted
+%                                   with precision
+%       rpooled_nse               Pooled NSE of the chain parts, weighted
+%                                   with precision
+%       prob_chi2_test            p-value of Chi2-test for equality of
+%                                   means in both chain parts
+% -----------------------------------------------------------------
+
+%
+% SPECIAL REQUIREMENTS
+%   None.
+
+% Copyright (C) 2013 Dynare Team
+%
+% This file is part of Dynare.
+%
+% Dynare is free software: you can redistribute it and/or modify
+% it under the terms of the GNU General Public License as published by
+% the Free Software Foundation, either version 3 of the License, or
+% (at your option) any later version.
+%
+% Dynare is distributed in the hope that it will be useful,
+% but WITHOUT ANY WARRANTY; without even the implied warranty of
+% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+% GNU General Public License for more details.
+%
+% You should have received a copy of the GNU General Public License
+% along with Dynare.  If not, see <http://www.gnu.org/licenses/>.
+%
+% ------------------------------------------------
+% REFERENCES: Geweke (1992), `Evaluating the accuracy of sampling-based
+% approaches to the calculation of posterior moments', in J.O. Berger,
+% J.M. Bernardo, A.P. Dawid, and A.F.M. Smith (eds.) Proceedings of
+% the Fourth Valencia International Meeting on Bayesian Statistics,
+% pp. 169-194, Oxford University Press
+% Geweke (1999): `Using simulation methods for Bayesian econometric models: 
+% Inference, development and communication', Econometric Reviews, 18(1),
+% 1-73
+
+% written by: Johannes Pfeifer, 
+% based on code by James P. LeSage, who in turn 
+% drew on MATLAB programs written by Siddartha Chib 
+
+for k=1:length(options.convergence.geweke.taper_steps)+1;
+  NSE=[results1(:,3+(k-1)*2) results2(:,3+(k-1)*2)];
+  means=[results1(:,1) results2(:,1)];
+  diff_Means=means(:,1)-means(:,2);
+  sum_of_weights=sum(1./(NSE.^2),2);
+  pooled_mean=sum(means./(NSE.^2),2)./sum_of_weights;
+  pooled_NSE=1./sqrt(sum_of_weights);
+
+  test_stat=diff_Means.^2./sum(NSE.^2,2); 
+  p = 1-chi2cdf(test_stat,1);
+  results_struct.pooled_mean(:,k) = pooled_mean;
+  results_struct.pooled_nse(:,k) = pooled_NSE;
+  results_struct.prob_chi2_test(:,k) = p;
+end;
+
diff --git a/matlab/geweke_moments.m b/matlab/geweke_moments.m
new file mode 100644
index 000000000..242a9d550
--- /dev/null
+++ b/matlab/geweke_moments.m
@@ -0,0 +1,109 @@
+function [results_vec, results_struct] = geweke_moments(draws,Dynareoptions)
+%[results_vec, results_struct] = geweke_moments(draws,Dynareoptions)
+% PURPOSE: computes Gewke's convergence diagnostics NSE and RNE 
+%          (numerical std error and relative numerical efficiencies)
+
+% INPUTS 
+%   draws            [ndraws by 1 vector] 
+%   Dynareoptions    [structure]
+%  
+% OUTPUTS 
+%   results_vec
+%   results_struct   [structure]  containing the following fields:
+%          posteriormean= posterior parameter mean
+%          posteriorstd = posterior standard deviation
+%          nse_iid      = nse assuming no serial correlation for variable i
+%          rne_iid      = rne assuming no serial correlation for variable i
+%          nse_x        = nse using x% autocovariance tapered estimate
+%          rne_x        = rne using x% autocovariance taper
+% -----------------------------------------------------------------
+
+%
+% SPECIAL REQUIREMENTS
+%   None.
+
+% Copyright (C) 2013 Dynare Team
+%
+% This file is part of Dynare.
+%
+% Dynare is free software: you can redistribute it and/or modify
+% it under the terms of the GNU General Public License as published by
+% the Free Software Foundation, either version 3 of the License, or
+% (at your option) any later version.
+%
+% Dynare is distributed in the hope that it will be useful,
+% but WITHOUT ANY WARRANTY; without even the implied warranty of
+% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+% GNU General Public License for more details.
+%
+% You should have received a copy of the GNU General Public License
+% along with Dynare.  If not, see <http://www.gnu.org/licenses/>.
+
+% REFERENCES: Geweke (1992), `Evaluating the accuracy of sampling-based
+% approaches to the calculation of posterior moments', in J.O. Berger,
+% J.M. Bernardo, A.P. Dawid, and A.F.M. Smith (eds.) Proceedings of
+% the Fourth Valencia International Meeting on Bayesian Statistics,
+% pp. 169-194, Oxford University Press
+% Geweke (1999): `Using simulation methods for Bayesian econometric models: 
+% Inference, development and communication', Econometric Reviews, 18(1),
+% 1-73
+% -----------------------------------------------------------------
+
+% written by: Johannes Pfeifer, 
+% based on code by James P. LeSage, who in turn 
+% drew on MATLAB programs written by Siddartha Chib 
+
+  
+ndraw = size(draws,1);
+n_groups=100;
+taper_steps=Dynareoptions.convergence.geweke.taper_steps;
+results_vec=zeros(1,4+2*length(taper_steps));
+
+ns = floor(ndraw/n_groups); %step_size
+n_draws_used = ns*n_groups; %effective number of draws used after rounding down
+
+window_means= zeros(n_groups,1);
+window_uncentered_variances= zeros(n_groups,1);
+for ig=1:n_groups;
+    window_means(ig,1)=sum(draws((ig-1)*ns+1:ig*ns,1))/ns;
+    window_uncentered_variances(ig,1)=sum(draws((ig-1)*ns+1:ig*ns,1).^2)/ns;        
+end; %for ig
+total_mean=mean(window_means);
+total_variance=mean(window_uncentered_variances)-total_mean^2;
+
+% save posterior means and std deviations to results_struct structure
+results_vec(1,1)=total_mean;
+results_vec(1,2)=sqrt(total_variance);
+results_struct.posteriormean = total_mean;
+results_struct.posteriorstd = results_vec(1,2);
+
+% numerical standard error assuming no serial correlation
+NSE=std(draws(1:n_draws_used,1),1)/sqrt(n_draws_used);
+% save to results_struct structure
+results_vec(1,3)=NSE;
+results_vec(1,4)=total_variance/(n_draws_used*NSE^2);
+results_struct.nse_iid = NSE;
+results_struct.rne_iid = results_vec(1,4);
+
+%get autocovariance of grouped means
+centered_window_means=window_means-total_mean;
+autocov_grouped_means=zeros(n_groups,1);
+for lag=0:n_groups-1;
+    autocov_grouped_means(lag+1)=centered_window_means(lag+1:n_groups,1)'*centered_window_means(1:n_groups-lag,1)/100;
+end;
+
+% numerical standard error with tapered autocovariance functions
+for taper_index=1:length(taper_steps)
+    taper=taper_steps(taper_index);
+    taper_lags=(1:taper-1)';
+    taper_lag_weight=1-taper_lags/taper;
+    tapered_sum_of_covariances=autocov_grouped_means(1)+sum(2*taper_lag_weight.*autocov_grouped_means(1+taper_lags));
+    NSE_taper=sqrt(tapered_sum_of_covariances/n_groups);
+    % save results_struct in structure
+    results_vec(1,4+taper_index*2-1)=NSE_taper;
+    results_vec(1,4+taper_index*2)=total_variance/(n_draws_used*NSE_taper^2);
+
+    eval(['results_struct.nse_taper_',num2str(taper),'= NSE_taper;']);
+    eval(['results_struct.rne_taper_',num2str(taper),'= total_variance/(n_draws_used*NSE_taper^2);']);
+end; % end of for mm loop
+
diff --git a/matlab/global_initialization.m b/matlab/global_initialization.m
index d06a054d9..c067ef33f 100644
--- a/matlab/global_initialization.m
+++ b/matlab/global_initialization.m
@@ -577,6 +577,10 @@ options_.osr.verbose=2;
 % use GPU
 options_.gpu = 0;
 
+%Geweke convergence diagnostics
+options_.convergence.geweke.taper_steps=[4 8 15];
+options_.convergence.geweke.geweke_interval=[0.2 0.5];
+
 % initialize persistent variables in priordens()
 priordens([],[],[],[],[],[],1);
 % initialize persistent variables in dyn_first_order_solver()