From 241fd074246269e94f6c0f8cfb3f04bf1fce0881 Mon Sep 17 00:00:00 2001 From: Johannes Pfeifer Date: Mon, 16 Sep 2013 17:19:56 +0200 Subject: [PATCH] Add Geweke 1992 convergence diagnostics --- matlab/McMCDiagnostics.m | 59 ++++++++++++++++-- matlab/dynare_estimation_1.m | 4 +- matlab/geweke_chi2_test.m | 74 ++++++++++++++++++++++ matlab/geweke_moments.m | 109 +++++++++++++++++++++++++++++++++ matlab/global_initialization.m | 4 ++ 5 files changed, 242 insertions(+), 8 deletions(-) create mode 100644 matlab/geweke_chi2_test.m create mode 100644 matlab/geweke_moments.m diff --git a/matlab/McMCDiagnostics.m b/matlab/McMCDiagnostics.m index d4a26cb8c..6491be936 100644 --- a/matlab/McMCDiagnostics.m +++ b/matlab/McMCDiagnostics.m @@ -1,4 +1,4 @@ -function McMCDiagnostics(options_, estim_params_, M_) +function oo_ = McMCDiagnostics(options_, estim_params_, M_, oo_) % function McMCDiagnostics % Computes convergence tests % @@ -8,7 +8,7 @@ function McMCDiagnostics(options_, estim_params_, M_) % M_ [structure] % % OUTPUTS -% none +% oo_ [structure] % % SPECIAL REQUIREMENTS % none @@ -38,10 +38,6 @@ MhDirectoryName = CheckPath('metropolis',M_.dname); TeX = options_.TeX; nblck = options_.mh_nblck; -% Brooks and Gelman tests need more than one block -if nblck == 1 - return; -end npar = estim_params_.nvx; npar = npar + estim_params_.nvn; npar = npar + estim_params_.ncx; @@ -56,6 +52,57 @@ NumberOfMcFilesPerBlock = size(dir([MhDirectoryName ,filesep, M_.fname '_mh*_blc % check if all previous files are there for block 1 check_presence_consecutive_MC_files(MhDirectoryName,M_.fname,1) +if nblck == 1 % Brooks and Gelman tests need more than one block + convergence_diagnostics_geweke=zeros(npar,4+2*length(options_.convergence.geweke.taper_steps)); + if any(options_.convergence.geweke.geweke_interval<0) || any(options_.convergence.geweke.geweke_interval>1) || length(any(options_.convergence.geweke.geweke_interval<0))~=2 ... + || (options_.convergence.geweke.geweke_interval(2)-options_.convergence.geweke.geweke_interval(1)<0) + fprintf('\nCONVERGENCE DIAGNOSTICS: Invalid option for geweke_interval. Using the default of [0.2 0.5].\n') + options_.convergence.geweke.geweke_interval=[0.2 0.5]; + end + first_obs_begin_sample = max(1,ceil(options_.mh_drop*options_.mh_replic)); + last_obs_begin_sample = first_obs_begin_sample+round(options_.convergence.geweke.geweke_interval(1)*options_.mh_replic*options_.mh_drop); + first_obs_end_sample = first_obs_begin_sample+round(options_.convergence.geweke.geweke_interval(2)*options_.mh_replic*options_.mh_drop); + param_name=[]; + for jj=1:npar + param_name = strvcat(param_name,get_the_name(jj,options_.TeX,M_,estim_params_,options_)); + end + fprintf('\nGeweke (1992) Convergence Tests, based on means of draws %d to %d vs %d to %d.\n',first_obs_begin_sample,last_obs_begin_sample,first_obs_end_sample,options_.mh_replic); + fprintf('p-values are for Chi2-test for equality of means.\n'); + Geweke_header={'Parameter', 'Post. Mean', 'Post. Std', 'p-val No Taper'}; + print_string=['%',num2str(size(param_name,2)+3),'s \t %12.3f \t %12.3f \t %12.3f']; + print_string_header=['%',num2str(size(param_name,2)+3),'s \t %12s \t %12s \t %12s']; + for ii=1:length(options_.convergence.geweke.taper_steps) + Geweke_header=[Geweke_header, ['p-val ' num2str(options_.convergence.geweke.taper_steps(ii)),'% Taper']]; + print_string=[print_string,'\t %12.3f']; + print_string_header=[print_string_header,'\t %12s']; + end + print_string=[print_string,'\n']; + print_string_header=[print_string_header,'\n']; + fprintf(print_string_header,Geweke_header{1,:}); + for jj=1:npar + startline=0; + for n = 1:NumberOfMcFilesPerBlock + load([MhDirectoryName '/' M_.fname '_mh',int2str(n),'_blck1.mat'],'x2'); + nx2 = size(x2,1); + param_draws(startline+(1:nx2),1) = x2(:,jj); + startline = startline + nx2; + end + [results_vec, results_struct] = geweke_moments(param_draws,options_); + convergence_diagnostics_geweke(jj,:)=results_vec; + + param_draws1 = param_draws(first_obs_begin_sample:last_obs_begin_sample,:); + param_draws2 = param_draws(first_obs_end_sample:end,:); + [results_vec1] = geweke_moments(param_draws1,options_); + [results_vec2] = geweke_moments(param_draws2,options_); + + results_struct = geweke_chi2_test(results_vec1,results_vec2,results_struct,options_); + eval(['oo_.convergence.geweke.',param_name(jj,:),'=results_struct;']) + fprintf(print_string,param_name(jj,:),results_struct.posteriormean,results_struct.posteriorstd,results_struct.prob_chi2_test) + end + skipline(2); + return; +end + for blck = 2:nblck tmp = size(dir([MhDirectoryName ,filesep, M_.fname '_mh*_blck' int2str(blck) '.mat']),1); if tmp~=NumberOfMcFilesPerBlock diff --git a/matlab/dynare_estimation_1.m b/matlab/dynare_estimation_1.m index 7e5071ded..73e24df17 100644 --- a/matlab/dynare_estimation_1.m +++ b/matlab/dynare_estimation_1.m @@ -611,8 +611,8 @@ if (any(bayestopt_.pshape >0 ) && options_.mh_replic) || ... CutSample(M_, options_, estim_params_); return else - if ~options_.nodiagnostic && options_.mh_replic > 2000 && options_.mh_nblck > 1 - McMCDiagnostics(options_, estim_params_, M_); + if ~options_.nodiagnostic && options_.mh_replic > 2000 + oo_= McMCDiagnostics(options_, estim_params_, M_,oo_); end %% Here i discard first half of the draws: CutSample(M_, options_, estim_params_); diff --git a/matlab/geweke_chi2_test.m b/matlab/geweke_chi2_test.m new file mode 100644 index 000000000..7ed383139 --- /dev/null +++ b/matlab/geweke_chi2_test.m @@ -0,0 +1,74 @@ +function results_struct = geweke_chi2_test(results1,results2,results_struct,options) +% results_struct = geweke_chi2_test(results1,results2,results_struct,options) +% PURPOSE: computes Geweke's chi-squared test for two sets of MCMC sample draws +% +% INPUTS +% results1 [1 by (4+n_taper*2) vector] vector with post. mean, +% std, NSE_iid, RNE_iid, and tapered NSE and RNE +% for chain part 1 +% results2 [1 by (4+n_taper*2) vector] vector with post. mean, +% std, NSE_iid, RNE_iid, and tapered NSE and RNE +% for chain part 2 +% results_struct [structure] results structure generated by geweke_moments +% Dynareoptions [structure] +% +% OUTPUTS +% results_struct [structure] containing the following fields: +% pooled_mean Pooled mean of the chain parts, weighted +% with precision +% rpooled_nse Pooled NSE of the chain parts, weighted +% with precision +% prob_chi2_test p-value of Chi2-test for equality of +% means in both chain parts +% ----------------------------------------------------------------- + +% +% SPECIAL REQUIREMENTS +% None. + +% Copyright (C) 2013 Dynare Team +% +% This file is part of Dynare. +% +% Dynare is free software: you can redistribute it and/or modify +% it under the terms of the GNU General Public License as published by +% the Free Software Foundation, either version 3 of the License, or +% (at your option) any later version. +% +% Dynare is distributed in the hope that it will be useful, +% but WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +% GNU General Public License for more details. +% +% You should have received a copy of the GNU General Public License +% along with Dynare. If not, see . +% +% ------------------------------------------------ +% REFERENCES: Geweke (1992), `Evaluating the accuracy of sampling-based +% approaches to the calculation of posterior moments', in J.O. Berger, +% J.M. Bernardo, A.P. Dawid, and A.F.M. Smith (eds.) Proceedings of +% the Fourth Valencia International Meeting on Bayesian Statistics, +% pp. 169-194, Oxford University Press +% Geweke (1999): `Using simulation methods for Bayesian econometric models: +% Inference, development and communication', Econometric Reviews, 18(1), +% 1-73 + +% written by: Johannes Pfeifer, +% based on code by James P. LeSage, who in turn +% drew on MATLAB programs written by Siddartha Chib + +for k=1:length(options.convergence.geweke.taper_steps)+1; + NSE=[results1(:,3+(k-1)*2) results2(:,3+(k-1)*2)]; + means=[results1(:,1) results2(:,1)]; + diff_Means=means(:,1)-means(:,2); + sum_of_weights=sum(1./(NSE.^2),2); + pooled_mean=sum(means./(NSE.^2),2)./sum_of_weights; + pooled_NSE=1./sqrt(sum_of_weights); + + test_stat=diff_Means.^2./sum(NSE.^2,2); + p = 1-chi2cdf(test_stat,1); + results_struct.pooled_mean(:,k) = pooled_mean; + results_struct.pooled_nse(:,k) = pooled_NSE; + results_struct.prob_chi2_test(:,k) = p; +end; + diff --git a/matlab/geweke_moments.m b/matlab/geweke_moments.m new file mode 100644 index 000000000..242a9d550 --- /dev/null +++ b/matlab/geweke_moments.m @@ -0,0 +1,109 @@ +function [results_vec, results_struct] = geweke_moments(draws,Dynareoptions) +%[results_vec, results_struct] = geweke_moments(draws,Dynareoptions) +% PURPOSE: computes Gewke's convergence diagnostics NSE and RNE +% (numerical std error and relative numerical efficiencies) + +% INPUTS +% draws [ndraws by 1 vector] +% Dynareoptions [structure] +% +% OUTPUTS +% results_vec +% results_struct [structure] containing the following fields: +% posteriormean= posterior parameter mean +% posteriorstd = posterior standard deviation +% nse_iid = nse assuming no serial correlation for variable i +% rne_iid = rne assuming no serial correlation for variable i +% nse_x = nse using x% autocovariance tapered estimate +% rne_x = rne using x% autocovariance taper +% ----------------------------------------------------------------- + +% +% SPECIAL REQUIREMENTS +% None. + +% Copyright (C) 2013 Dynare Team +% +% This file is part of Dynare. +% +% Dynare is free software: you can redistribute it and/or modify +% it under the terms of the GNU General Public License as published by +% the Free Software Foundation, either version 3 of the License, or +% (at your option) any later version. +% +% Dynare is distributed in the hope that it will be useful, +% but WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +% GNU General Public License for more details. +% +% You should have received a copy of the GNU General Public License +% along with Dynare. If not, see . + +% REFERENCES: Geweke (1992), `Evaluating the accuracy of sampling-based +% approaches to the calculation of posterior moments', in J.O. Berger, +% J.M. Bernardo, A.P. Dawid, and A.F.M. Smith (eds.) Proceedings of +% the Fourth Valencia International Meeting on Bayesian Statistics, +% pp. 169-194, Oxford University Press +% Geweke (1999): `Using simulation methods for Bayesian econometric models: +% Inference, development and communication', Econometric Reviews, 18(1), +% 1-73 +% ----------------------------------------------------------------- + +% written by: Johannes Pfeifer, +% based on code by James P. LeSage, who in turn +% drew on MATLAB programs written by Siddartha Chib + + +ndraw = size(draws,1); +n_groups=100; +taper_steps=Dynareoptions.convergence.geweke.taper_steps; +results_vec=zeros(1,4+2*length(taper_steps)); + +ns = floor(ndraw/n_groups); %step_size +n_draws_used = ns*n_groups; %effective number of draws used after rounding down + +window_means= zeros(n_groups,1); +window_uncentered_variances= zeros(n_groups,1); +for ig=1:n_groups; + window_means(ig,1)=sum(draws((ig-1)*ns+1:ig*ns,1))/ns; + window_uncentered_variances(ig,1)=sum(draws((ig-1)*ns+1:ig*ns,1).^2)/ns; +end; %for ig +total_mean=mean(window_means); +total_variance=mean(window_uncentered_variances)-total_mean^2; + +% save posterior means and std deviations to results_struct structure +results_vec(1,1)=total_mean; +results_vec(1,2)=sqrt(total_variance); +results_struct.posteriormean = total_mean; +results_struct.posteriorstd = results_vec(1,2); + +% numerical standard error assuming no serial correlation +NSE=std(draws(1:n_draws_used,1),1)/sqrt(n_draws_used); +% save to results_struct structure +results_vec(1,3)=NSE; +results_vec(1,4)=total_variance/(n_draws_used*NSE^2); +results_struct.nse_iid = NSE; +results_struct.rne_iid = results_vec(1,4); + +%get autocovariance of grouped means +centered_window_means=window_means-total_mean; +autocov_grouped_means=zeros(n_groups,1); +for lag=0:n_groups-1; + autocov_grouped_means(lag+1)=centered_window_means(lag+1:n_groups,1)'*centered_window_means(1:n_groups-lag,1)/100; +end; + +% numerical standard error with tapered autocovariance functions +for taper_index=1:length(taper_steps) + taper=taper_steps(taper_index); + taper_lags=(1:taper-1)'; + taper_lag_weight=1-taper_lags/taper; + tapered_sum_of_covariances=autocov_grouped_means(1)+sum(2*taper_lag_weight.*autocov_grouped_means(1+taper_lags)); + NSE_taper=sqrt(tapered_sum_of_covariances/n_groups); + % save results_struct in structure + results_vec(1,4+taper_index*2-1)=NSE_taper; + results_vec(1,4+taper_index*2)=total_variance/(n_draws_used*NSE_taper^2); + + eval(['results_struct.nse_taper_',num2str(taper),'= NSE_taper;']); + eval(['results_struct.rne_taper_',num2str(taper),'= total_variance/(n_draws_used*NSE_taper^2);']); +end; % end of for mm loop + diff --git a/matlab/global_initialization.m b/matlab/global_initialization.m index d06a054d9..c067ef33f 100644 --- a/matlab/global_initialization.m +++ b/matlab/global_initialization.m @@ -577,6 +577,10 @@ options_.osr.verbose=2; % use GPU options_.gpu = 0; +%Geweke convergence diagnostics +options_.convergence.geweke.taper_steps=[4 8 15]; +options_.convergence.geweke.geweke_interval=[0.2 0.5]; + % initialize persistent variables in priordens() priordens([],[],[],[],[],[],1); % initialize persistent variables in dyn_first_order_solver()