gsa: add corrcoef from octave forge nan package. closes #796

2014-11-24 16:06:05 +01:00 · 2014-11-24 16:06:05 +01:00 · 103788bb5b
parent 2254f846c5
commit 103788bb5b
5 changed files with 658 additions and 0 deletions
--- a/license.txt
+++ b/license.txt
@ -89,6 +89,12 @@ Copyright: 2007 Muthiah Annamalai <muthiah.annamalai@uta.edu>
           2013 Dynare Team
 License: GPL-3+

+Files: matlab/missing/corrcoef/corrcoef.m matlab/missing/corrcoef/sumskipnan.m
+ matlab/missing/corrcoef/flag_implicit_skip_nan.m
+Copyright: 2000-2005,2008,2009,2011 by Alois Schloegl <alois.schloegl@gmail.com>
+           2014 Dynare Team
+License: GPL-3+
+
 Files: matlab/lmmcp/catstruct.m
 Copyright: 2005 Jos van der Geest <jos@jasen.nl>
           2013 Christophe Gouel
--- a/matlab/dynare_config.m
+++ b/matlab/dynare_config.m
@ -91,6 +91,11 @@ if isoctave
    addpath([dynareroot '/missing/ilu'])
 end

+% corrcoef with two outputs is missing in Octave (ticket #796)
+if isoctave && ~user_has_octave_forge_package('nan')
+    addpath([dynareroot '/missing/corrcoef'])
+end
+
 % strjoin is missing in older versions of MATLAB and in Octave < 3.8
 if (isoctave && octave_ver_less_than('3.8')) || ...
        (~isoctave && matlab_ver_less_than('8.1'))
--- a/matlab/missing/corrcoef/corrcoef.m
+++ b/matlab/missing/corrcoef/corrcoef.m
@ -0,0 +1,387 @@
+function [R,sig,ci1,ci2,nan_sig] = corrcoef(X,Y,varargin)
+% CORRCOEF calculates the correlation matrix from pairwise correlations.
+%   The input data can contain missing values encoded with NaN.
+%   Missing data (NaN's) are handled by pairwise deletion [15].
+%   In order to avoid possible pitfalls, use case-wise deletion or
+%   or check the correlation of NaN's with your data (see below).
+%   A significance test for testing the Hypothesis
+%   'correlation coefficient R is significantly different to zero'
+%   is included.
+%
+% [...] = CORRCOEF(X);
+%      calculates the (auto-)correlation matrix of X
+% [...] = CORRCOEF(X,Y);
+%      calculates the crosscorrelation between X and Y
+%
+% [...] = CORRCOEF(..., Mode);
+%       Mode='Pearson' or 'parametric' [default]
+%               gives the correlation coefficient
+%               also known as the 'product-moment coefficient of correlation'
+%               or 'Pearson''s correlation' [1]
+%       Mode='Spearman'     gives 'Spearman''s Rank Correlation Coefficient'
+%               This replaces SPEARMAN.M
+%       Mode='Rank'         gives a nonparametric Rank Correlation Coefficient
+%               This is the "Spearman rank correlation with proper handling of ties"
+%               This replaces RANKCORR.M
+%
+% [...] = CORRCOEF(..., param1, value1, param2, value2, ... );
+%	param       value
+%	'Mode'		type of correlation
+%		'Pearson','parametric'
+%		'Spearman'
+%		'rank'
+%	'rows'		how do deal with missing values encoded as NaN's.
+%		'complete': remove all rows with at least one NaN
+%		'pairwise': [default]
+%	'alpha'		0.01	: significance level to compute confidence interval
+%
+% [R,p,ci1,ci2,nansig] = CORRCOEF(...);
+%   R is the correlation matrix
+%	R(i,j) is the correlation coefficient r between X(:,i) and Y(:,j)
+%  p    gives the significance of R
+%	It tests the null hypothesis that the product moment correlation coefficient is zero
+%       using Student's t-test on the statistic t = r*sqrt(N-2)/sqrt(1-r^2)
+%       where N is the number of samples (Statistics, M. Spiegel, Schaum series).
+%  p > alpha: do not reject the Null hypothesis: 'R is zero'.
+%  p < alpha: The alternative hypothesis 'R is larger than zero' is true with probability (1-alpha).
+%  ci1	lower (1-alpha) confidence interval
+%  ci2	upper (1-alpha) confidence interval
+%	If no alpha is provided, the default alpha is 0.01. This can be changed with function flag_implicit_significance.
+%  nan_sig  p-value whether H0: 'NaN''s are not correlated' could be correct
+%       if nan_sig < alpha, H1 ('NaNs are correlated') is very likely.
+%
+% The result is only valid if the occurence of NaN's is uncorrelated. In
+% order to avoid this pitfall, the correlation of NaN's should be checked
+% or case-wise deletion should be applied.
+%   Case-Wise deletion can be implemented
+%    ix = ~any(isnan([X,Y]),2);
+%    [...] = CORRCOEF(X(ix,:),Y(ix,:),...);
+%
+%  Correlation (non-random distribution) of NaN's can be checked with
+%       [nan_R,nan_sig]=corrcoef(X,isnan(X))
+%   or  [nan_R,nan_sig]=corrcoef([X,Y],isnan([X,Y]))
+%   or  [R,p,ci1,ci2] = CORRCOEF(...);
+%
+% Further recommandation related to the correlation coefficient:
+% + LOOK AT THE SCATTERPLOTS to make sure that the relationship is linear
+% + Correlation is not causation because
+%	it is not clear which parameter is 'cause' and which is 'effect' and
+%       the observed correlation between two variables might be due to the action of other, unobserved variables.
+%
+% see also: SUMSKIPNAN, COVM, COV, COR, SPEARMAN, RANKCORR, RANKS,
+%       PARTCORRCOEF, flag_implicit_significance
+%
+% REFERENCES:
+% on the correlation coefficient
+% [ 1] http://mathworld.wolfram.com/CorrelationCoefficient.html
+% [ 2] http://www.geography.btinternet.co.uk/spearman.htm
+% [ 3] Hogg, R. V. and Craig, A. T. Introduction to Mathematical Statistics, 5th ed.  New York: Macmillan, pp. 338 and 400, 1995.
+% [ 4] Lehmann, E. L. and D'Abrera, H. J. M. Nonparametrics: Statistical Methods Based on Ranks, rev. ed. Englewood Cliffs, NJ: Prentice-Hall, pp. 292, 300, and 323, 1998.
+% [ 5] Press, W. H.; Flannery, B. P.; Teukolsky, S. A.; and Vetterling, W. T. Numerical Recipes in FORTRAN: The Art of Scientific Computing, 2nd ed. Cambridge, England: Cambridge University Press, pp. 634-637, 1992
+% [ 6] http://mathworld.wolfram.com/SpearmanRankCorrelationCoefficient.html
+% on the significance test of the correlation coefficient
+% [11] http://www.met.rdg.ac.uk/cag/STATS/corr.html
+% [12] http://www.janda.org/c10/Lectures/topic06/L24-significanceR.htm
+% [13] http://faculty.vassar.edu/lowry/ch4apx.html
+% [14] http://davidmlane.com/hyperstat/B134689.html
+% [15] http://www.statsoft.com/textbook/stbasic.html%Correlations
+% others
+% [20] http://www.tufts.edu/~gdallal/corr.htm
+% [21] Fisher transformation http://en.wikipedia.org/wiki/Fisher_transformation
+
+%       $Id: corrcoef.m 9387 2011-12-15 10:42:14Z schloegl $
+%       Copyright (C) 2000-2004,2008,2009,2011 by Alois Schloegl <alois.schloegl@gmail.com>
+%       Copyright (C) 2014 Dynare Team
+%       This function is part of the NaN-toolbox
+%       http://pub.ist.ac.at/~schloegl/matlab/NaN/
+
+%    This program is free software: you can redistribute it and/or modify
+%    it under the terms of the GNU General Public License as published by
+%    the Free Software Foundation, either version 3 of the License, or
+%    (at your option) any later version.
+%
+%    This program is distributed in the hope that it will be useful,
+%    but WITHOUT ANY WARRANTY; without even the implied warranty of
+%    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+%    GNU General Public License for more details.
+%
+%    You should have received a copy of the GNU General Public License
+%    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+% Features:
+% + handles missing values (encoded as NaN's)
+%       + pairwise deletion of missing data
+%       + checks independence of missing values (NaNs)
+% + parametric and non-parametric (rank) correlation
+%       + Pearson's correlation
+%       + Spearman's rank correlation
+%       + Rank correlation (non-parametric, Spearman rank correlation with proper handling of ties)
+% + is fast, using an efficient algorithm O(n.log(n)) for calculating the ranks
+% + significance test for null-hypthesis: r=0
+% + confidence interval included
+% - rank correlation works for cell arrays, too (no check for missing values).
+% + compatible with Octave and Matlab
+
+global FLAG_NANS_OCCURED;
+
+NARG = nargout;	% needed because nargout is not reentrant in Octave, and corrcoef is recursive
+mode = [];
+
+if nargin==1
+        Y = [];
+        Mode='Pearson';
+elseif nargin==0
+        fprintf(2,'Error CORRCOEF: Missing argument(s)\n');
+elseif nargin>1
+        if ischar(Y)
+                varg = [Y,varargin];
+                Y=[];
+        else
+                varg = varargin;
+        end;
+
+        if length(varg)<1,
+                Mode = 'Pearson';
+        elseif length(varg)==1,
+                Mode = varg{1};
+        else
+                for k = 2:2:length(varg),
+                        mode = setfield(mode,lower(varg{k-1}),varg{k});
+                end;
+                if isfield(mode,'mode')
+                        Mode = mode.mode;
+                end;
+        end;
+end;
+if isempty(Mode) Mode='pearson'; end;
+Mode=[Mode,'        '];
+
+
+
+FLAG_WARNING = warning;		% save warning status
+warning('off');
+
+[r1,c1]=size(X);
+if ~isempty(Y)
+        [r2,c2]=size(Y);
+        if r1~=r2,
+                fprintf(2,'Error CORRCOEF: X and Y must have the same number of observations (rows).\n');
+                return;
+        end;
+        NN = real(~isnan(X)')*real(~isnan(Y));
+else
+        [r2,c2]=size(X);
+        NN = real(~isnan(X)')*real(~isnan(X));
+end;
+
+%%%%% generate combinations using indices for pairwise calculation of the correlation
+YESNAN = any(isnan(X(:))) | any(isnan(Y(:)));
+if YESNAN,
+    FLAG_NANS_OCCURED=(1==1);
+    if isfield(mode,'rows')
+        if strcmp(mode.rows,'complete')
+            ix = ~any([X,Y],2);
+            X = X(ix,:);
+            if ~isempty(Y)
+                Y = Y(ix,:);
+            end;
+            YESNAN = 0;
+            NN = size(X,1);
+        elseif strcmp(mode.rows,'all')
+            fprintf(1,'Warning: data contains NaNs, rows=pairwise is used.');
+            %%NN(NN < size(X,1)) = NaN;
+        elseif strcmp(mode.rows,'pairwise')
+            %%% default
+        end;
+    end;
+end;
+if isempty(Y),
+        IX = ones(c1)-diag(ones(c1,1));
+        [jx, jy ] = find(IX);
+        [jxo,jyo] = find(IX);
+    R = eye(c1);
+else
+        IX = sparse([],[],[],c1+c2,c1+c2,c1*c2);
+        IX(1:c1,c1+(1:c2)) = 1;
+        [jx,jy] = find(IX);
+
+        IX = ones(c1,c2);
+        [jxo,jyo] = find(IX);
+    R = zeros(c1,c2);
+end;
+
+if strcmp(lower(Mode(1:7)),'pearson');
+        % see http://mathworld.wolfram.com/CorrelationCoefficient.html
+    if ~YESNAN,
+                [S,N,SSQ] = sumskipnan(X,1);
+                if ~isempty(Y),
+                    [S2,N2,SSQ2] = sumskipnan(Y,1);
+                        CC = X'*Y;
+                        M1 = S./N;
+                        M2 = S2./N2;
+                        cc = CC./NN - M1'*M2;
+                        R  = cc./sqrt((SSQ./N-M1.*M1)'*(SSQ2./N2-M2.*M2));
+                else
+                        CC = X'*X;
+                        M  = S./N;
+                        cc = CC./NN - M'*M;
+                        v  = SSQ./N - M.*M; %max(N-1,0);
+                        R  = cc./sqrt(v'*v);
+                end;
+        else
+                if ~isempty(Y),
+                        X  = [X,Y];
+                end;
+                for k = 1:length(jx),
+                        %ik = ~any(isnan(X(:,[jx(k),jy(k)])),2);
+                        ik = ~isnan(X(:,jx(k))) & ~isnan(X(:,jy(k)));
+                        [s,n,s2] = sumskipnan(X(ik,[jx(k),jy(k)]),1);
+                        v  = (s2-s.*s./n)./n;
+                        cc = X(ik,jx(k))'*X(ik,jy(k));
+                        cc = cc/n(1) - prod(s./n);
+                        %r(k) = cc./sqrt(prod(v));
+                        R(jxo(k),jyo(k)) = cc./sqrt(prod(v));
+                end;
+    end
+
+elseif strcmp(lower(Mode(1:4)),'rank');
+        % see [ 6] http://mathworld.wolfram.com/SpearmanRankCorrelationCoefficient.html
+    if ~YESNAN,
+                if isempty(Y)
+                    R = corrcoef(ranks(X));
+                else
+                        R = corrcoef(ranks(X),ranks(Y));
+                end;
+        else
+                if ~isempty(Y),
+                        X = [X,Y];
+                end;
+                for k = 1:length(jx),
+                        %ik = ~any(isnan(X(:,[jx(k),jy(k)])),2);
+                        ik = ~isnan(X(:,jx(k))) & ~isnan(X(:,jy(k)));
+                        il = ranks(X(ik,[jx(k),jy(k)]));
+                        R(jxo(k),jyo(k)) = corrcoef(il(:,1),il(:,2));
+                end;
+        X = ranks(X);
+    end;
+
+elseif strcmp(lower(Mode(1:8)),'spearman');
+        % see [ 6] http://mathworld.wolfram.com/SpearmanRankCorrelationCoefficient.html
+        if ~isempty(Y),
+                X = [X,Y];
+        end;
+
+        n = repmat(nan,c1,c2);
+
+        if ~YESNAN,
+                iy = ranks(X);	%  calculates ranks;
+
+                for k = 1:length(jx),
+                        [R(jxo(k),jyo(k)),n(jxo(k),jyo(k))] = sumskipnan((iy(:,jx(k)) - iy(:,jy(k))).^2);	% NN is the number of non-missing values
+                end;
+        else
+                for k = 1:length(jx),
+                        %ik = ~any(isnan(X(:,[jx(k),jy(k)])),2);
+                        ik = ~isnan(X(:,jx(k))) & ~isnan(X(:,jy(k)));
+                        il = ranks(X(ik,[jx(k),jy(k)]));
+                        % NN is the number of non-missing values
+                        [R(jxo(k),jyo(k)),n(jxo(k),jyo(k))] = sumskipnan((il(:,1) - il(:,2)).^2);
+                end;
+        X = ranks(X);
+        end;
+        R = 1 - 6 * R ./ (n.*(n.*n-1));
+
+elseif strcmp(lower(Mode(1:7)),'partial');
+        fprintf(2,'Error CORRCOEF: use PARTCORRCOEF \n',Mode);
+
+        return;
+
+elseif strcmp(lower(Mode(1:7)),'kendall');
+        fprintf(2,'Error CORRCOEF: mode ''%s'' not implemented yet.\n',Mode);
+
+        return;
+else
+        fprintf(2,'Error CORRCOEF: unknown mode ''%s''\n',Mode);
+end;
+
+if (NARG<2),
+        warning(FLAG_WARNING);  % restore warning status
+        return;
+end;
+
+
+% CONFIDENCE INTERVAL
+if isfield(mode,'alpha')
+    alpha = mode.alpha;
+elseif exist('flag_implicit_significance','file'),
+        alpha = flag_implicit_significance;
+else
+    alpha = 0.01;
+end;
+% fprintf(1,'CORRCOEF: confidence interval is based on alpha=%f\n',alpha);
+
+
+% SIGNIFICANCE TEST
+R(isnan(R))=0;
+tmp = 1 - R.*R;
+tmp(tmp<0) = 0;		% prevent tmp<0 i.e. imag(t)~=0
+t   = R.*sqrt(max(NN-2,0)./tmp);
+
+if exist('t_cdf','file');
+        sig = t_cdf(t,NN-2);
+elseif exist('tcdf','file')>1;
+        sig = tcdf(t,NN-2);
+else
+        fprintf('CORRCOEF: significance test not completed because of missing TCDF-function\n')
+        sig = repmat(nan,size(R));
+end;
+sig  = 2 * min(sig,1 - sig);
+
+
+if NARG<3,
+    warning(FLAG_WARNING);  % restore warning status
+        return;
+end;
+
+
+tmp = R;
+%tmp(ix1 | ix2) = nan;		% avoid division-by-zero warning
+z   = log((1+tmp)./(1-tmp))/2;  % Fisher transformation [21]
+%sz = 1./sqrt(NN-3);		% standard error of z
+sz  = sqrt(2)*erfinv(1-alpha)./sqrt(NN-3);	% confidence interval for alpha of z
+
+ci1 = tanh(z-sz);
+ci2 = tanh(z+sz);
+
+%ci1(isnan(ci1))=R(isnan(ci1));	% in case of isnan(ci), the interval limits are exactly the R value
+%ci2(isnan(ci2))=R(isnan(ci2));
+
+if (NARG<5) || ~YESNAN,
+    nan_sig = repmat(NaN,size(R));
+    warning(FLAG_WARNING);  % restore warning status
+    return;
+end;
+
+%%%%% ----- check independence of NaNs (missing values) -----
+[nan_R, nan_sig] = corrcoef(X,double(isnan(X)));
+
+% remove diagonal elements, because these have not any meaning %
+nan_sig(isnan(nan_R)) = nan;
+% remove diagonal elements, because these have not any meaning %
+nan_R(isnan(nan_R)) = 0;
+
+if 0, any(nan_sig(:) < alpha),
+        tmp = nan_sig(:);			% Hack to skip NaN's in MIN(X)
+        min_sig = min(tmp(~isnan(tmp)));    % Necessary, because Octave returns NaN rather than min(X) for min(NaN,X)
+        fprintf(1,'CORRCOFF Warning: Missing Values (i.e. NaNs) are not independent of data (p-value=%f)\n', min_sig);
+        fprintf(1,'   Its recommended to remove all samples (i.e. rows) with any missing value (NaN).\n');
+        fprintf(1,'   The null-hypotheses (NaNs are uncorrelated) is rejected for the following parameter pair(s).\n');
+        [ix,iy] = find(nan_sig < alpha);
+        disp([ix,iy])
+end;
+
+%%%%% ----- end of independence check ------
+
+warning(FLAG_WARNING);  % restore warning status
+return;
+
--- a/matlab/missing/corrcoef/flag_implicit_skip_nan.m
+++ b/matlab/missing/corrcoef/flag_implicit_skip_nan.m
@ -0,0 +1,66 @@
+function FLAG = flag_implicit_skip_nan(i)
+% FLAG_IMPLICIT_SKIP_NAN sets and gets default mode for handling NaNs
+%	1 skips NaN's (the default mode if no mode is set)
+%   0 NaNs are propagated; input NaN's give NaN's at the output
+%
+% FLAG = flag_implicit_skip_nan()
+%   gets current mode
+%
+% flag_implicit_skip_nan(FLAG)    % sets mode
+%
+% prevFLAG = flag_implicit_skip_nan(nextFLAG)
+%	gets previous set FLAG and sets FLAG for the future
+% flag_implicit_skip_nan(prevFLAG)
+%	resets FLAG to previous mode
+%
+% It is used in:
+%	SUMSKIPNAN, MEDIAN, QUANTILES, TRIMEAN
+% and affects many other functions like:
+%	CENTER, KURTOSIS, MAD, MEAN, MOMENT, RMS, SEM, SKEWNESS,
+%	STATISTIC, STD, VAR, ZSCORE etc.
+%
+% The mode is stored in the global variable FLAG_implicit_skip_nan
+% It is recommended to use flag_implicit_skip_nan(1) as default and
+% flag_implicit_skip_nan(0) should be used for exceptional cases only.
+% This feature might disappear without further notice, so you should really not
+% rely on it.
+
+
+%    This program is free software; you can redistribute it and/or modify
+%    it under the terms of the GNU General Public License as published by
+%    the Free Software Foundation; either version 3 of the License, or
+%    (at your option) any later version.
+%
+%    This program is distributed in the hope that it will be useful,
+%    but WITHOUT ANY WARRANTY; without even the implied warranty of
+%    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+%    GNU General Public License for more details.
+%
+%    You should have received a copy of the GNU General Public License
+%    along with this program; if not, write to the Free Software
+%    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA
+
+%	$Id: flag_implicit_skip_nan.m 8351 2011-06-24 17:35:07Z carandraug $
+%   Copyright (C) 2001-2003,2009 by Alois Schloegl <alois.schloegl@gmail.com>
+%   Copyright (C) 2014 Dynare Team
+%       This function is part of the NaN-toolbox
+%       http://pub.ist.ac.at/~schloegl/matlab/NaN/
+
+
+persistent FLAG_implicit_skip_nan;
+
+%% if strcmp(version,'3.6'), FLAG_implicit_skip_nan=(1==1); end;	%% hack for the use with Freemat3.6
+
+%%% set DEFAULT value of FLAG
+if isempty(FLAG_implicit_skip_nan),
+    FLAG_implicit_skip_nan = (1==1); %logical(1); % logical.m not available on 2.0.16
+end;
+
+FLAG = FLAG_implicit_skip_nan;
+if nargin>0,
+    FLAG_implicit_skip_nan = (i~=0); %logical(i); %logical.m not available in 2.0.16
+    if (~i)
+        warning('flag_implicit_skipnan(0): You are warned!!! You have turned off skipping NaN in sumskipnan. This is not recommended. Make sure you really know what you do.')
+    end;
+end;
+
--- a/matlab/missing/corrcoef/sumskipnan.m
+++ b/matlab/missing/corrcoef/sumskipnan.m
@ -0,0 +1,194 @@
+function [o,count,SSQ] = sumskipnan(x, DIM, W)
+% SUMSKIPNAN adds all non-NaN values.
+%
+% All NaN's are skipped; NaN's are considered as missing values.
+% SUMSKIPNAN of NaN's only  gives O; and the number of valid elements is return.
+% SUMSKIPNAN is also the elementary function for calculating
+% various statistics (e.g. MEAN, STD, VAR, RMS, MEANSQ, SKEWNESS,
+% KURTOSIS, MOMENT, STATISTIC etc.) from data with missing values.
+% SUMSKIPNAN implements the DIMENSION-argument for data with missing values.
+% Also the second output argument return the number of valid elements (not NaNs)
+%
+% Y = sumskipnan(x [,DIM])
+% [Y,N,SSQ] = sumskipnan(x [,DIM])
+% [...] = sumskipnan(x, DIM, W)
+%
+% x	input data
+% DIM	dimension (default: [])
+%	empty DIM sets DIM to first non singleton dimension
+% W	weight vector for weighted sum, numel(W) must fit size(x,DIM)
+% Y	resulting sum
+% N	number of valid (not missing) elements
+% SSQ	sum of squares
+%
+% the function FLAG_NANS_OCCURED() returns whether any value in x
+%  is a not-a-number (NaN)
+%
+% features:
+% - can deal with NaN's (missing values)
+% - implements dimension argument.
+% - computes weighted sum
+% - compatible with Matlab and Octave
+%
+% see also: FLAG_NANS_OCCURED, SUM, NANSUM, MEAN, STD, VAR, RMS, MEANSQ,
+%      SSQ, MOMENT, SKEWNESS, KURTOSIS, SEM
+
+
+%    This program is free software; you can redistribute it and/or modify
+%    it under the terms of the GNU General Public License as published by
+%    the Free Software Foundation; either version 3 of the License, or
+%    (at your option) any later version.
+%
+%    This program is distributed in the hope that it will be useful,
+%    but WITHOUT ANY WARRANTY; without even the implied warranty of
+%    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+%    GNU General Public License for more details.
+%
+%    You should have received a copy of the GNU General Public License
+%    along with this program; If not, see <http://www.gnu.org/licenses/>.
+
+%	$Id: sumskipnan.m 9033 2011-11-08 20:58:07Z schloegl $
+%       Copyright (C) 2000-2005,2009,2011 by Alois Schloegl <alois.schloegl@gmail.com>
+%       Copyright (C) 2014 Dynare Team
+%       This function is part of the NaN-toolbox
+%       http://pub.ist.ac.at/~schloegl/matlab/NaN/
+
+
+global FLAG_NANS_OCCURED;
+
+if nargin<2,
+        DIM = [];
+end;
+if nargin<3,
+        W = [];
+end;
+
+% an efficient implementation in C of the following lines
+% could significantly increase performance
+% only one loop and only one check for isnan is needed
+% An MEX-Implementation is available in sumskipnan.cpp
+%
+% Outline of the algorithm:
+% for { k=1,o=0,count=0; k++; k<N}
+%   if ~isnan(i(k))
+%   {   o     += x(k);
+%               count += 1;
+%		tmp    = x(k)*x(k)
+%		o2    += tmp;
+%		o3    += tmp.*tmp;
+%       };
+
+if isempty(DIM),
+        DIM = find(size(x)>1,1);
+        if isempty(DIM), DIM = 1; end;
+end
+if (DIM<1), DIM = 1; end; %% Hack, because min([])=0 for FreeMat v3.5
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% non-float data
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+if  (isempty(W) && (~(isa(x,'float') || isa(x,'double')))) || ~flag_implicit_skip_nan(), %%% skip always NaN's
+    if ~isempty(W)
+        error('SUMSKIPNAN: weighted sum of integers not supported, yet');
+    end;
+    x = double(x);
+    o = sum(x,DIM);
+    if nargout>1
+        sz = size(x);
+        N  = sz(DIM);
+        sz(DIM) = 1;
+        count = repmat(N,sz);
+        if nargout>2
+            x = x.*x;
+            SSQ = sum(x,DIM);
+        end;
+    end;
+    return;
+end;
+
+if (length(size(x))<DIM)
+    error('SUMSKIPNAN: DIM argument larger than number of dimensions of x');
+elseif ~isempty(W) && (size(x,DIM)~=numel(W))
+    error('SUMSKIPNAN: size of weight vector does not match size(x,DIM)');
+end;
+
+%% mex and oct files expect double
+x = double(x);
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% use Matlab-MEX function when available
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%if 1,
+try
+
+    %% using sumskipnan_mex.mex
+
+    %% !!! hack: FLAG_NANS_OCCURED is an output argument, reserve memory !!!
+    if isempty(FLAG_NANS_OCCURED),
+        FLAG_NANS_OCCURED = logical(0);  % default value
+    end;
+
+    if (nargout<2),
+        o = sumskipnan_mex(real(x),DIM,FLAG_NANS_OCCURED,W);
+        if (~isreal(x))
+            io = sumskipnan_mex(imag(x),DIM,FLAG_NANS_OCCURED,W);
+            o  = o + i*io;
+        end;
+        return;
+    elseif (nargout==2),
+        [o,count] = sumskipnan_mex(real(x),DIM,FLAG_NANS_OCCURED,W);
+        if (~isreal(x))
+            [io,icount] = sumskipnan_mex(imag(x),DIM,FLAG_NANS_OCCURED,W);
+            if any(count(:)-icount(:))
+                error('Number of NaNs differ for REAL and IMAG part');
+            else
+                o  = o+i*io;
+            end;
+        end;
+        return;
+    elseif (nargout>=3),
+        [o,count,SSQ] = sumskipnan_mex(real(x),DIM,FLAG_NANS_OCCURED,W);
+        if (~isreal(x))
+            [io,icount,iSSQ] = sumskipnan_mex(imag(x),DIM,FLAG_NANS_OCCURED,W);
+            if any(count(:)-icount(:))
+                error('Number of NaNs differ for REAL and IMAG part');
+            else
+                o  = o+i*io;
+                SSQ = SSQ+iSSQ;
+            end;
+        end;
+        return;
+    end;
+end;
+
+if ~isempty(W)
+    error('weighted sumskipnan requires sumskipnan_mex');
+end;
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% count non-NaN's
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+if nargout>1,
+        count = sum(x==x,DIM);
+    FLAG_NANS_OCCURED = any(count(:)<size(x,DIM));
+end;
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% replace NaN's with zero
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+x(x~=x) = 0;
+o = sum(x,DIM);
+
+if nargout>2,
+        x = real(x).^2 + imag(x).^2;
+        SSQ = sum(x,DIM);
+end;
+
+%!assert(sumskipnan([1,2],1),[1,2])
+%!assert(sumskipnan([1,NaN],2),1)
+%!assert(sumskipnan([1,NaN],2),1)
+%!assert(sumskipnan([nan,1,4,5]),10)
+%!assert(sumskipnan([nan,1,4,5]',1,[3;2;1;0]),6)
+
+
+