gsa: add corrcoef from octave forge nan package. closes #796
parent
2254f846c5
commit
103788bb5b
|
@ -89,6 +89,12 @@ Copyright: 2007 Muthiah Annamalai <muthiah.annamalai@uta.edu>
|
|||
2013 Dynare Team
|
||||
License: GPL-3+
|
||||
|
||||
Files: matlab/missing/corrcoef/corrcoef.m matlab/missing/corrcoef/sumskipnan.m
|
||||
matlab/missing/corrcoef/flag_implicit_skip_nan.m
|
||||
Copyright: 2000-2005,2008,2009,2011 by Alois Schloegl <alois.schloegl@gmail.com>
|
||||
2014 Dynare Team
|
||||
License: GPL-3+
|
||||
|
||||
Files: matlab/lmmcp/catstruct.m
|
||||
Copyright: 2005 Jos van der Geest <jos@jasen.nl>
|
||||
2013 Christophe Gouel
|
||||
|
|
|
@ -91,6 +91,11 @@ if isoctave
|
|||
addpath([dynareroot '/missing/ilu'])
|
||||
end
|
||||
|
||||
% corrcoef with two outputs is missing in Octave (ticket #796)
|
||||
if isoctave && ~user_has_octave_forge_package('nan')
|
||||
addpath([dynareroot '/missing/corrcoef'])
|
||||
end
|
||||
|
||||
% strjoin is missing in older versions of MATLAB and in Octave < 3.8
|
||||
if (isoctave && octave_ver_less_than('3.8')) || ...
|
||||
(~isoctave && matlab_ver_less_than('8.1'))
|
||||
|
|
|
@ -0,0 +1,387 @@
|
|||
function [R,sig,ci1,ci2,nan_sig] = corrcoef(X,Y,varargin)
|
||||
% CORRCOEF calculates the correlation matrix from pairwise correlations.
|
||||
% The input data can contain missing values encoded with NaN.
|
||||
% Missing data (NaN's) are handled by pairwise deletion [15].
|
||||
% In order to avoid possible pitfalls, use case-wise deletion or
|
||||
% or check the correlation of NaN's with your data (see below).
|
||||
% A significance test for testing the Hypothesis
|
||||
% 'correlation coefficient R is significantly different to zero'
|
||||
% is included.
|
||||
%
|
||||
% [...] = CORRCOEF(X);
|
||||
% calculates the (auto-)correlation matrix of X
|
||||
% [...] = CORRCOEF(X,Y);
|
||||
% calculates the crosscorrelation between X and Y
|
||||
%
|
||||
% [...] = CORRCOEF(..., Mode);
|
||||
% Mode='Pearson' or 'parametric' [default]
|
||||
% gives the correlation coefficient
|
||||
% also known as the 'product-moment coefficient of correlation'
|
||||
% or 'Pearson''s correlation' [1]
|
||||
% Mode='Spearman' gives 'Spearman''s Rank Correlation Coefficient'
|
||||
% This replaces SPEARMAN.M
|
||||
% Mode='Rank' gives a nonparametric Rank Correlation Coefficient
|
||||
% This is the "Spearman rank correlation with proper handling of ties"
|
||||
% This replaces RANKCORR.M
|
||||
%
|
||||
% [...] = CORRCOEF(..., param1, value1, param2, value2, ... );
|
||||
% param value
|
||||
% 'Mode' type of correlation
|
||||
% 'Pearson','parametric'
|
||||
% 'Spearman'
|
||||
% 'rank'
|
||||
% 'rows' how do deal with missing values encoded as NaN's.
|
||||
% 'complete': remove all rows with at least one NaN
|
||||
% 'pairwise': [default]
|
||||
% 'alpha' 0.01 : significance level to compute confidence interval
|
||||
%
|
||||
% [R,p,ci1,ci2,nansig] = CORRCOEF(...);
|
||||
% R is the correlation matrix
|
||||
% R(i,j) is the correlation coefficient r between X(:,i) and Y(:,j)
|
||||
% p gives the significance of R
|
||||
% It tests the null hypothesis that the product moment correlation coefficient is zero
|
||||
% using Student's t-test on the statistic t = r*sqrt(N-2)/sqrt(1-r^2)
|
||||
% where N is the number of samples (Statistics, M. Spiegel, Schaum series).
|
||||
% p > alpha: do not reject the Null hypothesis: 'R is zero'.
|
||||
% p < alpha: The alternative hypothesis 'R is larger than zero' is true with probability (1-alpha).
|
||||
% ci1 lower (1-alpha) confidence interval
|
||||
% ci2 upper (1-alpha) confidence interval
|
||||
% If no alpha is provided, the default alpha is 0.01. This can be changed with function flag_implicit_significance.
|
||||
% nan_sig p-value whether H0: 'NaN''s are not correlated' could be correct
|
||||
% if nan_sig < alpha, H1 ('NaNs are correlated') is very likely.
|
||||
%
|
||||
% The result is only valid if the occurence of NaN's is uncorrelated. In
|
||||
% order to avoid this pitfall, the correlation of NaN's should be checked
|
||||
% or case-wise deletion should be applied.
|
||||
% Case-Wise deletion can be implemented
|
||||
% ix = ~any(isnan([X,Y]),2);
|
||||
% [...] = CORRCOEF(X(ix,:),Y(ix,:),...);
|
||||
%
|
||||
% Correlation (non-random distribution) of NaN's can be checked with
|
||||
% [nan_R,nan_sig]=corrcoef(X,isnan(X))
|
||||
% or [nan_R,nan_sig]=corrcoef([X,Y],isnan([X,Y]))
|
||||
% or [R,p,ci1,ci2] = CORRCOEF(...);
|
||||
%
|
||||
% Further recommandation related to the correlation coefficient:
|
||||
% + LOOK AT THE SCATTERPLOTS to make sure that the relationship is linear
|
||||
% + Correlation is not causation because
|
||||
% it is not clear which parameter is 'cause' and which is 'effect' and
|
||||
% the observed correlation between two variables might be due to the action of other, unobserved variables.
|
||||
%
|
||||
% see also: SUMSKIPNAN, COVM, COV, COR, SPEARMAN, RANKCORR, RANKS,
|
||||
% PARTCORRCOEF, flag_implicit_significance
|
||||
%
|
||||
% REFERENCES:
|
||||
% on the correlation coefficient
|
||||
% [ 1] http://mathworld.wolfram.com/CorrelationCoefficient.html
|
||||
% [ 2] http://www.geography.btinternet.co.uk/spearman.htm
|
||||
% [ 3] Hogg, R. V. and Craig, A. T. Introduction to Mathematical Statistics, 5th ed. New York: Macmillan, pp. 338 and 400, 1995.
|
||||
% [ 4] Lehmann, E. L. and D'Abrera, H. J. M. Nonparametrics: Statistical Methods Based on Ranks, rev. ed. Englewood Cliffs, NJ: Prentice-Hall, pp. 292, 300, and 323, 1998.
|
||||
% [ 5] Press, W. H.; Flannery, B. P.; Teukolsky, S. A.; and Vetterling, W. T. Numerical Recipes in FORTRAN: The Art of Scientific Computing, 2nd ed. Cambridge, England: Cambridge University Press, pp. 634-637, 1992
|
||||
% [ 6] http://mathworld.wolfram.com/SpearmanRankCorrelationCoefficient.html
|
||||
% on the significance test of the correlation coefficient
|
||||
% [11] http://www.met.rdg.ac.uk/cag/STATS/corr.html
|
||||
% [12] http://www.janda.org/c10/Lectures/topic06/L24-significanceR.htm
|
||||
% [13] http://faculty.vassar.edu/lowry/ch4apx.html
|
||||
% [14] http://davidmlane.com/hyperstat/B134689.html
|
||||
% [15] http://www.statsoft.com/textbook/stbasic.html%Correlations
|
||||
% others
|
||||
% [20] http://www.tufts.edu/~gdallal/corr.htm
|
||||
% [21] Fisher transformation http://en.wikipedia.org/wiki/Fisher_transformation
|
||||
|
||||
% $Id: corrcoef.m 9387 2011-12-15 10:42:14Z schloegl $
|
||||
% Copyright (C) 2000-2004,2008,2009,2011 by Alois Schloegl <alois.schloegl@gmail.com>
|
||||
% Copyright (C) 2014 Dynare Team
|
||||
% This function is part of the NaN-toolbox
|
||||
% http://pub.ist.ac.at/~schloegl/matlab/NaN/
|
||||
|
||||
% This program is free software: you can redistribute it and/or modify
|
||||
% it under the terms of the GNU General Public License as published by
|
||||
% the Free Software Foundation, either version 3 of the License, or
|
||||
% (at your option) any later version.
|
||||
%
|
||||
% This program is distributed in the hope that it will be useful,
|
||||
% but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
% GNU General Public License for more details.
|
||||
%
|
||||
% You should have received a copy of the GNU General Public License
|
||||
% along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
% Features:
|
||||
% + handles missing values (encoded as NaN's)
|
||||
% + pairwise deletion of missing data
|
||||
% + checks independence of missing values (NaNs)
|
||||
% + parametric and non-parametric (rank) correlation
|
||||
% + Pearson's correlation
|
||||
% + Spearman's rank correlation
|
||||
% + Rank correlation (non-parametric, Spearman rank correlation with proper handling of ties)
|
||||
% + is fast, using an efficient algorithm O(n.log(n)) for calculating the ranks
|
||||
% + significance test for null-hypthesis: r=0
|
||||
% + confidence interval included
|
||||
% - rank correlation works for cell arrays, too (no check for missing values).
|
||||
% + compatible with Octave and Matlab
|
||||
|
||||
global FLAG_NANS_OCCURED;
|
||||
|
||||
NARG = nargout; % needed because nargout is not reentrant in Octave, and corrcoef is recursive
|
||||
mode = [];
|
||||
|
||||
if nargin==1
|
||||
Y = [];
|
||||
Mode='Pearson';
|
||||
elseif nargin==0
|
||||
fprintf(2,'Error CORRCOEF: Missing argument(s)\n');
|
||||
elseif nargin>1
|
||||
if ischar(Y)
|
||||
varg = [Y,varargin];
|
||||
Y=[];
|
||||
else
|
||||
varg = varargin;
|
||||
end;
|
||||
|
||||
if length(varg)<1,
|
||||
Mode = 'Pearson';
|
||||
elseif length(varg)==1,
|
||||
Mode = varg{1};
|
||||
else
|
||||
for k = 2:2:length(varg),
|
||||
mode = setfield(mode,lower(varg{k-1}),varg{k});
|
||||
end;
|
||||
if isfield(mode,'mode')
|
||||
Mode = mode.mode;
|
||||
end;
|
||||
end;
|
||||
end;
|
||||
if isempty(Mode) Mode='pearson'; end;
|
||||
Mode=[Mode,' '];
|
||||
|
||||
|
||||
|
||||
FLAG_WARNING = warning; % save warning status
|
||||
warning('off');
|
||||
|
||||
[r1,c1]=size(X);
|
||||
if ~isempty(Y)
|
||||
[r2,c2]=size(Y);
|
||||
if r1~=r2,
|
||||
fprintf(2,'Error CORRCOEF: X and Y must have the same number of observations (rows).\n');
|
||||
return;
|
||||
end;
|
||||
NN = real(~isnan(X)')*real(~isnan(Y));
|
||||
else
|
||||
[r2,c2]=size(X);
|
||||
NN = real(~isnan(X)')*real(~isnan(X));
|
||||
end;
|
||||
|
||||
%%%%% generate combinations using indices for pairwise calculation of the correlation
|
||||
YESNAN = any(isnan(X(:))) | any(isnan(Y(:)));
|
||||
if YESNAN,
|
||||
FLAG_NANS_OCCURED=(1==1);
|
||||
if isfield(mode,'rows')
|
||||
if strcmp(mode.rows,'complete')
|
||||
ix = ~any([X,Y],2);
|
||||
X = X(ix,:);
|
||||
if ~isempty(Y)
|
||||
Y = Y(ix,:);
|
||||
end;
|
||||
YESNAN = 0;
|
||||
NN = size(X,1);
|
||||
elseif strcmp(mode.rows,'all')
|
||||
fprintf(1,'Warning: data contains NaNs, rows=pairwise is used.');
|
||||
%%NN(NN < size(X,1)) = NaN;
|
||||
elseif strcmp(mode.rows,'pairwise')
|
||||
%%% default
|
||||
end;
|
||||
end;
|
||||
end;
|
||||
if isempty(Y),
|
||||
IX = ones(c1)-diag(ones(c1,1));
|
||||
[jx, jy ] = find(IX);
|
||||
[jxo,jyo] = find(IX);
|
||||
R = eye(c1);
|
||||
else
|
||||
IX = sparse([],[],[],c1+c2,c1+c2,c1*c2);
|
||||
IX(1:c1,c1+(1:c2)) = 1;
|
||||
[jx,jy] = find(IX);
|
||||
|
||||
IX = ones(c1,c2);
|
||||
[jxo,jyo] = find(IX);
|
||||
R = zeros(c1,c2);
|
||||
end;
|
||||
|
||||
if strcmp(lower(Mode(1:7)),'pearson');
|
||||
% see http://mathworld.wolfram.com/CorrelationCoefficient.html
|
||||
if ~YESNAN,
|
||||
[S,N,SSQ] = sumskipnan(X,1);
|
||||
if ~isempty(Y),
|
||||
[S2,N2,SSQ2] = sumskipnan(Y,1);
|
||||
CC = X'*Y;
|
||||
M1 = S./N;
|
||||
M2 = S2./N2;
|
||||
cc = CC./NN - M1'*M2;
|
||||
R = cc./sqrt((SSQ./N-M1.*M1)'*(SSQ2./N2-M2.*M2));
|
||||
else
|
||||
CC = X'*X;
|
||||
M = S./N;
|
||||
cc = CC./NN - M'*M;
|
||||
v = SSQ./N - M.*M; %max(N-1,0);
|
||||
R = cc./sqrt(v'*v);
|
||||
end;
|
||||
else
|
||||
if ~isempty(Y),
|
||||
X = [X,Y];
|
||||
end;
|
||||
for k = 1:length(jx),
|
||||
%ik = ~any(isnan(X(:,[jx(k),jy(k)])),2);
|
||||
ik = ~isnan(X(:,jx(k))) & ~isnan(X(:,jy(k)));
|
||||
[s,n,s2] = sumskipnan(X(ik,[jx(k),jy(k)]),1);
|
||||
v = (s2-s.*s./n)./n;
|
||||
cc = X(ik,jx(k))'*X(ik,jy(k));
|
||||
cc = cc/n(1) - prod(s./n);
|
||||
%r(k) = cc./sqrt(prod(v));
|
||||
R(jxo(k),jyo(k)) = cc./sqrt(prod(v));
|
||||
end;
|
||||
end
|
||||
|
||||
elseif strcmp(lower(Mode(1:4)),'rank');
|
||||
% see [ 6] http://mathworld.wolfram.com/SpearmanRankCorrelationCoefficient.html
|
||||
if ~YESNAN,
|
||||
if isempty(Y)
|
||||
R = corrcoef(ranks(X));
|
||||
else
|
||||
R = corrcoef(ranks(X),ranks(Y));
|
||||
end;
|
||||
else
|
||||
if ~isempty(Y),
|
||||
X = [X,Y];
|
||||
end;
|
||||
for k = 1:length(jx),
|
||||
%ik = ~any(isnan(X(:,[jx(k),jy(k)])),2);
|
||||
ik = ~isnan(X(:,jx(k))) & ~isnan(X(:,jy(k)));
|
||||
il = ranks(X(ik,[jx(k),jy(k)]));
|
||||
R(jxo(k),jyo(k)) = corrcoef(il(:,1),il(:,2));
|
||||
end;
|
||||
X = ranks(X);
|
||||
end;
|
||||
|
||||
elseif strcmp(lower(Mode(1:8)),'spearman');
|
||||
% see [ 6] http://mathworld.wolfram.com/SpearmanRankCorrelationCoefficient.html
|
||||
if ~isempty(Y),
|
||||
X = [X,Y];
|
||||
end;
|
||||
|
||||
n = repmat(nan,c1,c2);
|
||||
|
||||
if ~YESNAN,
|
||||
iy = ranks(X); % calculates ranks;
|
||||
|
||||
for k = 1:length(jx),
|
||||
[R(jxo(k),jyo(k)),n(jxo(k),jyo(k))] = sumskipnan((iy(:,jx(k)) - iy(:,jy(k))).^2); % NN is the number of non-missing values
|
||||
end;
|
||||
else
|
||||
for k = 1:length(jx),
|
||||
%ik = ~any(isnan(X(:,[jx(k),jy(k)])),2);
|
||||
ik = ~isnan(X(:,jx(k))) & ~isnan(X(:,jy(k)));
|
||||
il = ranks(X(ik,[jx(k),jy(k)]));
|
||||
% NN is the number of non-missing values
|
||||
[R(jxo(k),jyo(k)),n(jxo(k),jyo(k))] = sumskipnan((il(:,1) - il(:,2)).^2);
|
||||
end;
|
||||
X = ranks(X);
|
||||
end;
|
||||
R = 1 - 6 * R ./ (n.*(n.*n-1));
|
||||
|
||||
elseif strcmp(lower(Mode(1:7)),'partial');
|
||||
fprintf(2,'Error CORRCOEF: use PARTCORRCOEF \n',Mode);
|
||||
|
||||
return;
|
||||
|
||||
elseif strcmp(lower(Mode(1:7)),'kendall');
|
||||
fprintf(2,'Error CORRCOEF: mode ''%s'' not implemented yet.\n',Mode);
|
||||
|
||||
return;
|
||||
else
|
||||
fprintf(2,'Error CORRCOEF: unknown mode ''%s''\n',Mode);
|
||||
end;
|
||||
|
||||
if (NARG<2),
|
||||
warning(FLAG_WARNING); % restore warning status
|
||||
return;
|
||||
end;
|
||||
|
||||
|
||||
% CONFIDENCE INTERVAL
|
||||
if isfield(mode,'alpha')
|
||||
alpha = mode.alpha;
|
||||
elseif exist('flag_implicit_significance','file'),
|
||||
alpha = flag_implicit_significance;
|
||||
else
|
||||
alpha = 0.01;
|
||||
end;
|
||||
% fprintf(1,'CORRCOEF: confidence interval is based on alpha=%f\n',alpha);
|
||||
|
||||
|
||||
% SIGNIFICANCE TEST
|
||||
R(isnan(R))=0;
|
||||
tmp = 1 - R.*R;
|
||||
tmp(tmp<0) = 0; % prevent tmp<0 i.e. imag(t)~=0
|
||||
t = R.*sqrt(max(NN-2,0)./tmp);
|
||||
|
||||
if exist('t_cdf','file');
|
||||
sig = t_cdf(t,NN-2);
|
||||
elseif exist('tcdf','file')>1;
|
||||
sig = tcdf(t,NN-2);
|
||||
else
|
||||
fprintf('CORRCOEF: significance test not completed because of missing TCDF-function\n')
|
||||
sig = repmat(nan,size(R));
|
||||
end;
|
||||
sig = 2 * min(sig,1 - sig);
|
||||
|
||||
|
||||
if NARG<3,
|
||||
warning(FLAG_WARNING); % restore warning status
|
||||
return;
|
||||
end;
|
||||
|
||||
|
||||
tmp = R;
|
||||
%tmp(ix1 | ix2) = nan; % avoid division-by-zero warning
|
||||
z = log((1+tmp)./(1-tmp))/2; % Fisher transformation [21]
|
||||
%sz = 1./sqrt(NN-3); % standard error of z
|
||||
sz = sqrt(2)*erfinv(1-alpha)./sqrt(NN-3); % confidence interval for alpha of z
|
||||
|
||||
ci1 = tanh(z-sz);
|
||||
ci2 = tanh(z+sz);
|
||||
|
||||
%ci1(isnan(ci1))=R(isnan(ci1)); % in case of isnan(ci), the interval limits are exactly the R value
|
||||
%ci2(isnan(ci2))=R(isnan(ci2));
|
||||
|
||||
if (NARG<5) || ~YESNAN,
|
||||
nan_sig = repmat(NaN,size(R));
|
||||
warning(FLAG_WARNING); % restore warning status
|
||||
return;
|
||||
end;
|
||||
|
||||
%%%%% ----- check independence of NaNs (missing values) -----
|
||||
[nan_R, nan_sig] = corrcoef(X,double(isnan(X)));
|
||||
|
||||
% remove diagonal elements, because these have not any meaning %
|
||||
nan_sig(isnan(nan_R)) = nan;
|
||||
% remove diagonal elements, because these have not any meaning %
|
||||
nan_R(isnan(nan_R)) = 0;
|
||||
|
||||
if 0, any(nan_sig(:) < alpha),
|
||||
tmp = nan_sig(:); % Hack to skip NaN's in MIN(X)
|
||||
min_sig = min(tmp(~isnan(tmp))); % Necessary, because Octave returns NaN rather than min(X) for min(NaN,X)
|
||||
fprintf(1,'CORRCOFF Warning: Missing Values (i.e. NaNs) are not independent of data (p-value=%f)\n', min_sig);
|
||||
fprintf(1,' Its recommended to remove all samples (i.e. rows) with any missing value (NaN).\n');
|
||||
fprintf(1,' The null-hypotheses (NaNs are uncorrelated) is rejected for the following parameter pair(s).\n');
|
||||
[ix,iy] = find(nan_sig < alpha);
|
||||
disp([ix,iy])
|
||||
end;
|
||||
|
||||
%%%%% ----- end of independence check ------
|
||||
|
||||
warning(FLAG_WARNING); % restore warning status
|
||||
return;
|
||||
|
|
@ -0,0 +1,66 @@
|
|||
function FLAG = flag_implicit_skip_nan(i)
|
||||
% FLAG_IMPLICIT_SKIP_NAN sets and gets default mode for handling NaNs
|
||||
% 1 skips NaN's (the default mode if no mode is set)
|
||||
% 0 NaNs are propagated; input NaN's give NaN's at the output
|
||||
%
|
||||
% FLAG = flag_implicit_skip_nan()
|
||||
% gets current mode
|
||||
%
|
||||
% flag_implicit_skip_nan(FLAG) % sets mode
|
||||
%
|
||||
% prevFLAG = flag_implicit_skip_nan(nextFLAG)
|
||||
% gets previous set FLAG and sets FLAG for the future
|
||||
% flag_implicit_skip_nan(prevFLAG)
|
||||
% resets FLAG to previous mode
|
||||
%
|
||||
% It is used in:
|
||||
% SUMSKIPNAN, MEDIAN, QUANTILES, TRIMEAN
|
||||
% and affects many other functions like:
|
||||
% CENTER, KURTOSIS, MAD, MEAN, MOMENT, RMS, SEM, SKEWNESS,
|
||||
% STATISTIC, STD, VAR, ZSCORE etc.
|
||||
%
|
||||
% The mode is stored in the global variable FLAG_implicit_skip_nan
|
||||
% It is recommended to use flag_implicit_skip_nan(1) as default and
|
||||
% flag_implicit_skip_nan(0) should be used for exceptional cases only.
|
||||
% This feature might disappear without further notice, so you should really not
|
||||
% rely on it.
|
||||
|
||||
|
||||
% This program is free software; you can redistribute it and/or modify
|
||||
% it under the terms of the GNU General Public License as published by
|
||||
% the Free Software Foundation; either version 3 of the License, or
|
||||
% (at your option) any later version.
|
||||
%
|
||||
% This program is distributed in the hope that it will be useful,
|
||||
% but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
% GNU General Public License for more details.
|
||||
%
|
||||
% You should have received a copy of the GNU General Public License
|
||||
% along with this program; if not, write to the Free Software
|
||||
% Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
|
||||
% $Id: flag_implicit_skip_nan.m 8351 2011-06-24 17:35:07Z carandraug $
|
||||
% Copyright (C) 2001-2003,2009 by Alois Schloegl <alois.schloegl@gmail.com>
|
||||
% Copyright (C) 2014 Dynare Team
|
||||
% This function is part of the NaN-toolbox
|
||||
% http://pub.ist.ac.at/~schloegl/matlab/NaN/
|
||||
|
||||
|
||||
persistent FLAG_implicit_skip_nan;
|
||||
|
||||
%% if strcmp(version,'3.6'), FLAG_implicit_skip_nan=(1==1); end; %% hack for the use with Freemat3.6
|
||||
|
||||
%%% set DEFAULT value of FLAG
|
||||
if isempty(FLAG_implicit_skip_nan),
|
||||
FLAG_implicit_skip_nan = (1==1); %logical(1); % logical.m not available on 2.0.16
|
||||
end;
|
||||
|
||||
FLAG = FLAG_implicit_skip_nan;
|
||||
if nargin>0,
|
||||
FLAG_implicit_skip_nan = (i~=0); %logical(i); %logical.m not available in 2.0.16
|
||||
if (~i)
|
||||
warning('flag_implicit_skipnan(0): You are warned!!! You have turned off skipping NaN in sumskipnan. This is not recommended. Make sure you really know what you do.')
|
||||
end;
|
||||
end;
|
||||
|
|
@ -0,0 +1,194 @@
|
|||
function [o,count,SSQ] = sumskipnan(x, DIM, W)
|
||||
% SUMSKIPNAN adds all non-NaN values.
|
||||
%
|
||||
% All NaN's are skipped; NaN's are considered as missing values.
|
||||
% SUMSKIPNAN of NaN's only gives O; and the number of valid elements is return.
|
||||
% SUMSKIPNAN is also the elementary function for calculating
|
||||
% various statistics (e.g. MEAN, STD, VAR, RMS, MEANSQ, SKEWNESS,
|
||||
% KURTOSIS, MOMENT, STATISTIC etc.) from data with missing values.
|
||||
% SUMSKIPNAN implements the DIMENSION-argument for data with missing values.
|
||||
% Also the second output argument return the number of valid elements (not NaNs)
|
||||
%
|
||||
% Y = sumskipnan(x [,DIM])
|
||||
% [Y,N,SSQ] = sumskipnan(x [,DIM])
|
||||
% [...] = sumskipnan(x, DIM, W)
|
||||
%
|
||||
% x input data
|
||||
% DIM dimension (default: [])
|
||||
% empty DIM sets DIM to first non singleton dimension
|
||||
% W weight vector for weighted sum, numel(W) must fit size(x,DIM)
|
||||
% Y resulting sum
|
||||
% N number of valid (not missing) elements
|
||||
% SSQ sum of squares
|
||||
%
|
||||
% the function FLAG_NANS_OCCURED() returns whether any value in x
|
||||
% is a not-a-number (NaN)
|
||||
%
|
||||
% features:
|
||||
% - can deal with NaN's (missing values)
|
||||
% - implements dimension argument.
|
||||
% - computes weighted sum
|
||||
% - compatible with Matlab and Octave
|
||||
%
|
||||
% see also: FLAG_NANS_OCCURED, SUM, NANSUM, MEAN, STD, VAR, RMS, MEANSQ,
|
||||
% SSQ, MOMENT, SKEWNESS, KURTOSIS, SEM
|
||||
|
||||
|
||||
% This program is free software; you can redistribute it and/or modify
|
||||
% it under the terms of the GNU General Public License as published by
|
||||
% the Free Software Foundation; either version 3 of the License, or
|
||||
% (at your option) any later version.
|
||||
%
|
||||
% This program is distributed in the hope that it will be useful,
|
||||
% but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
% GNU General Public License for more details.
|
||||
%
|
||||
% You should have received a copy of the GNU General Public License
|
||||
% along with this program; If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
% $Id: sumskipnan.m 9033 2011-11-08 20:58:07Z schloegl $
|
||||
% Copyright (C) 2000-2005,2009,2011 by Alois Schloegl <alois.schloegl@gmail.com>
|
||||
% Copyright (C) 2014 Dynare Team
|
||||
% This function is part of the NaN-toolbox
|
||||
% http://pub.ist.ac.at/~schloegl/matlab/NaN/
|
||||
|
||||
|
||||
global FLAG_NANS_OCCURED;
|
||||
|
||||
if nargin<2,
|
||||
DIM = [];
|
||||
end;
|
||||
if nargin<3,
|
||||
W = [];
|
||||
end;
|
||||
|
||||
% an efficient implementation in C of the following lines
|
||||
% could significantly increase performance
|
||||
% only one loop and only one check for isnan is needed
|
||||
% An MEX-Implementation is available in sumskipnan.cpp
|
||||
%
|
||||
% Outline of the algorithm:
|
||||
% for { k=1,o=0,count=0; k++; k<N}
|
||||
% if ~isnan(i(k))
|
||||
% { o += x(k);
|
||||
% count += 1;
|
||||
% tmp = x(k)*x(k)
|
||||
% o2 += tmp;
|
||||
% o3 += tmp.*tmp;
|
||||
% };
|
||||
|
||||
if isempty(DIM),
|
||||
DIM = find(size(x)>1,1);
|
||||
if isempty(DIM), DIM = 1; end;
|
||||
end
|
||||
if (DIM<1), DIM = 1; end; %% Hack, because min([])=0 for FreeMat v3.5
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
% non-float data
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
if (isempty(W) && (~(isa(x,'float') || isa(x,'double')))) || ~flag_implicit_skip_nan(), %%% skip always NaN's
|
||||
if ~isempty(W)
|
||||
error('SUMSKIPNAN: weighted sum of integers not supported, yet');
|
||||
end;
|
||||
x = double(x);
|
||||
o = sum(x,DIM);
|
||||
if nargout>1
|
||||
sz = size(x);
|
||||
N = sz(DIM);
|
||||
sz(DIM) = 1;
|
||||
count = repmat(N,sz);
|
||||
if nargout>2
|
||||
x = x.*x;
|
||||
SSQ = sum(x,DIM);
|
||||
end;
|
||||
end;
|
||||
return;
|
||||
end;
|
||||
|
||||
if (length(size(x))<DIM)
|
||||
error('SUMSKIPNAN: DIM argument larger than number of dimensions of x');
|
||||
elseif ~isempty(W) && (size(x,DIM)~=numel(W))
|
||||
error('SUMSKIPNAN: size of weight vector does not match size(x,DIM)');
|
||||
end;
|
||||
|
||||
%% mex and oct files expect double
|
||||
x = double(x);
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
% use Matlab-MEX function when available
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
%if 1,
|
||||
try
|
||||
|
||||
%% using sumskipnan_mex.mex
|
||||
|
||||
%% !!! hack: FLAG_NANS_OCCURED is an output argument, reserve memory !!!
|
||||
if isempty(FLAG_NANS_OCCURED),
|
||||
FLAG_NANS_OCCURED = logical(0); % default value
|
||||
end;
|
||||
|
||||
if (nargout<2),
|
||||
o = sumskipnan_mex(real(x),DIM,FLAG_NANS_OCCURED,W);
|
||||
if (~isreal(x))
|
||||
io = sumskipnan_mex(imag(x),DIM,FLAG_NANS_OCCURED,W);
|
||||
o = o + i*io;
|
||||
end;
|
||||
return;
|
||||
elseif (nargout==2),
|
||||
[o,count] = sumskipnan_mex(real(x),DIM,FLAG_NANS_OCCURED,W);
|
||||
if (~isreal(x))
|
||||
[io,icount] = sumskipnan_mex(imag(x),DIM,FLAG_NANS_OCCURED,W);
|
||||
if any(count(:)-icount(:))
|
||||
error('Number of NaNs differ for REAL and IMAG part');
|
||||
else
|
||||
o = o+i*io;
|
||||
end;
|
||||
end;
|
||||
return;
|
||||
elseif (nargout>=3),
|
||||
[o,count,SSQ] = sumskipnan_mex(real(x),DIM,FLAG_NANS_OCCURED,W);
|
||||
if (~isreal(x))
|
||||
[io,icount,iSSQ] = sumskipnan_mex(imag(x),DIM,FLAG_NANS_OCCURED,W);
|
||||
if any(count(:)-icount(:))
|
||||
error('Number of NaNs differ for REAL and IMAG part');
|
||||
else
|
||||
o = o+i*io;
|
||||
SSQ = SSQ+iSSQ;
|
||||
end;
|
||||
end;
|
||||
return;
|
||||
end;
|
||||
end;
|
||||
|
||||
if ~isempty(W)
|
||||
error('weighted sumskipnan requires sumskipnan_mex');
|
||||
end;
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
% count non-NaN's
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
if nargout>1,
|
||||
count = sum(x==x,DIM);
|
||||
FLAG_NANS_OCCURED = any(count(:)<size(x,DIM));
|
||||
end;
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
% replace NaN's with zero
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
x(x~=x) = 0;
|
||||
o = sum(x,DIM);
|
||||
|
||||
if nargout>2,
|
||||
x = real(x).^2 + imag(x).^2;
|
||||
SSQ = sum(x,DIM);
|
||||
end;
|
||||
|
||||
%!assert(sumskipnan([1,2],1),[1,2])
|
||||
%!assert(sumskipnan([1,NaN],2),1)
|
||||
%!assert(sumskipnan([1,NaN],2),1)
|
||||
%!assert(sumskipnan([nan,1,4,5]),10)
|
||||
%!assert(sumskipnan([nan,1,4,5]',1,[3;2;1;0]),6)
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue