dynare/matlab/utilities/dataset/describe_missing_data.m

129 lines
3.5 KiB
Matlab

function [i,n,s,j] = describe_missing_data(data)
% This function reads the dataset and determines the location of the missing observations (defined by NaNs)
%@info:
%! @deftypefn {Function File} {[@var{i}, @var{n}, @var{s}, @var{j} ] =} describe_missing_data (@var{data})
%! This function reads the dataset and determines where the missing observations (NaNs) are.
%!
%! @strong{Inputs}
%! @table @var
%! @item data
%! Real matrix (T-by-N) for the dataset: rows are time periods, columns are variables. May be empty.
%! @end table
%!
%! @strong{Outputs}
%! @table @var
%! @item i
%! cell array (1-by-T). Each element is a @math{p_t\times 1} column vector of indices targeting the non-NaN variables at time t
%! (an empty matrix if no variable is observed at time t).
%! @item n
%! Integer scalar. The effective number of observations:
%! @math{n=\sum_{t=1}^T p_t}
%! @item s
%! Integer scalar. One plus the index of the last period with at least one missing observation, so that
%! @math{p_t=N} for all @math{t\geq s}. Equal to 1 if no observation is missing (or if the dataset is empty)
%! and to T+1 if the last period has a missing observation.
%! @item j
%! cell array (1-by-N). Each element is a column vector targeting the non-NaN observations of a variable.
%! Only filled if the fourth output is requested; otherwise every element is left empty.
%! @end table
%!
%! @end deftypefn
%@eod:
% Copyright © 2008-2014 Dynare Team
%
% This file is part of Dynare.
%
% Dynare is free software: you can redistribute it and/or modify
% it under the terms of the GNU General Public License as published by
% the Free Software Foundation, either version 3 of the License, or
% (at your option) any later version.
%
% Dynare is distributed in the hope that it will be useful,
% but WITHOUT ANY WARRANTY; without even the implied warranty of
% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
% GNU General Public License for more details.
%
% You should have received a copy of the GNU General Public License
% along with Dynare. If not, see <https://www.gnu.org/licenses/>.
% Original author: stephane DOT adjemian AT univ DASH lemans DOT fr

% Logical map of the available (non-NaN) entries: rows are periods, columns are variables.
available = ~isnan(data);
[T,N] = size(data);

i = cell(1,T);
j = cell(1,N);
missing_per_period = zeros(T,1);

for t=1:T
    observed_variables = find(available(t,:));
    missing_per_period(t) = N-length(observed_variables);
    if isempty(observed_variables)
        % Periods without any observation are stored as a 0-by-0 empty matrix.
        i{t} = [];
    else
        i{t} = observed_variables(:);
    end
end

n = nnz(available);

% s is the first period following the last incomplete one. When no period is
% incomplete (including the case of an empty dataset) s is set to 1.
last_incomplete_period = find(missing_per_period>0, 1, 'last');
if isempty(last_incomplete_period)
    s = 1;
else
    s = last_incomplete_period+1;
end

% The per-variable indices are only computed when the caller asks for them.
if nargout>3
    for v=1:N
        observed_periods = find(available(:,v));
        j{v} = observed_periods(:);
    end
end
%@test:1
%$ % Define a data set.
%$ A = [ 1 1 ; ...
%$ 1 NaN ; ...
%$ NaN 1 ; ...
%$ 1 1 ; ...
%$ NaN NaN ; ...
%$ 1 NaN ; ...
%$ 1 NaN ; ...
%$ 1 1 ; ...
%$ 1 1 ; ...
%$ 1 1 ; ...
%$ 1 1 ];
%$
%$ % Define expected results.
%$ eB = cell(1,11);
%$ eB(1) = { transpose(1:2) };
%$ eB(2) = { 1 };
%$ eB(3) = { 2 };
%$ eB(4) = { transpose(1:2)};
%$ eB(5) = { [] };
%$ eB(6) = { 1 };
%$ eB(7) = { 1 };
%$ eB(8) = { transpose(1:2) };
%$ eB(9) = { transpose(1:2) };
%$ eB(10) = { transpose(1:2) };
%$ eB(11) = { transpose(1:2) };
%$ eC = 16;
%$ eD = 8;
%$ eE = cell(1,2);
%$ eE(1) = { [1; 2; 4; transpose(6:11)] };
%$ eE(2) = { [1; 3; 4; transpose(8:11)] };
%$
%$ % Call the tested routine.
%$ [B,C,D,E] = describe_missing_data(A);
%$
%$ % Check the results.
%$ t(1) = dassert(B,eB);
%$ t(2) = dassert(C,eC);
%$ t(3) = dassert(D,eD);
%$ t(4) = dassert(E,eE);
%$ T = all(t);
%@eof:1