129 lines
3.5 KiB
Matlab
129 lines
3.5 KiB
Matlab
function [i,n,s,j] = describe_missing_data(data)

% This function reads the dataset and determines the location of the missing observations (defined by NaNs)

%@info:
%! @deftypefn {Function File} {[@var{i}, @var{n}, @var{s}, @var{j}] =} describe_missing_data (@var{data})
%! This function reads the dataset and determines where the missing observations are.
%!
%! @strong{Inputs}
%! @table @var
%! @item data
%! Real matrix (T-by-N) for the dataset. Missing observations are coded as NaN.
%! @end table
%!
%! @strong{Outputs}
%! @table @var
%! @item i
%! cell array (1-by-T). Each element is a @math{p_t\times 1} column vector of indices targeting the non-NaN variables at time t
%! (an empty matrix if all the variables are missing at time t).
%! @item n
%! Integer scalar. The effective number of observations:
%! @math{n=\sum_{t=1}^T p_t}
%! @item s
%! Integer scalar. One plus the index of the last period with at least one missing observation, so that
%! @math{p_t=N} for all @math{t\geq s}. Equal to 1 if no observation is missing (or if the dataset is empty),
%! and to T+1 if the last period has a missing observation.
%! @item j
%! cell array (1-by-N). Each element is a column vector targeting the non-NaN observations of a variable.
%! Only filled if the fourth output is requested; otherwise all the cells are left empty.
%! @end table
%!
%! @end deftypefn
%@eod:

% Copyright © 2008-2014 Dynare Team
%
% This file is part of Dynare.
%
% Dynare is free software: you can redistribute it and/or modify
% it under the terms of the GNU General Public License as published by
% the Free Software Foundation, either version 3 of the License, or
% (at your option) any later version.
%
% Dynare is distributed in the hope that it will be useful,
% but WITHOUT ANY WARRANTY; without even the implied warranty of
% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
% GNU General Public License for more details.
%
% You should have received a copy of the GNU General Public License
% along with Dynare. If not, see <https://www.gnu.org/licenses/>.

% Original author: stephane DOT adjemian AT univ DASH lemans DOT fr

% Logical map of the available (non-NaN) data points: rows are periods, columns are variables.
observed = ~isnan(data);
[T,N] = size(data);

i = cell(1,T);
j = cell(1,N);

% For each period, store the indices of the observed variables as a column vector.
for obs=1:T
    tmp = find(observed(obs,:));
    if ~isempty(tmp)
        i{obs} = tmp(:);
    else
        % Keep a 0-by-0 empty matrix (not a 0-by-1 vector) when nothing is observed.
        i{obs} = [];
    end
end

% Effective number of observations.
n = nnz(observed);

% Last period with at least one missing observation. The result is empty if no data
% are missing, and also if the dataset has no rows, so the empty dataset is handled
% without indexing into an empty vector.
last_incomplete_period = find(any(~observed,2), 1, 'last');

if isempty(last_incomplete_period)
    s = 1;
else
    s = last_incomplete_period+1;
end

% The per-variable description is only built on request.
if nargout>3
    for variable=1:N
        tmp = find(observed(:,variable));
        % Force a column vector (0-by-1 if the variable is never observed).
        j{variable} = tmp(:);
    end
end
|
|
|
|
|
|
%@test:1
|
|
%$ % Define a data set.
|
|
%$ A = [ 1 1 ; ...
|
|
%$ 1 NaN ; ...
|
|
%$ NaN 1 ; ...
|
|
%$ 1 1 ; ...
|
|
%$ NaN NaN ; ...
|
|
%$ 1 NaN ; ...
|
|
%$ 1 NaN ; ...
|
|
%$ 1 1 ; ...
|
|
%$ 1 1 ; ...
|
|
%$ 1 1 ; ...
|
|
%$ 1 1 ];
|
|
%$
|
|
%$ % Define expected results.
|
|
%$ eB = cell(1,11);
|
|
%$ eB(1) = { transpose(1:2) };
|
|
%$ eB(2) = { 1 };
|
|
%$ eB(3) = { 2 };
|
|
%$ eB(4) = { transpose(1:2)};
|
|
%$ eB(5) = { [] };
|
|
%$ eB(6) = { 1 };
|
|
%$ eB(7) = { 1 };
|
|
%$ eB(8) = { transpose(1:2) };
|
|
%$ eB(9) = { transpose(1:2) };
|
|
%$ eB(10) = { transpose(1:2) };
|
|
%$ eB(11) = { transpose(1:2) };
|
|
%$ eC = 16;
|
|
%$ eD = 8;
|
|
%$ eE = cell(1,2);
|
|
%$ eE(1) = { [1; 2; 4; transpose(6:11)] };
|
|
%$ eE(2) = { [1; 3; 4; transpose(8:11)] };
|
|
%$
|
|
%$ % Call the tested routine.
|
|
%$ [B,C,D,E] = describe_missing_data(A);
|
|
%$
|
|
%$ % Check the results.
|
|
%$ t(1) = dassert(B,eB);
|
|
%$ t(2) = dassert(C,eC);
|
|
%$ t(3) = dassert(D,eD);
|
|
%$ t(4) = dassert(E,eE);
|
|
%$ T = all(t);
|
|
%@eof:1 |