dynare/matlab/utilities/dataset/makedataset.m

289 lines
12 KiB
Matlab

function [DynareDataset, DatasetInfo, newdatainterface] = makedataset(DynareOptions, initialconditions, gsa_flag)
% Initialize a dataset as a dseries object.
%
%
% INPUTS
% ======
%
% DynareOptions [struct] Structure of options built by Dynare's preprocessor.
% initialconditions [double] number of lags for VAR and DSGE_VAR
% gsa_flag [integer] 1: GSA, 0: other
%
% OUTPUTS
% =======
%
% DynareDataset [dseries] The dataset.
% DatasetInfo [struct] Various informations about the dataset (descriptive statistics and missing observations).
%
% EXAMPLE
% =======
%
% [dataset_, dataset_info] = makedataset(options_) ;
%
%
% See also dynare_estimation_init
% Copyright (C) 2014-2018 Dynare Team
%
% This file is part of Dynare.
%
% Dynare is free software: you can redistribute it and/or modify
% it under the terms of the GNU General Public License as published by
% the Free Software Foundation, either version 3 of the License, or
% (at your option) any later version.
%
% Dynare is distributed in the hope that it will be useful,
% but WITHOUT ANY WARRANTY; without even the implied warranty of
% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
% GNU General Public License for more details.
%
% You should have received a copy of the GNU General Public License
% along with Dynare. If not, see <http://www.gnu.org/licenses/>.
if nargin<3
gsa_flag = 0;
end
if nargin<2 || isempty(initialconditions)
% If a the sample is to be used for the estimation of a VAR or DSGE-VAR model
% the second argument must be a strictly positive integer (the number of lags).
initialconditions = 0;
end
if isempty(DynareOptions.datafile) && isempty(DynareOptions.dataset.file) && isempty(DynareOptions.dataset.series)
if gsa_flag
DynareDataset = dseries();
DatasetInfo = struct('missing', struct('state', 0, 'aindex', [], 'vindex', [], 'number_of_observations', NaN, 'no_more_missing_observations', NaN), ...
'descriptive', struct('mean', [], 'covariance', [], 'correlation', [], 'autocovariance', []));
newdatainterface=0;
return
else
error('makedataset: datafile option is missing!')
end
end
if isempty(DynareOptions.datafile) && ~isempty(DynareOptions.dataset.file)
datafile = DynareOptions.dataset.file;
newdatainterface = 1;
elseif isempty(DynareOptions.datafile) && ~isempty(DynareOptions.dataset.series)
try
dseriesobjectforuserdataset = evalin('base', DynareOptions.dataset.series);
catch
error(sprintf('makedataset: %s is unknown!', DynareOptions.dataset.series))
end
if ~isdseries(dseriesobjectforuserdataset)
error(sprintf('makedataset: %s has to be a dseries object!', DynareOptions.dataset.series))
end
datafile = [];
newdatainterface = 1;
elseif ~isempty(DynareOptions.datafile) && isempty(DynareOptions.dataset.file)
datafile = DynareOptions.datafile;
newdatainterface = 0;
elseif ~isempty(DynareOptions.datafile) && ~isempty(DynareOptions.dataset.file)
error('makedataset: You cannot simultaneously use the data command and the datafile option (in the estimation command)!')
else
error('makedataset: You have to specify the datafile!')
end
% Check extension.
if ~isempty(datafile)
allowed_extensions = {'m','mat','csv','xls','xlsx'};
datafile_extension = get_file_extension(datafile);
if isempty(datafile_extension)
available_extensions = {}; j = 1;
[datafilepath,datafilename,datafileext] = fileparts(datafile);
if isempty(datafilepath)
datafilepath = '.';
end
dircontent = dir(datafilepath);
for i=1:length(allowed_extensions)
if ~isempty(strmatch([datafilename '.' allowed_extensions{i}],{dircontent.name},'exact'))
available_extensions(j) = {allowed_extensions{i}};
j = j+1;
end
end
if isempty(available_extensions)
error(['makedataset: I can''t find a datafile (with allowed extension m, mat, csv, xls or xlsx)!'])
end
if length(available_extensions)>1
error(sprintf(['makedataset: You did not specify an extension for the datafile, but more than one candidate ' ...
'is available in the designated folder!\nPlease, add an extension to the datafile ' ...
'(m, mat, csv, xls or xlsx are permitted extensions).']));
end
datafile = [datafile '.' available_extensions{1}];
end
end
% Load the data in a dseries object.
if ~isempty(datafile)
if ~( newdatainterface==0 && (strcmp(datafile(end-1:end),'.m')|| strcmp(datafile(end-3:end),'.mat')))
DynareDataset = dseries(datafile);
else
if strcmp(datafile(end-1:end),'.m')
% Load an m file with the old interface.
DynareDataset = load_m_file_data_legacy(datafile, DynareOptions.varobs);
elseif strcmp(datafile(end-3:end),'.mat')
% Load a mat file with the old interface.
DynareDataset = load_mat_file_data_legacy(datafile, DynareOptions.varobs);
end
end
else
DynareDataset = dseriesobjectforuserdataset;
clear('dseriesobjectforuserdataset');
end
if size(unique(DynareDataset.name),1)~=size(DynareDataset.name,1)
error('makedataset: the data set must not contain two variables with the same name and must not contain empty/non-named columns.')
end
% Select a subset of the variables.
DynareDataset = DynareDataset{DynareOptions.varobs{:}};
% Apply log function if needed.
if DynareOptions.loglinear && ~DynareOptions.logdata
DynareDataset = DynareDataset.log();
end
% Test if an initial period (different from its default value) is explicitely defined in the datafile.
if isequal(DynareDataset.init, dates(1,1))
dataset_default_initial_period = 1;
else
dataset_default_initial_period = 0;
end
% Test if an initial period (different from its default value) is explicitely defined in the mod file with the set_time command.
if ~isdates(DynareOptions.initial_period) && isnan(DynareOptions.initial_period)
set_time_default_initial_period = 1;
else
set_time_default_initial_period = 0;
end
if ~set_time_default_initial_period && dataset_default_initial_period
% Overwrite the initial period in dataset (it was set to default).
% Note that the updates of freq and time members are auto-magically
% done by dseries::subsasgn overloaded method.
DynareDataset.init = DynareOptions.initial_period;
end
if set_time_default_initial_period && ~dataset_default_initial_period
% Overwrite the global initial period defined by set_time (it was set to default).
DynareOptions.initial_period = DynareDataset.init;
end
if ~set_time_default_initial_period && ~dataset_default_initial_period
% Check if dataset.init and options_.initial_period are identical.
if DynareOptions.initial_period<DynareDataset.init
error('makedataset: The date as defined by the set_time command is not consistent with the initial period in the database!')
end
end
% Set firstobs, lastobs and nobs
if newdatainterface
if isempty(DynareOptions.dataset.firstobs)
% first_obs option was not used in the data command.
firstobs = DynareDataset.init;
else
firstobs = DynareOptions.dataset.firstobs;
end
if isnan(DynareOptions.dataset.nobs)
% nobs option was not used in the data command.
if isempty(DynareOptions.dataset.lastobs)
% last_obs option was not used in the data command.
nobs = DynareDataset.nobs;
lastobs = DynareDataset.dates(end);
else
lastobs = DynareOptions.dataset.lastobs;
nobs = lastobs-firstobs+1;
end
else
nobs = DynareOptions.dataset.nobs;
if isempty(DynareOptions.dataset.lastobs)
% last_obs option was not used in the data command.
lastobs = firstobs+(nobs-1);
else
% last_obs and nobs were used in the data command. Check that they are consistent (with firstobs).
if ~isequal(lastobs,firstobs+(nobs-1))
error(sprintf('makedataset: Options last_obs (%s), first_obs (%s) and nobs (%s) are not consistent!',char(lastobs),char(firstobs),num2str(nobs)));
end
end
end
else
if isnan(DynareOptions.first_obs)
firstobs = DynareDataset.init;
else
firstobs = DynareDataset.dates(DynareOptions.first_obs);
end
if isnan(DynareOptions.nobs)
lastobs = DynareDataset.dates(end);
nobs = lastobs-firstobs+1;
else
nobs = DynareOptions.nobs;
lastobs = firstobs+(nobs-1);
end
end
% Add initial conditions if needed
FIRSTOBS = firstobs-initialconditions;
% Check that firstobs belongs to DynareDataset.dates
if firstobs<DynareDataset.init
error(sprintf('makedataset: first_obs (%s) cannot be less than the first date in the dataset (%s)!',char(firstobs),char(DynareDataset.init)))
end
% Check that FIRSTOBS belongs to DynareDataset.dates
if initialconditions && FIRSTOBS<DynareDataset.init
error(sprintf('makedataset: first_obs (%s) - %i cannot be less than the first date in the dataset (%s)!\nReduce the number of lags in the VAR model or increase the value of first_obs\nto at least first_obs=%i.', char(firstobs), initialconditions, char(DynareDataset.init),initialconditions+1));
end
% Check that lastobs belongs to DynareDataset.dates...
if newdatainterface
if lastobs>DynareDataset.dates(end)
error(sprintf('makedataset: last_obs (%s) cannot be greater than the last date in the dataset (%s)!',char(lastobs),char(DynareDataset.dates(end))))
end
else
% ... or check that nobs is smaller than the number of observations in DynareDataset.
if nobs>DynareDataset.nobs
error(sprintf('makedataset: nobs (%s) cannot be greater than the last date in the dataset (%s)!', num2str(nobs), num2str(DynareDataset.nobs)))
end
end
% Select a subsample.
DynareDataset = DynareDataset(FIRSTOBS:lastobs);
% Initialize DatasetInfo structure.
DatasetInfo = struct('missing', struct('state', NaN, 'aindex', [], 'vindex', [], 'number_of_observations', NaN, 'no_more_missing_observations', NaN), ...
'descriptive', struct('mean', [], 'covariance', [], 'correlation', [], 'autocovariance', []));
% Fill DatasetInfo.missing if some observations are missing
DatasetInfo.missing.state = isanynan(DynareDataset.data);
if DatasetInfo.missing.state
[DatasetInfo.missing.aindex, DatasetInfo.missing.number_of_observations, DatasetInfo.missing.no_more_missing_observations, DatasetInfo.missing.vindex] = ...
describe_missing_data(DynareDataset.data);
else
DatasetInfo.missing.aindex = num2cell(transpose(repmat(1:DynareDataset.vobs,DynareDataset.nobs,1)),1);
DatasetInfo.missing.no_more_missing_observations = 1;
end
% Compute the empirical mean of the observed variables.
DatasetInfo.descriptive.mean = nanmean(DynareDataset.data,1);
% Compute the empirical covariance matrix of the observed variables.
DatasetInfo.descriptive.covariance = nancovariance(DynareDataset.data);
% Compute the empirical correlation matrix of the observed variables.
normalization_matrix = diag(1./sqrt(diag(DatasetInfo.descriptive.covariance)));
DatasetInfo.descriptive.correlation = normalization_matrix*DatasetInfo.descriptive.covariance*normalization_matrix;
% Compute autocorrelation function.
DatasetInfo.descriptive.autocovariance = nanautocovariance(DynareDataset.data, DynareOptions.ar);
% Save raw data.
DatasetInfo.rawdata = DynareDataset.data;
% Prefilter the data if needed (remove the mean).
if isequal(DynareOptions.prefilter, 1)
DynareDataset = DynareDataset.detrend();
end