2014-07-16 17:04:13 +02:00
function [DynareDataset, DatasetInfo, newdatainterface] = makedataset ( DynareOptions, initialconditions, gsa_flag)
2014-06-16 17:41:59 +02:00
% Initialize a dataset as a dseries object.
%
%
% INPUTS
% ======
%
2017-10-04 10:40:42 +02:00
% DynareOptions [struct] Structure of options built by Dynare's preprocessor.
% initialconditions [double] number of lags for VAR and DSGE_VAR
% gsa_flag [integer] 1: GSA, 0: other
2014-06-16 17:41:59 +02:00
%
% OUTPUTS
% =======
%
% DynareDataset [dseries] The dataset.
% DatasetInfo [struct] Various informations about the dataset (descriptive statistics and missing observations).
%
% EXAMPLE
% =======
%
% [dataset_, dataset_info] = makedataset(options_) ;
%
%
% See also dynare_estimation_init
2022-04-13 13:15:19 +02:00
% Copyright © 2014-2018 Dynare Team
2017-05-18 18:36:38 +02:00
%
% This file is part of Dynare.
%
% Dynare is free software: you can redistribute it and/or modify
% it under the terms of the GNU General Public License as published by
% the Free Software Foundation, either version 3 of the License, or
% (at your option) any later version.
%
% Dynare is distributed in the hope that it will be useful,
% but WITHOUT ANY WARRANTY; without even the implied warranty of
% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
% GNU General Public License for more details.
%
% You should have received a copy of the GNU General Public License
2021-06-09 17:33:48 +02:00
% along with Dynare. If not, see <https://www.gnu.org/licenses/>.
2019-12-20 16:28:06 +01:00
2017-05-18 18:36:38 +02:00
2014-06-25 11:34:39 +02:00
if nargin < 3
gsa_flag = 0 ;
end
if nargin < 2 || isempty ( initialconditions )
2014-06-23 10:55:08 +02:00
% If a the sample is to be used for the estimation of a VAR or DSGE-VAR model
% the second argument must be a strictly positive integer (the number of lags).
initialconditions = 0 ;
end
2014-06-17 16:03:30 +02:00
if isempty ( DynareOptions . datafile ) && isempty ( DynareOptions . dataset . file ) && isempty ( DynareOptions . dataset . series )
2014-06-16 17:41:59 +02:00
if gsa_flag
DynareDataset = dseries ( ) ;
2014-11-18 11:34:49 +01:00
DatasetInfo = struct ( ' missing' , struct ( ' state' , 0 , ' aindex' , [ ] , ' vindex' , [ ] , ' number_of_observations' , NaN , ' no_more_missing_observations' , NaN ) , ...
2017-05-16 15:10:20 +02:00
' descriptive' , struct ( ' mean' , [ ] , ' covariance' , [ ] , ' correlation' , [ ] , ' autocovariance' , [ ] ) ) ;
2014-07-23 12:04:47 +02:00
newdatainterface = 0 ;
2014-06-16 17:41:59 +02:00
return
else
2014-06-17 16:03:30 +02:00
error ( ' makedataset: datafile option is missing!' )
2014-06-16 17:41:59 +02:00
end
end
if isempty ( DynareOptions . datafile ) && ~ isempty ( DynareOptions . dataset . file )
datafile = DynareOptions . dataset . file ;
newdatainterface = 1 ;
2014-06-17 16:03:30 +02:00
elseif isempty ( DynareOptions . datafile ) && ~ isempty ( DynareOptions . dataset . series )
try
dseriesobjectforuserdataset = evalin ( ' base' , DynareOptions . dataset . series ) ;
catch
error ( sprintf ( ' makedataset: %s is unknown!' , DynareOptions . dataset . series ) )
end
if ~ isdseries ( dseriesobjectforuserdataset )
error ( sprintf ( ' makedataset: %s has to be a dseries object!' , DynareOptions . dataset . series ) )
end
datafile = [ ] ;
newdatainterface = 1 ;
2014-06-16 17:41:59 +02:00
elseif ~ isempty ( DynareOptions . datafile ) && isempty ( DynareOptions . dataset . file )
datafile = DynareOptions . datafile ;
newdatainterface = 0 ;
2018-02-01 19:14:13 +01:00
elseif ~ isempty ( DynareOptions . datafile ) && ~ isempty ( DynareOptions . dataset . file )
2014-09-22 20:17:20 +02:00
error ( ' makedataset: You cannot simultaneously use the data command and the datafile option (in the estimation command)!' )
2014-06-16 17:41:59 +02:00
else
2014-09-22 20:17:20 +02:00
error ( ' makedataset: You have to specify the datafile!' )
2014-06-16 17:41:59 +02:00
end
% Check extension.
2014-06-17 16:03:30 +02:00
if ~ isempty ( datafile )
allowed_extensions = { ' m' , ' mat' , ' csv' , ' xls' , ' xlsx' } ;
datafile_extension = get_file_extension ( datafile ) ;
if isempty ( datafile_extension )
available_extensions = { } ; j = 1 ;
2015-06-05 16:35:05 +02:00
[ datafilepath , datafilename , datafileext ] = fileparts ( datafile ) ;
if isempty ( datafilepath )
datafilepath = ' .' ;
end
dircontent = dir ( datafilepath ) ;
2014-06-17 16:03:30 +02:00
for i = 1 : length ( allowed_extensions )
2015-06-05 16:35:05 +02:00
if ~ isempty ( strmatch ( [ datafilename ' .' allowed_extensions { i } ] , { dircontent . name } , ' exact' ) )
2014-06-17 16:03:30 +02:00
available_extensions ( j ) = { allowed_extensions { i } } ;
j = j + 1 ;
end
2014-06-16 17:41:59 +02:00
end
2014-06-17 16:03:30 +02:00
if isempty ( available_extensions )
2014-09-22 20:17:20 +02:00
error ( [ ' makedataset: I can' ' t find a datafile (with allowed extension m, mat, csv, xls or xlsx)!' ] )
2014-06-17 16:03:30 +02:00
end
if length ( available_extensions ) > 1
2014-09-22 20:17:20 +02:00
error ( sprintf ( [ ' makedataset: You did not specify an extension for the datafile, but more than one candidate ' ...
' is available in the designated folder!\nPlease, add an extension to the datafile ' ...
' (m, mat, csv, xls or xlsx are permitted extensions).' ] ) ) ;
2014-06-17 16:03:30 +02:00
end
datafile = [ datafile ' .' available_extensions { 1 } ] ;
2014-06-16 17:41:59 +02:00
end
end
% Load the data in a dseries object.
2014-06-17 16:03:30 +02:00
if ~ isempty ( datafile )
2017-04-27 14:04:32 +02:00
if ~ ( newdatainterface == 0 && ( strcmp ( datafile ( end - 1 : end ) , ' .m' ) || strcmp ( datafile ( end - 3 : end ) , ' .mat' ) ) )
2014-10-20 17:28:35 +02:00
DynareDataset = dseries ( datafile ) ;
else
2017-04-27 14:04:32 +02:00
if strcmp ( datafile ( end - 1 : end ) , ' .m' )
2017-05-10 16:35:58 +02:00
% Load an m file with the old interface.
2017-04-27 14:04:32 +02:00
DynareDataset = load_m_file_data_legacy ( datafile , DynareOptions . varobs ) ;
elseif strcmp ( datafile ( end - 3 : end ) , ' .mat' )
2017-05-10 16:35:58 +02:00
% Load a mat file with the old interface.
2017-05-16 15:10:20 +02:00
DynareDataset = load_mat_file_data_legacy ( datafile , DynareOptions . varobs ) ;
2017-04-27 14:04:32 +02:00
end
2014-10-20 17:28:35 +02:00
end
2014-06-17 16:03:30 +02:00
else
DynareDataset = dseriesobjectforuserdataset ;
clear ( ' dseriesobjectforuserdataset' ) ;
end
2014-06-16 17:41:59 +02:00
2015-06-08 11:16:40 +02:00
if size ( unique ( DynareDataset . name ) , 1 ) ~= size ( DynareDataset . name , 1 )
error ( ' makedataset: the data set must not contain two variables with the same name and must not contain empty/non-named columns.' )
end
2014-06-16 17:41:59 +02:00
% Select a subset of the variables.
DynareDataset = DynareDataset { DynareOptions . varobs { : } } ;
% Apply log function if needed.
if DynareOptions . loglinear && ~ DynareOptions . logdata
DynareDataset = DynareDataset . log ( ) ;
end
% Test if an initial period (different from its default value) is explicitely defined in the datafile.
if isequal ( DynareDataset . init , dates ( 1 , 1 ) )
dataset_default_initial_period = 1 ;
else
dataset_default_initial_period = 0 ;
end
% Test if an initial period (different from its default value) is explicitely defined in the mod file with the set_time command.
2014-07-16 17:02:58 +02:00
if ~ isdates ( DynareOptions . initial_period ) && isnan ( DynareOptions . initial_period )
2014-06-16 17:41:59 +02:00
set_time_default_initial_period = 1 ;
else
set_time_default_initial_period = 0 ;
end
if ~ set_time_default_initial_period && dataset_default_initial_period
% Overwrite the initial period in dataset (it was set to default).
% Note that the updates of freq and time members are auto-magically
% done by dseries::subsasgn overloaded method.
DynareDataset . init = DynareOptions . initial_period ;
end
if set_time_default_initial_period && ~ dataset_default_initial_period
% Overwrite the global initial period defined by set_time (it was set to default).
DynareOptions . initial_period = DynareDataset . init ;
end
if ~ set_time_default_initial_period && ~ dataset_default_initial_period
% Check if dataset.init and options_.initial_period are identical.
2014-07-16 17:02:58 +02:00
if DynareOptions . initial_period < DynareDataset . init
2014-09-22 20:17:20 +02:00
error ( ' makedataset: The date as defined by the set_time command is not consistent with the initial period in the database!' )
2014-06-16 17:41:59 +02:00
end
end
% Set firstobs, lastobs and nobs
if newdatainterface
if isempty ( DynareOptions . dataset . firstobs )
% first_obs option was not used in the data command.
firstobs = DynareDataset . init ;
else
firstobs = DynareOptions . dataset . firstobs ;
end
if isnan ( DynareOptions . dataset . nobs )
% nobs option was not used in the data command.
if isempty ( DynareOptions . dataset . lastobs )
% last_obs option was not used in the data command.
nobs = DynareDataset . nobs ;
lastobs = DynareDataset . dates ( end ) ;
else
lastobs = DynareOptions . dataset . lastobs ;
nobs = lastobs - firstobs + 1 ;
end
else
nobs = DynareOptions . dataset . nobs ;
if isempty ( DynareOptions . dataset . lastobs )
% last_obs option was not used in the data command.
lastobs = firstobs + ( nobs - 1 ) ;
else
% last_obs and nobs were used in the data command. Check that they are consistent (with firstobs).
if ~ isequal ( lastobs , firstobs + ( nobs - 1 ) )
2014-09-22 20:17:20 +02:00
error ( sprintf ( ' makedataset: Options last_obs (%s), first_obs (%s) and nobs (%s) are not consistent!' , char ( lastobs ) , char ( firstobs ) , num2str ( nobs ) ) ) ;
2014-06-16 17:41:59 +02:00
end
end
end
else
if isnan ( DynareOptions . first_obs )
firstobs = DynareDataset . init ;
else
firstobs = DynareDataset . dates ( DynareOptions . first_obs ) ;
end
if isnan ( DynareOptions . nobs )
lastobs = DynareDataset . dates ( end ) ;
nobs = lastobs - firstobs + 1 ;
else
nobs = DynareOptions . nobs ;
lastobs = firstobs + ( nobs - 1 ) ;
end
end
2014-06-23 10:55:08 +02:00
% Add initial conditions if needed
FIRSTOBS = firstobs - initialconditions ;
2014-06-16 17:41:59 +02:00
% Check that firstobs belongs to DynareDataset.dates
if firstobs < DynareDataset . init
2014-09-22 20:17:20 +02:00
error ( sprintf ( ' makedataset: first_obs (%s) cannot be less than the first date in the dataset (%s)!' , char ( firstobs ) , char ( DynareDataset . init ) ) )
2014-06-16 17:41:59 +02:00
end
2014-06-23 10:55:08 +02:00
% Check that FIRSTOBS belongs to DynareDataset.dates
if initialconditions && FIRSTOBS < DynareDataset . init
2014-09-22 20:17:20 +02:00
error ( sprintf ( ' makedataset: first_obs (%s) - %i cannot be less than the first date in the dataset (%s)!\nReduce the number of lags in the VAR model or increase the value of first_obs\nto at least first_obs=%i.' , char ( firstobs ) , initialconditions , char ( DynareDataset . init ) , initialconditions + 1 ) ) ;
2014-06-23 10:55:08 +02:00
end
2014-06-16 17:41:59 +02:00
% Check that lastobs belongs to DynareDataset.dates...
if newdatainterface
if lastobs > DynareDataset . dates ( end )
2014-09-22 20:17:20 +02:00
error ( sprintf ( ' makedataset: last_obs (%s) cannot be greater than the last date in the dataset (%s)!' , char ( lastobs ) , char ( DynareDataset . dates ( end ) ) ) )
2014-06-16 17:41:59 +02:00
end
else
% ... or check that nobs is smaller than the number of observations in DynareDataset.
if nobs > DynareDataset . nobs
2014-09-22 20:17:20 +02:00
error ( sprintf ( ' makedataset: nobs (%s) cannot be greater than the last date in the dataset (%s)!' , num2str ( nobs ) , num2str ( DynareDataset . nobs ) ) )
2014-06-16 17:41:59 +02:00
end
end
% Select a subsample.
2014-06-23 10:55:08 +02:00
DynareDataset = DynareDataset ( FIRSTOBS : lastobs ) ;
2014-06-16 17:41:59 +02:00
% Initialize DatasetInfo structure.
DatasetInfo = struct ( ' missing' , struct ( ' state' , NaN , ' aindex' , [ ] , ' vindex' , [ ] , ' number_of_observations' , NaN , ' no_more_missing_observations' , NaN ) , ...
' descriptive' , struct ( ' mean' , [ ] , ' covariance' , [ ] , ' correlation' , [ ] , ' autocovariance' , [ ] ) ) ;
% Fill DatasetInfo.missing if some observations are missing
DatasetInfo . missing . state = isanynan ( DynareDataset . data ) ;
if DatasetInfo . missing . state
[ DatasetInfo . missing . aindex , DatasetInfo . missing . number_of_observations , DatasetInfo . missing . no_more_missing_observations , DatasetInfo . missing . vindex ] = ...
describe_missing_data ( DynareDataset . data ) ;
else
DatasetInfo . missing . aindex = num2cell ( transpose ( repmat ( 1 : DynareDataset . vobs , DynareDataset . nobs , 1 ) ) , 1 ) ;
DatasetInfo . missing . no_more_missing_observations = 1 ;
end
% Compute the empirical mean of the observed variables.
2018-01-11 22:08:03 +01:00
DatasetInfo . descriptive . mean = nanmean ( DynareDataset . data , 1 ) ;
2014-06-16 17:41:59 +02:00
% Compute the empirical covariance matrix of the observed variables.
DatasetInfo . descriptive . covariance = nancovariance ( DynareDataset . data ) ;
% Compute the empirical correlation matrix of the observed variables.
normalization_matrix = diag ( 1. / sqrt ( diag ( DatasetInfo . descriptive . covariance ) ) ) ;
DatasetInfo . descriptive . correlation = normalization_matrix * DatasetInfo . descriptive . covariance * normalization_matrix ;
% Compute autocorrelation function.
2014-07-09 15:34:17 +02:00
DatasetInfo . descriptive . autocovariance = nanautocovariance ( DynareDataset . data , DynareOptions . ar ) ;
% Save raw data.
DatasetInfo . rawdata = DynareDataset . data ;
% Prefilter the data if needed (remove the mean).
if isequal ( DynareOptions . prefilter , 1 )
DynareDataset = DynareDataset . detrend ( ) ;
end