Improvements in load_csv_file_data
* autodetect whether there are variable names and dates in the file
* optimize under Octave, using csv2cell and parsecell from the io package
time-shift
parent
eb75397e0f
commit
93416a93da
|
@ -4334,12 +4334,14 @@ univariate convergence diagnostic.
|
||||||
|
|
||||||
@item datafile = @var{FILENAME}
|
@item datafile = @var{FILENAME}
|
||||||
@anchor{datafile}
|
@anchor{datafile}
|
||||||
The datafile: a @file{.m} file, a @file{.mat} file, a @file{.csv} file, or a
|
The datafile: a @file{.m} file, a @file{.mat} file, a @file{.csv}
|
||||||
@file{.xls}/@file{.xlsx} file (the latter format is supported under Octave
|
file, or a @file{.xls}/@file{.xlsx} file (under Octave, the
|
||||||
if the @uref{http://octave.sourceforge.net/io/,io} and
|
@uref{http://octave.sourceforge.net/io/,io} package from Octave-Forge is
|
||||||
@uref{http://octave.sourceforge.net/java/,java} packages from
|
required for the @file{.csv}, @file{.xls} and @file{.xlsx} formats; in
|
||||||
Octave-Forge are installed, along with a
|
addition, for the @file{.xls} and @file{.xlsx} formats, the
|
||||||
@uref{http://www.java.com/download,Java Runtime Environment})
|
@uref{http://octave.sourceforge.net/java/,java} package is required,
|
||||||
|
along with a @uref{http://www.java.com/download,Java Runtime
|
||||||
|
Environment})
|
||||||
|
|
||||||
@item xls_sheet = @var{NAME}
|
@item xls_sheet = @var{NAME}
|
||||||
@anchor{xls_sheet}
|
@anchor{xls_sheet}
|
||||||
|
|
|
@ -1,18 +1,20 @@
|
||||||
function [freq, init, data, varlist] = load_csv_file_data(file, withtime, withnames, noemptycell)
|
function [freq, init, data, varlist] = load_csv_file_data(file)
|
||||||
|
|
||||||
% Loads data in a csv file.
|
% Loads data in a csv file.
|
||||||
%
|
%
|
||||||
% INPUTS
|
% INPUTS
|
||||||
% o file string, name of the csv file (with path).
|
% o file string, name of the csv file (with path).
|
||||||
% o withtime integer scalar, nonzero iff the first column is for the dates of the observations.
|
|
||||||
% o withnames integer scalar, nonzero iff the first row is for the names of the variables.
|
|
||||||
% o noemptycell integer scalar, nonzero iff the csv file does not have empty cells.
|
|
||||||
%
|
%
|
||||||
% OUTPUTS
|
% OUTPUTS
|
||||||
% o freq integer scalar equal to 1, 4, 12 or 52 (for annual, quarterly, monthly or weekly frequencies).
|
% o freq integer scalar equal to 1, 4, 12 or 52 (for annual, quarterly, monthly or weekly frequencies).
|
||||||
% o init dates object, initial date in the dataset.
|
% o init dates object, initial date in the dataset.
|
||||||
% o data matrix of doubles, the data.
|
% o data matrix of doubles, the data.
|
||||||
% o varlist cell of strings, names of the variables.
|
% o varlist cell of strings, names of the variables.
|
||||||
|
%
|
||||||
|
% REMARKS
|
||||||
|
% The varlist output will be set only if the first line contains variable
|
||||||
|
% names. Similarly, if the first column does not contain dates, then
|
||||||
|
% freq will be 1 and init will be year 1.
|
||||||
|
|
||||||
% Copyright (C) 2012-2013 Dynare Team
|
% Copyright (C) 2012-2013 Dynare Team
|
||||||
%
|
%
|
||||||
|
@ -31,174 +33,45 @@ function [freq, init, data, varlist] = load_csv_file_data(file, withtime, withna
|
||||||
% You should have received a copy of the GNU General Public License
|
% You should have received a copy of the GNU General Public License
|
||||||
% along with Dynare. If not, see <http://www.gnu.org/licenses/>.
|
% along with Dynare. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
% Set defaults.
|
|
||||||
if nargin<4
|
|
||||||
noemptycell = 1;
|
|
||||||
if nargin<3
|
|
||||||
withnames = 1;
|
|
||||||
if nargin <2
|
|
||||||
withtime = 1;
|
|
||||||
if nargin<1
|
|
||||||
error('load_csv_file_data:: I need at least one input (name of the csv file)!')
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
if ~withtime && ~withnames && noemptycell
|
|
||||||
% Use matlab builtin routine!
|
|
||||||
data = csvread(file);
|
|
||||||
end
|
|
||||||
|
|
||||||
if ~( isequal(withtime,0) || isequal(withtime,1) )
|
|
||||||
error('load_csv_file_data:: Second input argument has to be equal to 1 or 0!')
|
|
||||||
end
|
|
||||||
|
|
||||||
if ~( isequal(withnames,0) || isequal(withnames,1) )
|
|
||||||
error('load_csv_file_data:: Third input argument has to be equal to 1 or 0!')
|
|
||||||
end
|
|
||||||
|
|
||||||
% Output initialization
|
% Output initialization
|
||||||
freq = 1; % Default frequency is annual.
|
freq = 1; % Default frequency is annual.
|
||||||
init = dates(1,1); % Default initial date is year one.
|
init = dates(1,1); % Default initial date is year one.
|
||||||
varlist = [];
|
varlist = [];
|
||||||
if ~isoctave
|
|
||||||
% Under Matlab, save time by using importdata
|
|
||||||
assert(exist(file, 'file') == 2, ['load_csv_file_data: I can''t find file ' file '!']);
|
assert(exist(file, 'file') == 2, ['load_csv_file_data: I can''t find file ' file '!']);
|
||||||
A = importdata(file, ',', withnames);
|
|
||||||
if withnames && withtime
|
if isoctave
|
||||||
if size(A.textdata, 1) == 1
|
if ~user_has_octave_forge_package('io')
|
||||||
% year dates confused for data
|
error('The io package is required to read CSV files from Octave')
|
||||||
varlist = A.textdata(1, 2:end);
|
|
||||||
init = dates([num2str(A.data(1, 1)) 'Y']);
|
|
||||||
data = A.data(:, 2:end);
|
|
||||||
else
|
|
||||||
varlist = A.textdata(1, 2:end);
|
|
||||||
init = dates(A.textdata{2, 1});
|
|
||||||
data = A.data;
|
|
||||||
end
|
end
|
||||||
elseif withnames && ~withtime
|
A = csv2cell(file);
|
||||||
varlist = A.textdata;
|
[data, T, L] = parsecell(A);
|
||||||
data = A.data;
|
withvars = L.numlimits(2,1) > L.txtlimits(2,1);
|
||||||
elseif ~withnames && withtime
|
withtime = L.numlimits(1,1) > L.txtlimits(1,1);
|
||||||
|
else
|
||||||
|
A = importdata(file);
|
||||||
if ~isstruct(A)
|
if ~isstruct(A)
|
||||||
% year dates confused for data
|
data = A;
|
||||||
init = dates([num2str(A.data(1, 1)) 'Y']);
|
T = {};
|
||||||
data = A(:, 2:end);
|
withvars = 0;
|
||||||
|
withtime = 0;
|
||||||
else
|
else
|
||||||
init = dates(A.textdata{1, 1});
|
|
||||||
data = A.data;
|
data = A.data;
|
||||||
|
T = A.textdata;
|
||||||
|
% importdata() allows text only at the top and the left, so the following
|
||||||
|
% tests are sufficient.
|
||||||
|
withvars = size(T, 2) >= size(data, 2);
|
||||||
|
withtime = size(T, 1) >= size(data, 1);
|
||||||
end
|
end
|
||||||
else
|
|
||||||
error('load_csv_file_data:: Shouldn''t arrive here');
|
|
||||||
end
|
end
|
||||||
|
|
||||||
|
if withvars
|
||||||
|
varlist = T(1, 2:end);
|
||||||
|
T = T(2:end, :);
|
||||||
|
end
|
||||||
|
if withtime
|
||||||
|
init = dates(T{1, 1});
|
||||||
freq = init.freq;
|
freq = init.freq;
|
||||||
|
end
|
||||||
|
|
||||||
varlist = transpose(varlist);
|
varlist = transpose(varlist);
|
||||||
return
|
|
||||||
end
|
|
||||||
|
|
||||||
% Check if file exists.
|
|
||||||
if check_file_extension(file,'csv')
|
|
||||||
try
|
|
||||||
fid = fopen(file,'r');
|
|
||||||
catch
|
|
||||||
error(['load_csv_file_data: I can''t find file ' file '!'])
|
|
||||||
end
|
|
||||||
else
|
|
||||||
error('load_csv_file_data: Wrong file extension!')
|
|
||||||
end
|
|
||||||
|
|
||||||
% bfile contains a vector of ascii codes.
|
|
||||||
bfile = fread(fid);
|
|
||||||
|
|
||||||
% Close (csv) file.
|
|
||||||
fclose(fid);
|
|
||||||
|
|
||||||
% Set newline code (ok for *nix, check for mac and windows)
|
|
||||||
if isunix
|
|
||||||
newline_code = 10;
|
|
||||||
elseif ispc
|
|
||||||
newline_code = 13;
|
|
||||||
elseif ismac
|
|
||||||
newline_code = 10;
|
|
||||||
else
|
|
||||||
error('load_csv_file_data:: Not implemented for your OS!')
|
|
||||||
end
|
|
||||||
|
|
||||||
% Get the positions of the end-of-line code;
|
|
||||||
end_of_line_locations = find(bfile==newline_code);
|
|
||||||
if ispc && isempty(end_of_line_locations)
|
|
||||||
newline_code=10;
|
|
||||||
end_of_line_locations = find(bfile==newline_code);
|
|
||||||
end;
|
|
||||||
tmp = find(bfile==newline_code);
|
|
||||||
|
|
||||||
% Get the number of lines in the file.
|
|
||||||
ndx = length(tmp);
|
|
||||||
|
|
||||||
% Create a cell of indices for each line.
|
|
||||||
b = [1; end_of_line_locations+1];
|
|
||||||
c = [end_of_line_locations-1; length(bfile)+1];
|
|
||||||
b = b(1:end-1);
|
|
||||||
c = c(1:end-1);
|
|
||||||
|
|
||||||
linea = 1;
|
|
||||||
|
|
||||||
if withnames
|
|
||||||
% Get the first line of the csv file (names of the variables).
|
|
||||||
linee = char(transpose(bfile(b(linea):c(linea))));
|
|
||||||
% Get the content of the first line and determine the number of variables and their names.
|
|
||||||
[B,C] = get_cells_id(linee,',');
|
|
||||||
if withtime
|
|
||||||
B = B(2:end);
|
|
||||||
C = C(2:end);
|
|
||||||
end
|
|
||||||
varlist = cell(length(B),1);
|
|
||||||
number_of_variables = length(varlist);
|
|
||||||
for i=1:number_of_variables
|
|
||||||
varlist(i) = {linee(B(i):C(i))};
|
|
||||||
end
|
|
||||||
varlist = strtrim(varlist);
|
|
||||||
linea = linea+1;
|
|
||||||
end
|
|
||||||
|
|
||||||
% Get following line (number 1 or 2 depending on withnames flag)
|
|
||||||
linee = char(transpose(bfile(b(linea):c(linea))));
|
|
||||||
comma_locations = transpose(strfind(linee,','));
|
|
||||||
B = 1;
|
|
||||||
C = comma_locations(1)-1;
|
|
||||||
|
|
||||||
if withtime
|
|
||||||
tmp = linee(B:C);
|
|
||||||
% Check the dates formatting
|
|
||||||
if isnumeric(tmp) && isint(tmp)
|
|
||||||
tmp = [num2str(tmp) 'Y'];
|
|
||||||
end
|
|
||||||
if ~isdate(tmp)
|
|
||||||
error('load_csv_file_data:: Formatting error. I can''t read the dates!')
|
|
||||||
end
|
|
||||||
init = dates(tmp);
|
|
||||||
freq = init.freq;
|
|
||||||
first = 2;
|
|
||||||
else
|
|
||||||
first = 1;
|
|
||||||
end
|
|
||||||
|
|
||||||
if ~withnames
|
|
||||||
number_of_variables = length(tmp)-withtime;
|
|
||||||
end
|
|
||||||
|
|
||||||
% Initialization of matrix data.
|
|
||||||
data = zeros(ndx,number_of_variables);
|
|
||||||
|
|
||||||
% Populate data.
|
|
||||||
for linea = 1+withnames:ndx
|
|
||||||
linee = char(transpose(bfile(b(linea):c(linea))));
|
|
||||||
[B,C] = get_cells_id(linee,',');
|
|
||||||
for i=first:length(B)
|
|
||||||
data(linea,i-withtime) = str2double(linee(B(i):C(i)));
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
% Remove first line if withnames
|
|
||||||
data = data(1+withnames:ndx,:);
|
|
Loading…
Reference in New Issue