dynare/matlab/+estimate/nls.m

function nls(eqname, params, data, range, optimizer, varargin)

% Estimates the parameters of a nonlinear backward equation by Nonlinear Least Squares.
%
% INPUTS
% - eqname       [string]    Name of the equation.
% - params       [struct]    Describes the parameters to be estimated.
% - data         [dseries]   Database for the estimation
% - range        [dates]     Range of dates for the estimation.
% - optimizer    [string]    Set the optimization algorithm. Possible values
%                            are 'fmincon', 'fminunc', 'csminwel',
%                            'fminsearch', 'simplex' and
%                            'annealing'. Default is 'csminwel'.
% - varargin                 List of key/value pairs, each key must be a
%                            string and values can be strings or real numbers.
%
% OUTPUTS
% - none
%
% REMARKS
% [1] The estimation results are printed in the command line window, and the
%     parameters are updated accordingly in M_.params.
% [2] The second input is a structure. Each fieldname corresponds to the
%     name of an estimated parameter, the value of the field is the initial
%     guess used for the estimation (by NLS).
% [3] The third input is a dseries object which must at least contain all
%     the variables appearing in the estimated equation. The residual of the
%     equation must have NaN values in the object.
% [4] It is assumed that the residual is additive.

% Copyright © 2021-2023 Dynare Team
%
% This file is part of Dynare.
%
% Dynare is free software: you can redistribute it and/or modify
% it under the terms of the GNU General Public License as published by
% the Free Software Foundation, either version 3 of the License, or
% (at your option) any later version.
%
% Dynare is distributed in the hope that it will be useful,
% but WITHOUT ANY WARRANTY; without even the implied warranty of
% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
% GNU General Public License for more details.
%
% You should have received a copy of the GNU General Public License
% along with Dynare.  If not, see <https://www.gnu.org/licenses/>.

global M_ oo_ options_

% Read equation to be estimated.
[LHS, RHS] = get_lhs_and_rhs(eqname, M_, true);  % Without introducing the auxiliaries
[lhs, rhs] = get_lhs_and_rhs(eqname, M_);        % With auxiliaries.

islaggedvariables = ~isempty(regexp(rhs, '\w+\(-(\d+)\)', 'match')); % Do we have lags on the RHS (with aux)?

if ~isempty(regexp(rhs, '\w+\(\d+\)', 'match'))
    error('Cannot estimate an equation ith leads.')
end

% Update database (for auxiliaries)
if M_.endo_nbr>M_.orig_endo_nbr
    data = feval([M_.fname '.dynamic_set_auxiliary_series'], data, M_.params);
end

%
% Check estimation range.
%

levels = regexp(RHS, '[\w+]\(-(?<lags>\d)\)', 'names');
diffs = regexp(RHS, 'diff\(\w+\)|diff\(log\(\w+\)\)', 'match');
diffl = regexp(RHS, 'diff\([\w+]\(-(?<lags>\d)\)\)|diff\(log\([\w+]\(-(?<lags>\d)\)\)\)', 'names');
ddiffs = regexp(RHS, 'diff\(diff\(\w+\)\)|diff\(diff\(log\(\w+\)\)\)', 'match');
ddiffl = regexp(RHS, 'diff\(diff\([\w+]\(-(?<lags>\d)\)\)\)|diff\(diff\(log\([\w+]\(-(?<lags>\d)\)\)\)\)', 'names');

if isempty(levels)
    llag = 0;
else
    llag = max(cellfun(@str2num, {levels(:).lags}));
end

if ~isempty(diffs)
    llag = max(1, llag);
end

if ~isempty(ddiffs)
    llag = max(2, llag);
end

if ~isempty(diffl)
    llag = max(max(cellfun(@str2num, {diffl(:).lags}))+1, llag);
end

if ~isempty(ddiffl)
    llag = max(max(cellfun(@str2num, {ddiffl(:).lags}))+2, llag);
end

minperiod = data.dates(llag+1);

if minperiod>range(1)
    error('Wrong range. Initial period cannot be smaller than %s', char(minperiod))
end

if range(end)>data.dates(end)
    error('Wrong range. Terminal period cannot be greater than %s', char(data.dates(end)))
end

% Copy (sub)sample data in a matrix.
if islaggedvariables
    DATA = data([range(1)-1, range]).data;
else
    % No lagged variables in the RHS
    DATA = data(range).data;
end

% Get the parameters and variables in the equation.
[pnames, enames, xnames, pid, eid, xid] = get_variables_and_parameters_in_equation(lhs, rhs, M_);

%
% Check that we have only one residual in the equation (we may have observed exogenous variables).
%

% Set the number of exogenous variables.
xnbr = length(xnames);

% Test if we have a residual and get its name (-> rname).
if isequal(xnbr, 1)
    rname = M_.exo_names{strcmp(xnames{1}, M_.exo_names)};
    if ~all(isnan(data{xnames{1}}.data))
        error('The residual (%s) must have NaN values in the provided database.', xnames{1})
    end
else
    % We have observed exogenous variables in the estimated equation.
    tmp = data{xnames{:}}(range).data;
    idx = find(all(~isnan(tmp))); % Indices for the observed exogenous variables.
    if isequal(length(idx), length(xnames))
        error('There is no residual in this equation, all the exogenous variables are observed.')
    else
        if length(idx)<length(xnames)-1
            error('It is not allowed to have more than one residual in an equation')
        end
        irname = setdiff(1:length(xnames), idx);
        rname = xnames{irname};
    end
end

% Remove residuals from the equation. Note that a plus or minus will remain in the equation
rhs = regexprep(rhs, rname, '');

% FIXME The JSON output for rhs (with aux variables substitutions) is not always
% the same regarding to the position of the residual. If the residual appears at
% the end of the equation, after the removal of rname the rhs will end with a +
% symbol which will result in a crash later when evaluating the sum of square
% residuals. If the residual appears at the begining of the equation, after the
% removal of rname the rhs will begin with a + symbol which is just awful (but
% will not cause any trouble).

% Remove trailing + (if any, introduced when removing residual)
if isequal(rhs(end), '+')
    rhs = rhs(1:end-1);
end

% Remove leading + (if any, introduced when removing residual)
if isequal(rhs(1), '+')
    rhs = rhs(2:end);
end

%
% Rewrite and print the equation.
%

[rhs, lhs] = rewrite_equation_with_tables(rhs, lhs, islaggedvariables, pnames, enames, xnames, pid, eid, xid, data);

% Get list and indices of estimated parameters.
ipnames_ = get_estimated_parameters_indices(params, pnames, eqname, M_);

% Create a routine for evaluating the residuals of the nonlinear model
write_residuals_routine(lhs, rhs, eqname, ipnames_, M_);

% Create a routine for evaluating the sum of squared residuals of the nonlinear model
write_ssr_routine(lhs, rhs, eqname, ipnames_, M_);

% Create a function handle returning the sum of square residuals for a given vector of parameters.
ssrfun = @(p) feval([M_.fname '.ssr_' eqname], p, DATA, M_, oo_);

% Create a function handle returning the sum of square residuals for a given vector of parameters.
resfun = @(p) feval([M_.fname '.r_' eqname], p, DATA, M_, oo_);

%
% Prepare call to the optimization routine.
%

% Set initial condition.
params0 = cell2mat(struct2cell(params));

is_gauss_newton = false;
is_lsqnonlin = false;

if nargin>4 && (isequal(optimizer, 'GaussNewton') || isequal(optimizer, 'lsqnonlin'))
    switch optimizer
      case 'GaussNewton'
        is_gauss_newton = true;
      case 'lsqnonlin'
        is_lsqnonlin = true;
    end
end

% Set defaults for optimizers
bounds = [];
parameter_names = [];

% Set optimizer routine.
if nargin<5 || isempty(optimizer)
    % Use csminwel by default.
    minalgo = 4;
else
    switch optimizer
      case 'GaussNewton'
        % Nothing to do here.
      case 'lsqnonlin'
        bounds = ones(length(params0),1)*[-Inf,Inf];
      case 'fmincon'
        if isoctave && ~user_has_octave_forge_package('optim', '1.6')
            error('Optimization algorithm ''fmincon'' requires the optim package, version 1.6 or higher')
        elseif ~isoctave && ~user_has_matlab_license('optimization_toolbox')
            error('Optimization algorithm ''fmincon'' requires the Optimization Toolbox')
        end
        minalgo = 1;
        bounds = ones(length(params0),1)*[-Inf,Inf];
      case 'fminunc'
        if isoctave && ~user_has_octave_forge_package('optim')
            error('Optimization algorithm ''fminunc'' requires the optim package')
        elseif ~isoctave && ~user_has_matlab_license('optimization_toolbox')
            error('Optimization algorithm ''fminunc'' requires the Optimization Toolbox')
        end
        minalgo = 3;
      case 'csminwel'
        minalgo = 4;
      case 'fminsearch'
        if isoctave && ~user_has_octave_forge_package('optim')
            error('Optimization algorithm ''fminsearch'' requires the optim package')
        elseif ~isoctave && ~user_has_matlab_license('optimization_toolbox')
            error('Optimization algorithm ''fminsearch'' requires the Optimization Toolbox')
        end
        minalgo = 7;
      case 'simplex'
        minalgo = 8;
      case 'annealing'
        minalgo = 2;
        bounds = ones(length(params0),1)*[-Inf,Inf];
        parameter_names = fieldnames(params);
      case 'particleswarm'
        if isoctave
            error('Optimization ''particleswarm'' is not available under Octave')
        elseif ~user_has_matlab_license('global_optimization_toolbox')
            error('Optimization ''particleswarm'' requires the Global Optimization Toolbox')
        end
        minalgo = 12;
        bounds = ones(length(params0),1)*[-Inf,Inf];
        parameter_names = fieldnames(params);
      otherwise
        msg = sprintf('%s is not an implemented optimization routine.\n', optimizer);
        msg = sprintf('%sAvailable algorithms are:\n', msg);
        msg = sprintf('%s - %s\n', msg, 'fmincon');
        msg = sprintf('%s - %s\n', msg, 'fminunc');
        msg = sprintf('%s - %s\n', msg, 'csminwel');
        msg = sprintf('%s - %s\n', msg, 'fminsearch');
        msg = sprintf('%s - %s\n', msg, 'simplex');
        msg = sprintf('%s - %s\n', msg, 'annealing');
        msg = sprintf('%s - %s\n', msg, 'lsqnonlin');
        msg = sprintf('%s - %s\n', msg, 'GaussNewton');
        error(msg)
    end
end

% Set options if provided as input arguments to nls routine.
oldopt = options_.optim_opt;
[noprint, opt] = opt4nls(varargin);
options_.optim_opt = opt;

%
% Check that we are able to evaluate the Sum of uared Residuals on the initial guess
%

ssr0 = ssrfun(params0);

if isnan(ssr0) || isinf(ssr0) || ~isreal(ssr0)
    error('Cannot evaluate the Sum of Squared Residuals on the initial guess.')
end

%
% Call optimization routine (NLS)
%

if is_gauss_newton
    [params1, SSR, exitflag] = gauss_newton(resfun, params0);
elseif is_lsqnonlin
    if ismember('levenberg-marquardt', varargin)
        % Levenberg Marquardt does not handle boundary constraints.
        [params1, SSR, ~, exitflag] = lsqnonlin(resfun, params0, [], [], optimset(varargin{:}));
    else
        [params1, SSR, ~, exitflag] = lsqnonlin(resfun, params0, bounds(:,1), bounds(:,2), optimset(varargin{:}));
    end
else
    % Estimate the parameters by minimizing the sum of squared residuals.
    [params1, SSR, exitflag] = dynare_minimize_objective(ssrfun, params0, ...
                                                         minalgo, ...
                                                         options_, ...
                                                         bounds, ...
                                                         parameter_names, ...
                                                         [], ...
                                                         []);
end

% Revert local modifications to the options.
options_.optim_opt = oldopt;

% Compute an estimator of the covariance matrix (see White and
% Domovitz [Econometrica, 1984], theorem 3.2).
[r, J] = jacobian(resfun, params1, 1e-6);
T = length(r);
A = 2.0*(J'*J)/T;
J = bsxfun(@times, J, r);
B = J'*J;
l = round(T^.25);
for tau=1:l
    B = B + (1-tau/(l+1))*(J(tau+1:end,:)'*J(1:end-tau,:)+J(1:end-tau,:)'*J(tau+1:end,:));
end
B = (4.0/T)*B;
C = inv(A)*B*inv(A); % C is the asymptotic covariance of sqrt(T) times the vector of estimated parameters.
C = C/T;

% Save results
dlhs = eval(strrep(lhs, 'data', 'data(range(1)-real(islaggedvariables):range(end)).data')); % REMARK: If lagged variables are present in the estimated equation (ie rhs) then an observation
                                                                                            % has been appended to the dataset (period range(1)-1) and the left hand side has been replaced
                                                                                            % by data(2:end,lhs_id) in the matlab routine generated to evaluate the residuals (it is also the value
                                                                                            % hold by string variable lhs). Hence to evaluate the left hand side variable used for estimation,
                                                                                            % between range(1) and range(end) we need here to append the same observation in period range(1)-1.
oo_.nls.(eqname).lhs = dseries(dlhs, range(1), sprintf('%s_lhs', eqname));
oo_.nls.(eqname).fit = dseries(dlhs-r, range(1), sprintf('%s_fit', eqname));
oo_.nls.(eqname).residual = dseries(r, range(1), sprintf('%s_residual', eqname));
oo_.nls.(eqname).ssr = SSR;
oo_.nls.(eqname).s2 = SSR/T;
oo_.nls.(eqname).R2 = 1-var(r)/var(dlhs);
oo_.nls.(eqname).pnames = fieldnames(params);
oo_.nls.(eqname).beta = params1;
oo_.nls.(eqname).covariance = C;
oo_.nls.(eqname).tstat = params1./(sqrt(diag(C)));

% Also save estimated parameters in M_
M_.params(ipnames_) = params1;

if ~noprint
    title = ['NLS Estimation of equation ''' eqname ''''];
    preamble = {['Dependent Variable: ' LHS], ...
                sprintf('Observations: %d from %s to %s\n', (range(end)-range(1))+1, range(1).char, range(end).char)};

    afterward = {sprintf('R^2: %f', oo_.nls.(eqname).R2), ...
                 sprintf('s^2: %f', oo_.nls.(eqname).s2)}; ...

    dyn_table(title, preamble, afterward, oo_.nls.(eqname).pnames, ...
              {'Estimates','t-statistic','Std. Error'}, 4, ...
              [oo_.nls.(eqname).beta oo_.nls.(eqname).tstat sqrt(diag(C))]);
end