From 9430b4e9ca3427be6edf476c357ba46e1614e642 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?S=C3=A9bastien=20Villemot?= <sebastien@dynare.org>
Date: Wed, 22 Jul 2020 17:57:12 +0200
Subject: [PATCH 1/6] Block trust region MEX: add debugging information

---
 .../block_trust_region/mexFunction.f08        | 27 ++++++++++++++++---
 1 file changed, 24 insertions(+), 3 deletions(-)

diff --git a/mex/sources/block_trust_region/mexFunction.f08 b/mex/sources/block_trust_region/mexFunction.f08
index 7377af47c..95bfb4c80 100644
--- a/mex/sources/block_trust_region/mexFunction.f08
+++ b/mex/sources/block_trust_region/mexFunction.f08
@@ -34,9 +34,11 @@ subroutine mexFunction(nlhs, plhs, nrhs, prhs) bind(c, name='mexFunction')
   real(real64), parameter :: tolf = 1e-6_real64
   real(real64), dimension(:), allocatable :: fvec
   real(real64), dimension(:,:), allocatable :: fjac
+  logical :: debug
+  character(len=80) :: debug_msg
 
-  if (nrhs < 2 .or. nlhs /= 2) then
-     call mexErrMsgTxt("Must have at least 2 inputs and exactly 2 outputs")
+  if (nrhs < 3 .or. nlhs /= 2) then
+     call mexErrMsgTxt("Must have at least 3 inputs and exactly 2 outputs")
      return
   end if
 
@@ -50,8 +52,14 @@ subroutine mexFunction(nlhs, plhs, nrhs, prhs) bind(c, name='mexFunction')
      return
   end if
 
+  if (.not. (mxIsLogicalScalar(prhs(3)))) then
+     call mexErrMsgTxt("Third argument should be a logical scalar")
+     return
+  end if
+
   func => prhs(1)
-  extra_args => prhs(3:nrhs)
+  debug = mxGetScalar(prhs(3)) == 1._c_double
+  extra_args => prhs(4:nrhs)
   associate (x_mat => mxGetPr(prhs(2)))
     allocate(x(size(x_mat)))
     x = x_mat
@@ -65,9 +73,20 @@ subroutine mexFunction(nlhs, plhs, nrhs, prhs) bind(c, name='mexFunction')
   call matlab_fcn(x, fvec, fjac)
   call dm_blocks(fjac, blocks)
 
+  if (debug) then
+     write (debug_msg, "('DYNARE_SOLVE (solve_algo=13|14): number of blocks = ', i0)") size(blocks)
+     call mexPrintf_trim_newline(debug_msg)
+  end if
+
   ! Solve the system, starting from bottom-rightmost block
   x_all => x
   do i = size(blocks),1,-1
+     if (debug) then
+        write (debug_msg, "('DYNARE_SOLVE (solve_algo=13|14): solving block ', i0, ' of size ', i0)") &
+             i, size(blocks(i)%col_indices)
+        call mexPrintf_trim_newline(debug_msg)
+     end if
+
      block
        real(real64), dimension(size(blocks(i)%col_indices)) :: x_block
        x_indices => blocks(i)%col_indices
@@ -88,6 +107,8 @@ subroutine mexFunction(nlhs, plhs, nrhs, prhs) bind(c, name='mexFunction')
   nullify(x_indices, f_indices, x_all)
   call matlab_fcn(x, fvec)
   if (maxval(abs(fvec)) > tolf) then
+     if (debug) &
+          call mexPrintf_trim_newline("DYNARE_SOLVE (solve_algo=13|14): residuals still too large, solving for the whole model")
      call trust_region_solve(x, matlab_fcn, info, tolf = tolf)
   else
      info = 1

From adf1fdb009a87ef54cf8f5a28f5a9ceb056c1c90 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?S=C3=A9bastien=20Villemot?= <sebastien@dynare.org>
Date: Wed, 22 Jul 2020 18:01:31 +0200
Subject: [PATCH 2/6] Block trust region MEX: add safety check for squareness
 of blocks

---
 mex/sources/block_trust_region/mexFunction.f08 | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/mex/sources/block_trust_region/mexFunction.f08 b/mex/sources/block_trust_region/mexFunction.f08
index 95bfb4c80..d375c4250 100644
--- a/mex/sources/block_trust_region/mexFunction.f08
+++ b/mex/sources/block_trust_region/mexFunction.f08
@@ -91,6 +91,10 @@ subroutine mexFunction(nlhs, plhs, nrhs, prhs) bind(c, name='mexFunction')
        real(real64), dimension(size(blocks(i)%col_indices)) :: x_block
        x_indices => blocks(i)%col_indices
        f_indices => blocks(i)%row_indices
+       if (size(x_indices) /= size(f_indices)) then
+          call mexErrMsgTxt("Non-square block")
+          return
+       end if
        x_block = x(x_indices)
        call trust_region_solve(x_block, matlab_fcn, info, tolf = tolf)
        x(x_indices) = x_block

From 0b41d17374488f1357ec9cce0e0a9b36aac790ca Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?S=C3=A9bastien=20Villemot?= <sebastien@dynare.org>
Date: Mon, 27 Jul 2020 17:34:13 +0200
Subject: [PATCH 3/6] dynare_solve: fix display of number of dmperm blocks

---
 matlab/dynare_solve.m | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/matlab/dynare_solve.m b/matlab/dynare_solve.m
index 3bb7ecc03..079bc1429 100644
--- a/matlab/dynare_solve.m
+++ b/matlab/dynare_solve.m
@@ -251,7 +251,7 @@ elseif ismember(options.solve_algo, [2, 12, 4, 14])
     [j1,j2,r,s] = dmperm(fjac);
     JAC = abs(fjac(j1,j2))>0;
     if options.debug
-        disp(['DYNARE_SOLVE (solve_algo=2|4|12|14): number of blocks = ' num2str(length(r))]);
+        disp(['DYNARE_SOLVE (solve_algo=2|4|12|14): number of blocks = ' num2str(length(r)-1)]);
     end
     l = 0;
     fre = false;

From 35a162c6a6cebd9e5d2b9ede9db58f9442ea6809 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?S=C3=A9bastien=20Villemot?= <sebastien@dynare.org>
Date: Mon, 7 Sep 2020 16:13:59 +0200
Subject: [PATCH 4/6] Block trust region MEX: fix memory leak in MATLAB
 function closure

---
 mex/sources/block_trust_region/matlab_fcn_closure.f08 | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/mex/sources/block_trust_region/matlab_fcn_closure.f08 b/mex/sources/block_trust_region/matlab_fcn_closure.f08
index 9565d8f74..d6fc00ea8 100644
--- a/mex/sources/block_trust_region/matlab_fcn_closure.f08
+++ b/mex/sources/block_trust_region/matlab_fcn_closure.f08
@@ -11,7 +11,7 @@
 ! be set in order to give the input values for the indices that are not passed
 ! to matlab_fcn.
 
-! Copyright © 2019 Dynare Team
+! Copyright © 2019-2020 Dynare Team
 !
 ! This file is part of Dynare.
 !
@@ -80,12 +80,16 @@ contains
     if (mexCallMATLAB(nlhs, call_lhs, int(size(call_rhs), c_int), call_rhs, "feval") /= 0) &
          call mexErrMsgTxt("Error calling function to be solved")
 
+    call mxDestroyArray(call_rhs(2))
+
     fvec_all => mxGetPr(call_lhs(1))
     if (associated(f_indices)) then
        fvec = fvec_all(f_indices)
     else
        fvec = fvec_all
     end if
+    call mxDestroyArray(call_lhs(1))
+
     if (present(fjac)) then
        fjac_all(1:n_all,1:n_all) => mxGetPr(call_lhs(2))
        if (associated(x_indices) .and. associated(f_indices)) then
@@ -93,6 +97,7 @@ contains
        else
           fjac = fjac_all
        end if
+       call mxDestroyArray(call_lhs(2))
     end if
   end subroutine matlab_fcn
 end module matlab_fcn_closure

From 7e21bf2a10189ef4c1d8673a7f736c294d23d4ff Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?S=C3=A9bastien=20Villemot?= <sebastien@dynare.org>
Date: Tue, 8 Sep 2020 16:23:32 +0200
Subject: [PATCH 5/6] =?UTF-8?q?Block=20trust=20region=20MEX:=20use=20MATLA?=
 =?UTF-8?q?B=E2=80=99s=20dmperm=20for=20the=20Dulmage-Mendelsohn=20decompo?=
 =?UTF-8?q?sition?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

It turns out that MATLAB’s implementation is significantly faster than my own
Fortran implementation.
---
 .../block_trust_region/dulmage_mendelsohn.f08 | 585 ++----------------
 1 file changed, 35 insertions(+), 550 deletions(-)

diff --git a/mex/sources/block_trust_region/dulmage_mendelsohn.f08 b/mex/sources/block_trust_region/dulmage_mendelsohn.f08
index 45e48d82a..56122bedb 100644
--- a/mex/sources/block_trust_region/dulmage_mendelsohn.f08
+++ b/mex/sources/block_trust_region/dulmage_mendelsohn.f08
@@ -1,16 +1,7 @@
-! Implementation of the Dulmage-Mendelsohn decomposition
-!
-! Closely follows the description of Pothen and Fan (1990), “Computing the
-! Block Triangular Form of a Sparse Matrix”, ACM Transactions on Mathematical
-! Software, Vol. 16, No. 4, pp. 303–324
-!
-! In addition, also computes the directed acyclic graph (DAG) formed by the
-! fine blocks, in order to optimize the parallel resolution of a linear system.
-! The last (i.e. lower-right) block has no predecessor (since this is an
-! *upper* block triangular form), and the other blocks may have predecessors if
-! they use variables computed by blocks further on the right.
+! Wrapper around MATLAB’s dmperm to compute the Dulmage-Mendelsohn
+! decomposition
 
-! Copyright © 2019 Dynare Team
+! Copyright © 2020 Dynare Team
 !
 ! This file is part of Dynare.
 !
@@ -29,559 +20,53 @@
 
 module dulmage_mendelsohn
   use iso_fortran_env
+  use matlab_mex
   implicit none
 
-  private
-  public :: dm_block, dmperm, dm_blocks
-
   ! Represents a block in the fine DM decomposition
   type :: dm_block
      integer, dimension(:), allocatable :: row_indices, col_indices
-     character :: coarse_type ! Place in the coarse decomposition, either 'H', 'S' or 'V'
-     integer, dimension(:), allocatable :: predecessors, successors ! Predecessors and successors in the DAG formed by fine DM blocks
   end type dm_block
-
-  type :: vertex
-     integer, dimension(:), allocatable :: edges
-     integer :: id ! Index of row of column represented by this vertex
-  end type vertex
-
-  type :: matrix_graph
-     type(vertex), dimension(:), allocatable :: row_vertices, col_vertices
-     integer, dimension(:), allocatable :: row_matching, col_matching ! Maximum matching
-     character, dimension(:), allocatable :: row_coarse, col_coarse ! Coarse decomposition
-     type(dm_block), dimension(:), allocatable :: fine_blocks ! Fine decomposition
-     integer, dimension(:), allocatable :: row_fine_block, col_fine_block ! Maps rows/cols to their block index
-   contains
-     procedure :: init => matrix_graph_init
-     procedure :: maximum_matching => matrix_graph_maximum_matching
-     procedure :: coarse_decomposition => matrix_graph_coarse_decomposition
-     procedure :: fine_decomposition => matrix_graph_fine_decomposition
-     procedure :: fine_blocks_dag => matrix_graph_fine_blocks_dag
-  end type matrix_graph
-
 contains
-  ! Initialize a matrix_graph object, given a matrix
-  subroutine matrix_graph_init(this, mat)
-    class(matrix_graph), intent(inout) :: this
-    real(real64), dimension(:, :), intent(in) :: mat
-
-    integer :: i, j
-
-    if (size(mat, 1) < size(mat, 2)) &
-         error stop "Matrix has less rows than columns; consider transposing it"
-
-    ! Create row vertices
-    allocate(this%row_vertices(size(mat, 1)))
-    do i = 1, size(mat, 1)
-       this%row_vertices(i)%edges = pack([ (j, j=1,size(mat, 2)) ], mat(i, :) /= 0_real64)
-       this%row_vertices(i)%id = i
-    end do
-
-    ! Create column vertices
-    allocate(this%col_vertices(size(mat, 2)))
-    do i = 1, size(mat, 2)
-       this%col_vertices(i)%edges = pack([ (j, j=1, size(mat, 1)) ], mat(:, i) /= 0_real64)
-       this%col_vertices(i)%id = i
-    end do
-  end subroutine matrix_graph_init
-
-  ! Computes the maximum matching for this graph
-  subroutine matrix_graph_maximum_matching(this)
-    class(matrix_graph), intent(inout) :: this
-
-    logical, dimension(size(this%row_vertices)) :: row_visited
-    integer, dimension(:), allocatable :: col_unmatched, col_unmatched_new
-    integer :: i, j
-
-    if (allocated(this%row_matching) .or. allocated(this%col_matching)) &
-         error stop "Maximum matching already computed"
-
-    allocate(this%row_matching(size(this%row_vertices)))
-    allocate(this%col_matching(size(this%col_vertices)))
-
-    this%row_matching = 0
-    this%col_matching = 0
-    allocate(col_unmatched(0))
-
-    ! Compute cheap matching
-    do i = 1, size(this%col_vertices)
-       match_col: do j = 1, size(this%col_vertices(i)%edges)
-          associate (v => this%col_vertices(i)%edges(j))
-            if (this%row_matching(v) == 0) then
-               this%row_matching(v) = i
-               this%col_matching(i) = v
-               exit match_col
-            end if
-          end associate
-       end do match_col
-       if (this%col_matching(i) == 0) col_unmatched = [ col_unmatched, i ]
-    end do
-
-    ! Augment matching
-    allocate(col_unmatched_new(0))
-    do
-       row_visited = .false.
-       do i = 1, size(col_unmatched)
-          call search_augmenting_path(col_unmatched(i))
-       end do
-       if (size(col_unmatched) == size(col_unmatched_new)) exit
-       call move_alloc(from=col_unmatched_new, to=col_unmatched)
-       allocate(col_unmatched_new(0))
-    end do
-  contains
-    ! Depth-first search for an augmenting path
-    ! The algorithm is written iteratively (and not recursively), because
-    ! otherwise the stack could overflow on large matrices
-    subroutine search_augmenting_path(col)
-      integer, intent(in) :: col
-
-      integer, dimension(:), allocatable :: row_path, col_path ! Path visited so far
-      integer, dimension(size(this%col_vertices)) :: col_edge ! For each column, keeps the last visited edge index
-      integer :: i
-
-      col_edge = 0
-      allocate(col_path(1))
-      col_path(1) = col
-      allocate(row_path(0))
-
-      do
-         ! Depth-first search, starting from last column in the path
-         associate (current_col => col_path(size(col_path)))
-           col_edge(current_col) = col_edge(current_col) + 1
-           if (col_edge(current_col) > size(this%col_vertices(current_col)%edges)) then
-              ! We visited all edges from this column, need to backtrack
-              if (size(col_path) == 1) then
-                 ! The search failed
-                 col_unmatched_new = [ col_unmatched_new, col ]
-                 return
-              end if
-              col_path = col_path(:size(col_path)-1)
-              row_path = row_path(:size(row_path)-1)
-           else
-              associate (current_row => this%col_vertices(current_col)%edges(col_edge(current_col)))
-                if (.not. row_visited(current_row)) then
-                   row_visited(current_row) = .true.
-                   row_path = [ row_path, current_row ]
-                   if (this%row_matching(current_row) == 0) then
-                      ! We found an augmenting path, update matching and return
-                      do i = 1, size(col_path)
-                         this%row_matching(row_path(i)) = col_path(i)
-                         this%col_matching(col_path(i)) = row_path(i)
-                      end do
-                      return
-                   end if
-                   col_path = [ col_path, this%row_matching(current_row)]
-                end if
-              end associate
-           end if
-         end associate
-      end do
-    end subroutine search_augmenting_path
-  end subroutine matrix_graph_maximum_matching
-
-  ! Computes the coarse decomposition
-  ! The {row,col}_coarse arrays are filled with characters 'H', 'S' and 'V'
-  subroutine matrix_graph_coarse_decomposition(this)
-    class(matrix_graph), intent(inout) :: this
-
-    integer :: i
-
-    if (allocated(this%row_coarse) .or. allocated(this%col_coarse)) &
-         error stop "Coarse decomposition already computed"
-
-    if (.not. (allocated(this%row_matching) .and. allocated(this%col_matching))) &
-         error stop "Maximum matching not yet computed"
-
-    allocate(this%row_coarse(size(this%row_vertices)))
-    allocate(this%col_coarse(size(this%col_vertices)))
-
-    ! Initialize partition
-    this%row_coarse = 'S'
-    this%col_coarse = 'S'
-
-    ! Identify A_h
-    do i = 1, size(this%col_vertices)
-       if (this%col_matching(i) /= 0) cycle
-       this%col_coarse(i) = 'H'
-       call alternating_dfs_col(i)
-    end do
-
-    ! Identify A_v
-    do i = 1, size(this%row_vertices)
-       if (this%row_matching(i) /= 0) cycle
-       this%row_coarse(i) = 'V'
-       call alternating_dfs_row(i)
-    end do
-  contains
-    ! Depth-first search along alternating path, starting from a column
-    ! Written iteratively, to avoid stack overflow on large matrices
-    subroutine alternating_dfs_col(col)
-      integer, intent(in) :: col
-
-      integer, dimension(size(this%col_vertices)) :: col_edge
-      integer, dimension(:), allocatable :: col_path ! path visited so far
-
-      col_edge = 0
-      allocate(col_path(1))
-      col_path(1) = col
-
-      do
-         associate (current_col => col_path(size(col_path)))
-           col_edge(current_col) = col_edge(current_col) + 1
-           if (col_edge(current_col) > size(this%col_vertices(current_col)%edges)) then
-              ! We visited all edges from this column, need to backtrack
-              if (size(col_path) == 1) return
-              col_path = col_path(:size(col_path)-1)
-           else
-              associate (current_row => this%col_vertices(current_col)%edges(col_edge(current_col)))
-                if (this%row_coarse(current_row) /= 'H') then
-                   this%row_coarse(current_row) = 'H'
-                   if (this%row_matching(current_row) /= 0) then
-                      this%col_coarse(this%row_matching(current_row)) = 'H'
-                      col_path = [ col_path, this%row_matching(current_row)]
-                   end if
-                end if
-              end associate
-           end if
-         end associate
-      end do
-    end subroutine alternating_dfs_col
-
-    ! Depth-first search along alternating path, starting from a row
-    ! This is the perfect symmetric of alternating_dfs_col (swapping "col" and "row")
-    subroutine alternating_dfs_row(row)
-      integer, intent(in) :: row
-
-      integer, dimension(size(this%row_vertices)) :: row_edge
-      integer, dimension(:), allocatable :: row_path ! path visited so far
-
-      row_edge = 0
-      allocate(row_path(1))
-      row_path(1) = row
-
-      do
-         associate (current_row => row_path(size(row_path)))
-           row_edge(current_row) = row_edge(current_row) + 1
-           if (row_edge(current_row) > size(this%row_vertices(current_row)%edges)) then
-              ! We visited all edges from this row, need to backtrack
-              if (size(row_path) == 1) return
-              row_path = row_path(:size(row_path)-1)
-           else
-              associate (current_col => this%row_vertices(current_row)%edges(row_edge(current_row)))
-                if (this%col_coarse(current_col) /= 'V') then
-                   this%col_coarse(current_col) = 'V'
-                   if (this%col_matching(current_col) /= 0) then
-                      this%row_coarse(this%col_matching(current_col)) = 'V'
-                      row_path = [ row_path, this%col_matching(current_col)]
-                   end if
-                end if
-              end associate
-           end if
-        end associate
-      end do
-    end subroutine alternating_dfs_row
-  end subroutine matrix_graph_coarse_decomposition
-
-  ! Computes the coarse decomposition
-  ! Allocate and fills the this%row_fine_* and this%col_fine* arrays
-  subroutine matrix_graph_fine_decomposition(this)
-    class(matrix_graph), intent(inout) :: this
-
-    ! Index of next block to be discovered
-    integer :: block_index
-
-    ! Used in DFS (both in dfs_H_or_V and strongconnect)
-    integer, dimension(size(this%row_vertices)) :: row_edge
-    integer, dimension(size(this%col_vertices)) :: col_edge
-
-    ! For dfs_H_or_V
-    logical, dimension(size(this%row_vertices)) :: row_visited
-    logical, dimension(size(this%col_vertices)) :: col_visited
-
-    ! For strongconnect
-    integer, dimension(:), allocatable :: col_stack
-    logical, dimension(size(this%col_vertices)) :: col_on_stack
-    integer, dimension(size(this%col_vertices)) :: col_index, col_lowlink
-    integer :: index
-
-    integer :: i
-
-    if (allocated(this%fine_blocks) &
-         .or. allocated(this%row_fine_block) .or. allocated(this%col_fine_block)) &
-         error stop "Fine decomposition already computed"
-
-    if (.not. (allocated(this%row_coarse) .and. allocated(this%col_coarse))) &
-         error stop "Coarse decomposition not yet computed"
-
-    allocate(this%row_fine_block(size(this%row_vertices)))
-    allocate(this%col_fine_block(size(this%col_vertices)))
-    allocate(this%fine_blocks(0))
-    block_index = 1
-
-    row_visited = .false.
-    col_visited = .false.
-    row_edge = 0
-    col_edge = 0
-
-    ! Compute blocks in H
-    do i = 1, size(this%row_vertices)
-       if (this%row_coarse(i) == 'H' .and. .not. row_visited(i)) call dfs_H_or_V(i, .true., 'H')
-    end do
-    do i = 1, size(this%col_vertices)
-       if (this%col_coarse(i) == 'H' .and. .not. col_visited(i)) call dfs_H_or_V(i, .false., 'H')
-    end do
-
-    ! Compute blocks in S (Tarjan algorithm on graph consisting only of column
-    ! vertices, where rows have been merged with their matching columns)
-    allocate(col_stack(0))
-    col_on_stack = .false.
-    index = 0
-    col_index = -1
-    do i = 1, size(this%col_vertices)
-       if (this%col_coarse(i) == 'S' .and. col_index(i) == -1) call strongconnect(i)
-    end do
-
-    ! Compute blocks in V
-    do i = 1, size(this%row_vertices)
-       if (this%row_coarse(i) == 'V' .and. .not. row_visited(i)) call dfs_H_or_V(i, .true., 'V')
-    end do
-    do i = 1, size(this%col_vertices)
-       if (this%col_coarse(i) == 'V' .and. .not. col_visited(i)) call dfs_H_or_V(i, .false., 'V')
-    end do
-  contains
-    ! Depth-first search for identifying one block inside the H or V sub-matrix
-    subroutine dfs_H_or_V(id, is_row, coarse_type)
-      integer, intent(in) :: id            ! Start vertex ID
-      logical, intent(in) :: is_row        ! Whether start vertex is a row
-      character, intent(in) :: coarse_type ! Either 'H' or 'V'
-
-      integer, dimension(:), allocatable :: path
-      type(dm_block) :: new_block
-
-      allocate(path(1))
-      path(1) = id
-
-      if (is_row) then
-         this%row_fine_block(id) = block_index
-         new_block%row_indices = [ id ]
-         allocate(new_block%col_indices(0))
-         row_visited(id) = .true.
-      else
-         this%col_fine_block(id) = block_index
-         new_block%col_indices = [ id ]
-         allocate(new_block%row_indices(0))
-         col_visited(id) = .true.
-      end if
-
-      do
-         associate (current_id => path(size(path)))
-           if ((mod(size(path), 2) == 0) .neqv. is_row) then ! Current vertex is a row
-              row_edge(current_id) = row_edge(current_id) + 1
-              if (row_edge(current_id) > size(this%row_vertices(current_id)%edges)) then
-                 if (size(path) == 1) exit
-                 path = path(:size(path)-1)
-              else
-                 associate (next_id => this%row_vertices(current_id)%edges(row_edge(current_id)))
-                   if (this%col_coarse(next_id) == coarse_type .and. .not. col_visited(next_id)) then
-                      col_visited(next_id) = .true.
-                      path = [ path, next_id ]
-                      this%col_fine_block(next_id) = block_index
-                      new_block%col_indices = [ new_block%col_indices, next_id ]
-                   end if
-                 end associate
-              end if
-           else ! Current vertex is a column
-              col_edge(current_id) = col_edge(current_id) + 1
-              if (col_edge(current_id) > size(this%col_vertices(current_id)%edges)) then
-                 if (size(path) == 1) exit
-                 path = path(:size(path)-1)
-              else
-                 associate (next_id => this%col_vertices(current_id)%edges(col_edge(current_id)))
-                   if (this%row_coarse(next_id) == coarse_type .and. .not. row_visited(next_id)) then
-                      row_visited(next_id) = .true.
-                      path = [ path, next_id ]
-                      this%row_fine_block(next_id) = block_index
-                      new_block%row_indices = [ new_block%row_indices, next_id ]
-                   end if
-                 end associate
-              end if
-           end if
-         end associate
-      end do
-
-      new_block%coarse_type = coarse_type
-      this%fine_blocks = [ this%fine_blocks, new_block ]
-      block_index = block_index + 1
-    end subroutine dfs_H_or_V
-
-    ! Iterative version of the strongconnect routine from:
-    ! https://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm
-    subroutine strongconnect(col)
-      integer, intent(in) :: col
-
-      integer, dimension(:), allocatable :: col_path
-
-      col_index(col) = index
-      col_lowlink(col) = index
-      index = index + 1
-      col_stack = [ col_stack, col ]
-      col_on_stack(col) = .true.
-
-      allocate(col_path(1))
-      col_path(1) = col
-
-      do
-         associate (current_col => col_path(size(col_path)))
-           col_edge(current_col) = col_edge(current_col) + 1
-           if (col_edge(current_col) > size(this%col_vertices(current_col)%edges)) then
-              ! We visited all edges from this col
-              ! If this is a root node, pop the stack and generate an SCC
-              if (col_lowlink(current_col) == col_index(current_col)) then
-                 associate (current_idx => findloc_back(col_stack, current_col))
-                   associate (cols => col_stack(current_idx:))
-                     col_on_stack(cols) = .false.
-                     associate (rows => this%col_matching(cols))
-                       this%col_fine_block(cols) = block_index
-                       this%row_fine_block(rows) = block_index
-                       this%fine_blocks = [ this%fine_blocks, dm_block(rows, cols, 'S') ]
-                     end associate
-                   end associate
-                   block_index = block_index + 1
-                   col_stack = col_stack(:current_idx-1)
-                 end associate
-              end if
-
-              if (size(col_path) == 1) return
-
-              associate (previous_col => col_path(size(col_path)-1))
-                col_lowlink(previous_col) = min(col_lowlink(previous_col), col_lowlink(current_col))
-              end associate
-              col_path = col_path(:size(col_path)-1)
-           else
-              associate (next_col => this%row_matching(this%col_vertices(current_col)%edges(col_edge(current_col))))
-                if (this%col_coarse(next_col) == 'S' .and. col_index(next_col) == -1) then
-                   col_index(next_col) = index
-                   col_lowlink(next_col) = index
-                   index = index + 1
-                   col_stack = [ col_stack, next_col ]
-                   col_on_stack(next_col) = .true.
-
-                   col_path = [ col_path, next_col ]
-                else if (col_on_stack(next_col)) then
-                   col_lowlink(current_col) = min(col_lowlink(current_col), col_index(next_col))
-                end if
-              end associate
-           end if
-         end associate
-      end do
-    end subroutine strongconnect
-
-    ! Equivalent of findloc(array, val, back = .true.) for rank-1 arrays
-    ! findloc is in the F2008 standard, but only implemented since gfortran 9
-    function findloc_back(array, val)
-      integer, dimension(:), intent(in) :: array
-      integer, intent(in) :: val
-      integer :: findloc_back
-
-      do findloc_back = ubound(array,1),lbound(array,1),-1
-         if (array(findloc_back) == val) return
-      end do
-      error stop "Can’t find element"
-    end function findloc_back
-
-  end subroutine matrix_graph_fine_decomposition
-
-  ! Computes the DAG formed by fine blocks
-  subroutine matrix_graph_fine_blocks_dag(this)
-    class(matrix_graph), intent(inout) :: this
-
-    integer :: blck, i, j
-    logical, dimension(size(this%fine_blocks)) :: marked
-
-    ! Compute predecessors in the DAG
-    do blck = 1,size(this%fine_blocks)
-       marked = .false.
-       associate (row_indices => this%fine_blocks(blck)%row_indices)
-         do i = 1,size(row_indices)
-            associate (row_vertex => this%row_vertices(row_indices(i)))
-              do j = 1,size(row_vertex%edges)
-                 marked(this%col_fine_block(row_vertex%edges(j))) = .true.
-              end do
-            end associate
-         end do
-       end associate
-       marked(blck) = .false.
-       this%fine_blocks(blck)%predecessors = pack([ (i, i=1,size(this%fine_blocks)) ], marked)
-    end do
-
-    ! Compute successors in the DAG
-    do blck = 1,size(this%fine_blocks)
-       marked = .false.
-       associate (col_indices => this%fine_blocks(blck)%col_indices)
-         do i = 1,size(col_indices)
-            associate (col_vertex => this%col_vertices(col_indices(i)))
-              do j = 1,size(col_vertex%edges)
-                 marked(this%row_fine_block(col_vertex%edges(j))) = .true.
-              end do
-            end associate
-         end do
-       end associate
-       marked(blck) = .false.
-       this%fine_blocks(blck)%successors = pack([ (i, i=1,size(this%fine_blocks)) ], marked)
-    end do
-  end subroutine matrix_graph_fine_blocks_dag
-
-  ! Compute the blocks from the fine decomposition, including the DAG they form
   subroutine dm_blocks(mat, blocks)
     real(real64), dimension(:, :), intent(in) :: mat
     type(dm_block), dimension(:), allocatable, intent(out) :: blocks
 
-    type(matrix_graph) :: mg
+    type(c_ptr), dimension(1) :: call_rhs
+    type(c_ptr), dimension(4) :: call_lhs
+    real(real64), dimension(:, :), pointer :: mat_mx
+    real(real64), dimension(:), pointer :: p, q, r, s
+    integer :: i, j
 
-    call mg%init(mat)
-    call mg%maximum_matching
-    call mg%coarse_decomposition
-    call mg%fine_decomposition
-    call mg%fine_blocks_dag
+    call_rhs(1) = mxCreateDoubleMatrix(int(size(mat, 1), mwSize), int(size(mat, 2), mwSize), mxREAL)
+    mat_mx(1:size(mat,1), 1:size(mat,2)) => mxGetPr(call_rhs(1))
+    mat_mx = mat
 
-    call move_alloc(mg%fine_blocks, blocks)
-  end subroutine dm_blocks
+    if (mexCallMATLAB(4_c_int, call_lhs, 1_c_int, call_rhs, "dmperm") /= 0) &
+         call mexErrMsgTxt("Error calling dmperm")
 
-  ! Equivalent of dmperm function of MATLAB/Octave
-  subroutine dmperm(mat, row_order, col_order, row_blocks, col_blocks)
-    real(real64), dimension(:, :), intent(in) :: mat
-    integer, dimension(size(mat, 1)), intent(out) :: row_order
-    integer, dimension(size(mat, 2)), intent(out) :: col_order
-    integer, dimension(:), allocatable, intent(out) :: row_blocks, col_blocks
+    call mxDestroyArray(call_rhs(1))
 
-    type(matrix_graph) :: mg
-    integer :: blck, i
+    p => mxGetPr(call_lhs(1))
+    q => mxGetPr(call_lhs(2))
+    r => mxGetPr(call_lhs(3))
+    s => mxGetPr(call_lhs(4))
 
-    call mg%init(mat)
-    call mg%maximum_matching
-    call mg%coarse_decomposition
-    call mg%fine_decomposition
-
-    allocate(row_blocks(size(mg%fine_blocks)+1))
-    allocate(col_blocks(size(mg%fine_blocks)+1))
-
-    row_blocks(1) = 1
-    col_blocks(1) = 1
-
-    do blck = 1,size(mg%fine_blocks)
-       associate (row_indices => mg%fine_blocks(blck)%row_indices)
-         do i = 1,size(row_indices)
-            row_order(row_blocks(blck)+i-1) = row_indices(i)
-         end do
-         row_blocks(blck+1) = row_blocks(blck)+i-1
-       end associate
-       associate (col_indices => mg%fine_blocks(blck)%col_indices)
-         do i = 1,size(col_indices)
-            col_order(col_blocks(blck)+i-1) = col_indices(i)
-         end do
-         col_blocks(blck+1) = col_blocks(blck)+i-1
-       end associate
+    allocate(blocks(size(r)-1))
+    do i = 1, size(r)-1
+       allocate(blocks(i)%row_indices(int(r(i+1)-r(i))))
+       do j = 1, int(r(i+1)-r(i))
+          blocks(i)%row_indices(j) = int(p(j+int(r(i))-1))
+       end do
+       allocate(blocks(i)%col_indices(int(s(i+1)-s(i))))
+       do j = 1, int(s(i+1)-s(i))
+          blocks(i)%col_indices(j) = int(q(j+int(s(i))-1))
+       end do
     end do
-  end subroutine dmperm
+
+    call mxDestroyArray(call_lhs(1))
+    call mxDestroyArray(call_lhs(2))
+    call mxDestroyArray(call_lhs(3))
+    call mxDestroyArray(call_lhs(4))
+  end subroutine dm_blocks
 end module dulmage_mendelsohn

From 865ab47fa9d4794ea52bf9596309b92c1b96d029 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?S=C3=A9bastien=20Villemot?= <sebastien@dynare.org>
Date: Thu, 16 Jul 2020 18:20:07 +0200
Subject: [PATCH 6/6] Provide block_trust_region MEX under solve_algo 13 and 14

- block trust region solver now available under solve_algo=13
  It is essentially the same as solve_algo=4, except that Jacobian by finite
  difference is not handled. A test file is added for that case
- block trust region solver with shortcut for equations that can be evaluated
  is now available under solve_algo=14 (in replacement of the pure-MATLAB solver)

Closes: Enterprise/dynare#3
---
 matlab/dynare_solve.m                         |  27 +++-
 matlab/steady_.m                              |   4 +-
 mex/build/block_trust_region.am               |   1 +
 .../block_trust_region/mexFunction.f08        | 151 ++++++++++++++++--
 tests/Makefile.am                             |   1 +
 .../example1_block_trust_region.mod           |  47 ++++++
 6 files changed, 208 insertions(+), 23 deletions(-)
 create mode 100644 tests/steady_state/example1_block_trust_region.mod

diff --git a/matlab/dynare_solve.m b/matlab/dynare_solve.m
index 079bc1429..ba76c3ffc 100644
--- a/matlab/dynare_solve.m
+++ b/matlab/dynare_solve.m
@@ -232,13 +232,13 @@ elseif options.solve_algo==9
     [x, errorflag] = trust_region(f, x, 1:nn, 1:nn, jacobian_flag, options.gstep, ...
                              tolf, tolx, ...
                              maxit, options.debug, arguments{:});
-elseif ismember(options.solve_algo, [2, 12, 4, 14])
+elseif ismember(options.solve_algo, [2, 12, 4])
     if ismember(options.solve_algo, [2, 12])
         solver = @solve1;
     else
         solver = @trust_region;
     end
-    specializedunivariateblocks = ismember(options.solve_algo, [12, 14]);
+    specializedunivariateblocks = options.solve_algo == 12;
     if ~jacobian_flag
         fjac = zeros(nn,nn) ;
         dh = max(abs(x), options.gstep(1)*ones(nn,1))*eps^(1/3);
@@ -251,19 +251,19 @@ elseif ismember(options.solve_algo, [2, 12, 4, 14])
     [j1,j2,r,s] = dmperm(fjac);
     JAC = abs(fjac(j1,j2))>0;
     if options.debug
-        disp(['DYNARE_SOLVE (solve_algo=2|4|12|14): number of blocks = ' num2str(length(r)-1)]);
+        disp(['DYNARE_SOLVE (solve_algo=2|4|12): number of blocks = ' num2str(length(r)-1)]);
     end
     l = 0;
     fre = false;
     for i=length(r)-1:-1:1
         blocklength = r(i+1)-r(i);
         if options.debug
-            dprintf('DYNARE_SOLVE (solve_algo=2|4|12|14): solving block %u of size %u.', i, blocklength);
+            dprintf('DYNARE_SOLVE (solve_algo=2|4|12): solving block %u of size %u.', i, blocklength);
         end
         j = r(i):r(i+1)-1;
         if specializedunivariateblocks
             if options.debug
-                dprintf('DYNARE_SOLVE (solve_algo=2|4|12|14): solving block %u by evaluating RHS.', i);
+                dprintf('DYNARE_SOLVE (solve_algo=2|4|12): solving block %u by evaluating RHS.', i);
             end
             if isequal(blocklength, 1)
                 if i<length(r)-1
@@ -304,7 +304,7 @@ elseif ismember(options.solve_algo, [2, 12, 4, 14])
             end
         else
             if options.debug
-                dprintf('DYNARE_SOLVE (solve_algo=2|4|12|14): solving block %u with trust_region routine.', i);
+                dprintf('DYNARE_SOLVE (solve_algo=2|4|12): solving block %u with trust_region routine.', i);
             end
         end
         [x, errorflag] = solver(f, x, j1(j), j2(j), jacobian_flag, ...
@@ -356,6 +356,19 @@ elseif options.solve_algo == 11
     catch
         errorflag = true;
     end
+elseif ismember(options.solve_algo, [13, 14])
+    if ~jacobian_flag
+        error('DYNARE_SOLVE: option solve_algo=13|14 needs computed Jacobian')
+    end
+    auxstruct = struct();
+    if options.solve_algo == 14
+        auxstruct.lhs = lhs;
+        auxstruct.endo_names = endo_names;
+        auxstruct.isloggedlhs = isloggedlhs;
+        auxstruct.isauxdiffloggedrhs = isauxdiffloggedrhs;
+    end
+    [x, errorflag] = block_trust_region(f, x, tolf, options.solve_tolx, maxit, options.debug, auxstruct, arguments{:});
+    [fvec, fjac] = feval(f, x, arguments{:});
 else
-    error('DYNARE_SOLVE: option solve_algo must be one of [0,1,2,3,4,9,10,11,12,14]')
+    error('DYNARE_SOLVE: option solve_algo must be one of [0,1,2,3,4,9,10,11,12,13,14]')
 end
diff --git a/matlab/steady_.m b/matlab/steady_.m
index 08890ee61..1e7096ef2 100644
--- a/matlab/steady_.m
+++ b/matlab/steady_.m
@@ -35,8 +35,8 @@ function [steady_state,params,info] = steady_(M_,options_,oo_)
 % You should have received a copy of the GNU General Public License
 % along with Dynare.  If not, see <http://www.gnu.org/licenses/>.
 
-if options_.solve_algo < 0 || options_.solve_algo > 12
-    error('STEADY: solve_algo must be between 0 and 12')
+if options_.solve_algo < 0 || options_.solve_algo > 14
+    error('STEADY: solve_algo must be between 0 and 14')
 end
 
 if ~options_.bytecode && ~options_.block && options_.solve_algo > 4 && ...
diff --git a/mex/build/block_trust_region.am b/mex/build/block_trust_region.am
index cb72f64a9..0a3e3f8e0 100644
--- a/mex/build/block_trust_region.am
+++ b/mex/build/block_trust_region.am
@@ -11,6 +11,7 @@ nodist_block_trust_region_SOURCES = \
 BUILT_SOURCES = $(nodist_block_trust_region_SOURCES)
 CLEANFILES = $(nodist_block_trust_region_SOURCES)
 
+dulmage_mendelsohn.o: matlab_mex.mod
 dulmage_mendelsohn.mod: dulmage_mendelsohn.o
 
 matlab_fcn_closure.mod: matlab_fcn_closure.o
diff --git a/mex/sources/block_trust_region/mexFunction.f08 b/mex/sources/block_trust_region/mexFunction.f08
index d375c4250..35d9e7d7a 100644
--- a/mex/sources/block_trust_region/mexFunction.f08
+++ b/mex/sources/block_trust_region/mexFunction.f08
@@ -1,4 +1,4 @@
-! Copyright © 2019 Dynare Team
+! Copyright © 2019-2020 Dynare Team
 !
 ! This file is part of Dynare.
 !
@@ -31,39 +31,104 @@ subroutine mexFunction(nlhs, plhs, nrhs, prhs) bind(c, name='mexFunction')
   real(real64), dimension(:), allocatable, target :: x
   type(dm_block), dimension(:), allocatable, target :: blocks
   integer :: info, i
-  real(real64), parameter :: tolf = 1e-6_real64
+  real(real64) :: tolf, tolx
+  integer :: maxiter
   real(real64), dimension(:), allocatable :: fvec
   real(real64), dimension(:,:), allocatable :: fjac
-  logical :: debug
+  logical :: debug, specializedunivariateblocks
   character(len=80) :: debug_msg
+  logical(mxLogical), dimension(:), pointer :: isloggedlhs => null(), &
+       isauxdiffloggedrhs => null()
+  type(c_ptr) :: endo_names, lhs
+  logical :: fre ! True if the last block has been solved (i.e. not evaluated), so that residuals must be updated
+  integer, dimension(:), allocatable :: evaled_cols ! If fre=.false., lists the columns that have been evaluated so far without updating the residuals
 
-  if (nrhs < 3 .or. nlhs /= 2) then
-     call mexErrMsgTxt("Must have at least 3 inputs and exactly 2 outputs")
+  if (nrhs < 4 .or. nlhs /= 2) then
+     call mexErrMsgTxt("Must have at least 7 inputs and exactly 2 outputs")
      return
   end if
 
   if (.not. ((mxIsChar(prhs(1)) .and. mxGetM(prhs(1)) == 1) .or. mxIsClass(prhs(1), "function_handle"))) then
-     call mexErrMsgTxt("First argument should be a string or a function handle")
+     call mexErrMsgTxt("First argument (function) should be a string or a function handle")
      return
   end if
 
   if (.not. (mxIsDouble(prhs(2)) .and. (mxGetM(prhs(2)) == 1 .or. mxGetN(prhs(2)) == 1))) then
-     call mexErrMsgTxt("Second argument should be a real vector")
+     call mexErrMsgTxt("Second argument (initial guess) should be a real vector")
      return
   end if
 
-  if (.not. (mxIsLogicalScalar(prhs(3)))) then
-     call mexErrMsgTxt("Third argument should be a logical scalar")
+  if (.not. (mxIsScalar(prhs(3)) .and. mxIsNumeric(prhs(3)))) then
+     call mexErrMsgTxt("Third argument (tolf) should be a numeric scalar")
      return
   end if
 
+  if (.not. (mxIsScalar(prhs(4)) .and. mxIsNumeric(prhs(4)))) then
+     call mexErrMsgTxt("Fourth argument (tolx) should be a numeric scalar")
+     return
+  end if
+
+  if (.not. (mxIsScalar(prhs(5)) .and. mxIsNumeric(prhs(5)))) then
+     call mexErrMsgTxt("Fifth argument (maxiter) should be a numeric scalar")
+     return
+  end if
+
+  if (.not. (mxIsLogicalScalar(prhs(6)))) then
+     call mexErrMsgTxt("Sixth argument (debug) should be a logical scalar")
+     return
+  end if
+
+  if (.not. (mxIsStruct(prhs(7)) .and. &
+       (mxGetNumberOfFields(prhs(7)) == 0 .or. mxGetNumberOfFields(prhs(7)) == 4))) then
+     call mexErrMsgTxt("Seventh argument should be a struct with either 0 or 4 fields")
+     return
+  end if
+  specializedunivariateblocks = (mxGetNumberOfFields(prhs(7)) == 4)
+
   func => prhs(1)
-  debug = mxGetScalar(prhs(3)) == 1._c_double
-  extra_args => prhs(4:nrhs)
+  tolf = mxGetScalar(prhs(3))
+  tolx = mxGetScalar(prhs(4))
+  maxiter = int(mxGetScalar(prhs(5)))
+  debug = mxGetScalar(prhs(6)) == 1._c_double
+  extra_args => prhs(8:nrhs) ! Extra arguments to func are in argument 8 and subsequent ones
   associate (x_mat => mxGetPr(prhs(2)))
     allocate(x(size(x_mat)))
     x = x_mat
   end associate
+  if (specializedunivariateblocks) then
+     block
+       type(c_ptr) :: tmp
+       tmp = mxGetField(prhs(7), 1_mwIndex, "isloggedlhs")
+       if (.not. (c_associated(tmp) .and. mxIsLogical(tmp) .and. mxGetNumberOfElements(tmp) == size(x))) then
+          call mexErrMsgTxt("Seventh argument must have a 'isloggedlhs' field of type logical, of same size as second argument")
+          return
+       end if
+       isloggedlhs => mxGetLogicals(tmp)
+
+       tmp = mxGetField(prhs(7), 1_mwIndex, "isauxdiffloggedrhs")
+       if (.not. (c_associated(tmp) .and. mxIsLogical(tmp) .and. mxGetNumberOfElements(tmp) == size(x))) then
+          call mexErrMsgTxt("Seventh argument must have a 'isauxdiffloggedrhs' field of type &
+               &logical, of same size as second argument")
+          return
+       end if
+       isauxdiffloggedrhs => mxGetLogicals(tmp)
+
+       lhs = mxGetField(prhs(7), 1_mwIndex, "lhs")
+       if (.not. (c_associated(lhs) .and. mxIsCell(lhs) .and. mxGetNumberOfElements(lhs) == size(x))) then
+          call mexErrMsgTxt("Seventh argument must have a 'lhs' field of type cell, of same size as second argument")
+          return
+       end if
+
+       endo_names = mxGetField(prhs(7), 1_mwIndex, "endo_names")
+       if (.not. (c_associated(endo_names) .and. mxIsCell(endo_names) .and. mxGetNumberOfElements(endo_names) == size(x))) then
+          call mexErrMsgTxt("Seventh argument must have a 'endo_names' field of type cell, of same size as second argument")
+          return
+       end if
+     end block
+
+     allocate(evaled_cols(0))
+     fre = .false.
+  end if
 
   allocate(fvec(size(x)))
   allocate(fjac(size(x), size(x)))
@@ -79,7 +144,6 @@ subroutine mexFunction(nlhs, plhs, nrhs, prhs) bind(c, name='mexFunction')
   end if
 
   ! Solve the system, starting from bottom-rightmost block
-  x_all => x
   do i = size(blocks),1,-1
      if (debug) then
         write (debug_msg, "('DYNARE_SOLVE (solve_algo=13|14): solving block ', i0, ' of size ', i0)") &
@@ -87,18 +151,77 @@ subroutine mexFunction(nlhs, plhs, nrhs, prhs) bind(c, name='mexFunction')
         call mexPrintf_trim_newline(debug_msg)
      end if
 
+     if (specializedunivariateblocks .and. size(blocks(i)%col_indices) == 1) then
+        if (debug) then
+           write (debug_msg, "('DYNARE_SOLVE (solve_algo=13|14): solving block ', i0, ' by evaluating RHS')") i
+           call mexPrintf_trim_newline(debug_msg)
+        end if
+        associate (eq => blocks(i)%row_indices(1), var => blocks(i)%col_indices(1))
+          if (fre .or. any(abs(fjac(eq, evaled_cols)) > 0._real64)) then
+             ! Reevaluation of the residuals is required because the current RHS depends on
+             ! variables that potentially have been updated previously.
+             nullify(x_indices, f_indices, x_all)
+             call matlab_fcn(x, fvec)
+             deallocate(evaled_cols) ! This shouldn’t be necessary, but it crashes otherwise with gfortran 8
+             allocate(evaled_cols(0))
+             fre = .false.
+          end if
+          evaled_cols = [ evaled_cols, var]
+          block
+            ! An associate() construct for lhs_eq and endo_name_var makes the
+            ! code crash (with double free) using gfortran 8. Hence use a block
+            character(kind=c_char, len=:), allocatable :: lhs_eq, endo_name_var
+            lhs_eq = mxArrayToString(mxGetCell(lhs, int(eq, mwIndex)))
+            endo_name_var = mxArrayToString(mxGetCell(endo_names, int(var, mwIndex)))
+            if (lhs_eq == endo_name_var .or. lhs_eq == "log(" // endo_name_var // ")") then
+               if (isloggedlhs(eq)) then
+                  x(var) = exp(log(x(var)) - fvec(eq))
+               else
+                  x(var) = x(var) - fvec(eq)
+               end if
+            else
+               if (debug) then
+                  write (debug_msg, "('LHS variable is not determined by RHS expression (', i0, ')')") eq
+                  call mexPrintf_trim_newline(debug_msg)
+                  write (debug_msg, "(a, ' -> ', a)") lhs_eq, endo_name_var
+                  call mexPrintf_trim_newline(debug_msg)
+               end if
+               if (lhs_eq(1:9) == "AUX_DIFF_" .or. lhs_eq(1:13) == "log(AUX_DIFF_") then
+                  if (isauxdiffloggedrhs(eq)) then
+                     x(var) = exp(log(x(var)) + fvec(eq))
+                  else
+                     x(var) = x(var) + fvec(eq)
+                  end if
+               else
+                  call mexErrMsgTxt("Algorithm solve_algo=14 cannot be used with this nonlinear problem")
+                  return
+               end if
+            end if
+          end block
+        end associate
+        cycle
+     else
+        if (debug) then
+           write (debug_msg, "('DYNARE_SOLVE (solve_algo=13|14): solving block ', i0, ' with trust region routine')") i
+           call mexPrintf_trim_newline(debug_msg)
+        end if
+     end if
+
      block
        real(real64), dimension(size(blocks(i)%col_indices)) :: x_block
        x_indices => blocks(i)%col_indices
        f_indices => blocks(i)%row_indices
+       x_all => x
        if (size(x_indices) /= size(f_indices)) then
           call mexErrMsgTxt("Non-square block")
           return
        end if
        x_block = x(x_indices)
-       call trust_region_solve(x_block, matlab_fcn, info, tolf = tolf)
+       call trust_region_solve(x_block, matlab_fcn, info, tolx, tolf, maxiter)
        x(x_indices) = x_block
      end block
+
+     fre = .true.
   end do
 
   ! Verify that we have a solution
@@ -113,7 +236,7 @@ subroutine mexFunction(nlhs, plhs, nrhs, prhs) bind(c, name='mexFunction')
   if (maxval(abs(fvec)) > tolf) then
      if (debug) &
           call mexPrintf_trim_newline("DYNARE_SOLVE (solve_algo=13|14): residuals still too large, solving for the whole model")
-     call trust_region_solve(x, matlab_fcn, info, tolf = tolf)
+     call trust_region_solve(x, matlab_fcn, info, tolx, tolf, maxiter)
   else
      info = 1
   end if
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 9da3c18dc..b8333e142 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -134,6 +134,7 @@ MODFILES = \
 	steady_state/walsh1_ssm_block.mod \
 	steady_state/multi_leads.mod \
 	steady_state/example1_trust_region.mod \
+	steady_state/example1_block_trust_region.mod \
 	steady_state/Gali_2015_chapter_6_4.mod \
 	steady_state_operator/standard.mod \
 	steady_state_operator/use_dll.mod \
diff --git a/tests/steady_state/example1_block_trust_region.mod b/tests/steady_state/example1_block_trust_region.mod
new file mode 100644
index 000000000..54643e083
--- /dev/null
+++ b/tests/steady_state/example1_block_trust_region.mod
@@ -0,0 +1,47 @@
+// Test block trust region nonlinear solver (solve_algo=13)
+var y, c, k, a, h, b;
+varexo e, u;
+
+parameters beta, rho, alpha, delta, theta, psi, tau;
+
+alpha = 0.36;
+rho   = 0.95;
+tau   = 0.025;
+beta  = 0.99;
+delta = 0.025;
+psi   = 0;
+theta = 2.95;
+
+phi   = 0.1;
+
+model;
+c*theta*h^(1+psi)=(1-alpha)*y;
+k = beta*(((exp(b)*c)/(exp(b(+1))*c(+1)))
+    *(exp(b(+1))*alpha*y(+1)+(1-delta)*k));
+y = exp(a)*(k(-1)^alpha)*(h^(1-alpha));
+k = exp(b)*(y-c)+(1-delta)*k(-1);
+a = rho*a(-1)+tau*b(-1) + e;
+b = tau*a(-1)+rho*b(-1) + u;
+end;
+
+initval;
+y = 1;
+c = 0.8;
+h = 0.3;
+k = 10;
+a = 0;
+b = 0;
+e = 0;
+u = 0;
+end;
+
+options_.debug = true;
+steady(solve_algo=13);
+
+shocks;
+var e; stderr 0.009;
+var u; stderr 0.009;
+var e, u = phi*0.009*0.009;
+end;
+
+stoch_simul(order=1,nomoments,irf=0);