Merge Enterprise/block_trust_region into enterprise.

2020-09-24 16:05:55 +02:00 · 2020-09-24 16:05:55 +02:00 · e690c5de5a
parent c7c7358a5b 865ab47fa9
commit e690c5de5a
8 changed files with 270 additions and 570 deletions
--- a/matlab/dynare_solve.m
+++ b/matlab/dynare_solve.m
@ -232,13 +232,13 @@ elseif options.solve_algo==9
    [x, errorflag] = trust_region(f, x, 1:nn, 1:nn, jacobian_flag, options.gstep, ...
                             tolf, tolx, ...
                             maxit, options.debug, arguments{:});
-elseif ismember(options.solve_algo, [2, 12, 4, 14])
+elseif ismember(options.solve_algo, [2, 12, 4])
    if ismember(options.solve_algo, [2, 12])
        solver = @solve1;
    else
        solver = @trust_region;
    end
-    specializedunivariateblocks = ismember(options.solve_algo, [12, 14]);
+    specializedunivariateblocks = options.solve_algo == 12;
    if ~jacobian_flag
        fjac = zeros(nn,nn) ;
        dh = max(abs(x), options.gstep(1)*ones(nn,1))*eps^(1/3);
@ -251,19 +251,19 @@ elseif ismember(options.solve_algo, [2, 12, 4, 14])
    [j1,j2,r,s] = dmperm(fjac);
    JAC = abs(fjac(j1,j2))>0;
    if options.debug
-        disp(['DYNARE_SOLVE (solve_algo=2|4|12|14): number of blocks = ' num2str(length(r))]);
+        disp(['DYNARE_SOLVE (solve_algo=2|4|12): number of blocks = ' num2str(length(r)-1)]);
    end
    l = 0;
    fre = false;
    for i=length(r)-1:-1:1
        blocklength = r(i+1)-r(i);
        if options.debug
-            dprintf('DYNARE_SOLVE (solve_algo=2|4|12|14): solving block %u of size %u.', i, blocklength);
+            dprintf('DYNARE_SOLVE (solve_algo=2|4|12): solving block %u of size %u.', i, blocklength);
        end
        j = r(i):r(i+1)-1;
        if specializedunivariateblocks
            if options.debug
-                dprintf('DYNARE_SOLVE (solve_algo=2|4|12|14): solving block %u by evaluating RHS.', i);
+                dprintf('DYNARE_SOLVE (solve_algo=2|4|12): solving block %u by evaluating RHS.', i);
            end
            if isequal(blocklength, 1)
                if i<length(r)-1
@ -304,7 +304,7 @@ elseif ismember(options.solve_algo, [2, 12, 4, 14])
            end
        else
            if options.debug
-                dprintf('DYNARE_SOLVE (solve_algo=2|4|12|14): solving block %u with trust_region routine.', i);
+                dprintf('DYNARE_SOLVE (solve_algo=2|4|12): solving block %u with trust_region routine.', i);
            end
        end
        [x, errorflag] = solver(f, x, j1(j), j2(j), jacobian_flag, ...
@ -356,6 +356,19 @@ elseif options.solve_algo == 11
    catch
        errorflag = true;
    end
+elseif ismember(options.solve_algo, [13, 14])
+    if ~jacobian_flag
+        error('DYNARE_SOLVE: option solve_algo=13|14 needs computed Jacobian')
+    end
+    auxstruct = struct();
+    if options.solve_algo == 14
+        auxstruct.lhs = lhs;
+        auxstruct.endo_names = endo_names;
+        auxstruct.isloggedlhs = isloggedlhs;
+        auxstruct.isauxdiffloggedrhs = isauxdiffloggedrhs;
+    end
+    [x, errorflag] = block_trust_region(f, x, tolf, options.solve_tolx, maxit, options.debug, auxstruct, arguments{:});
+    [fvec, fjac] = feval(f, x, arguments{:});
 else
-    error('DYNARE_SOLVE: option solve_algo must be one of [0,1,2,3,4,9,10,11,12,14]')
+    error('DYNARE_SOLVE: option solve_algo must be one of [0,1,2,3,4,9,10,11,12,13,14]')
 end
--- a/matlab/steady_.m
+++ b/matlab/steady_.m
@ -35,8 +35,8 @@ function [steady_state,params,info] = steady_(M_,options_,oo_)
 % You should have received a copy of the GNU General Public License
 % along with Dynare.  If not, see <http://www.gnu.org/licenses/>.

-if options_.solve_algo < 0 || options_.solve_algo > 12
-    error('STEADY: solve_algo must be between 0 and 12')
+if options_.solve_algo < 0 || options_.solve_algo > 14
+    error('STEADY: solve_algo must be between 0 and 14')
 end

 if ~options_.bytecode && ~options_.block && options_.solve_algo > 4 && ...
--- a/mex/build/block_trust_region.am
+++ b/mex/build/block_trust_region.am
@ -11,6 +11,7 @@ nodist_block_trust_region_SOURCES = \
 BUILT_SOURCES = $(nodist_block_trust_region_SOURCES)
 CLEANFILES = $(nodist_block_trust_region_SOURCES)

+dulmage_mendelsohn.o: matlab_mex.mod
 dulmage_mendelsohn.mod: dulmage_mendelsohn.o

 matlab_fcn_closure.mod: matlab_fcn_closure.o
--- a/mex/sources/block_trust_region/dulmage_mendelsohn.f08
+++ b/mex/sources/block_trust_region/dulmage_mendelsohn.f08
@ -1,16 +1,7 @@
-! Implementation of the Dulmage-Mendelsohn decomposition
-!
-! Closely follows the description of Pothen and Fan (1990), “Computing the
-! Block Triangular Form of a Sparse Matrix”, ACM Transactions on Mathematical
-! Software, Vol. 16, No. 4, pp. 303–324
-!
-! In addition, also computes the directed acyclic graph (DAG) formed by the
-! fine blocks, in order to optimize the parallel resolution of a linear system.
-! The last (i.e. lower-right) block has no predecessor (since this is an
-! *upper* block triangular form), and the other blocks may have predecessors if
-! they use variables computed by blocks further on the right.
+! Wrapper around MATLAB’s dmperm to compute the Dulmage-Mendelsohn
+! decomposition

-! Copyright © 2019 Dynare Team
+! Copyright © 2020 Dynare Team
 !
 ! This file is part of Dynare.
 !
@ -29,559 +20,53 @@

 module dulmage_mendelsohn
  use iso_fortran_env
+  use matlab_mex
  implicit none

-  private
-  public :: dm_block, dmperm, dm_blocks
-
  ! Represents a block in the fine DM decomposition
  type :: dm_block
     integer, dimension(:), allocatable :: row_indices, col_indices
-     character :: coarse_type ! Place in the coarse decomposition, either 'H', 'S' or 'V'
-     integer, dimension(:), allocatable :: predecessors, successors ! Predecessors and successors in the DAG formed by fine DM blocks
  end type dm_block
-
-  type :: vertex
-     integer, dimension(:), allocatable :: edges
-     integer :: id ! Index of row of column represented by this vertex
-  end type vertex
-
-  type :: matrix_graph
-     type(vertex), dimension(:), allocatable :: row_vertices, col_vertices
-     integer, dimension(:), allocatable :: row_matching, col_matching ! Maximum matching
-     character, dimension(:), allocatable :: row_coarse, col_coarse ! Coarse decomposition
-     type(dm_block), dimension(:), allocatable :: fine_blocks ! Fine decomposition
-     integer, dimension(:), allocatable :: row_fine_block, col_fine_block ! Maps rows/cols to their block index
-   contains
-     procedure :: init => matrix_graph_init
-     procedure :: maximum_matching => matrix_graph_maximum_matching
-     procedure :: coarse_decomposition => matrix_graph_coarse_decomposition
-     procedure :: fine_decomposition => matrix_graph_fine_decomposition
-     procedure :: fine_blocks_dag => matrix_graph_fine_blocks_dag
-  end type matrix_graph
-
 contains
-  ! Initialize a matrix_graph object, given a matrix
-  subroutine matrix_graph_init(this, mat)
-    class(matrix_graph), intent(inout) :: this
-    real(real64), dimension(:, :), intent(in) :: mat
-
-    integer :: i, j
-
-    if (size(mat, 1) < size(mat, 2)) &
-         error stop "Matrix has less rows than columns; consider transposing it"
-
-    ! Create row vertices
-    allocate(this%row_vertices(size(mat, 1)))
-    do i = 1, size(mat, 1)
-       this%row_vertices(i)%edges = pack([ (j, j=1,size(mat, 2)) ], mat(i, :) /= 0_real64)
-       this%row_vertices(i)%id = i
-    end do
-
-    ! Create column vertices
-    allocate(this%col_vertices(size(mat, 2)))
-    do i = 1, size(mat, 2)
-       this%col_vertices(i)%edges = pack([ (j, j=1, size(mat, 1)) ], mat(:, i) /= 0_real64)
-       this%col_vertices(i)%id = i
-    end do
-  end subroutine matrix_graph_init
-
-  ! Computes the maximum matching for this graph
-  subroutine matrix_graph_maximum_matching(this)
-    class(matrix_graph), intent(inout) :: this
-
-    logical, dimension(size(this%row_vertices)) :: row_visited
-    integer, dimension(:), allocatable :: col_unmatched, col_unmatched_new
-    integer :: i, j
-
-    if (allocated(this%row_matching) .or. allocated(this%col_matching)) &
-         error stop "Maximum matching already computed"
-
-    allocate(this%row_matching(size(this%row_vertices)))
-    allocate(this%col_matching(size(this%col_vertices)))
-
-    this%row_matching = 0
-    this%col_matching = 0
-    allocate(col_unmatched(0))
-
-    ! Compute cheap matching
-    do i = 1, size(this%col_vertices)
-       match_col: do j = 1, size(this%col_vertices(i)%edges)
-          associate (v => this%col_vertices(i)%edges(j))
-            if (this%row_matching(v) == 0) then
-               this%row_matching(v) = i
-               this%col_matching(i) = v
-               exit match_col
-            end if
-          end associate
-       end do match_col
-       if (this%col_matching(i) == 0) col_unmatched = [ col_unmatched, i ]
-    end do
-
-    ! Augment matching
-    allocate(col_unmatched_new(0))
-    do
-       row_visited = .false.
-       do i = 1, size(col_unmatched)
-          call search_augmenting_path(col_unmatched(i))
-       end do
-       if (size(col_unmatched) == size(col_unmatched_new)) exit
-       call move_alloc(from=col_unmatched_new, to=col_unmatched)
-       allocate(col_unmatched_new(0))
-    end do
-  contains
-    ! Depth-first search for an augmenting path
-    ! The algorithm is written iteratively (and not recursively), because
-    ! otherwise the stack could overflow on large matrices
-    subroutine search_augmenting_path(col)
-      integer, intent(in) :: col
-
-      integer, dimension(:), allocatable :: row_path, col_path ! Path visited so far
-      integer, dimension(size(this%col_vertices)) :: col_edge ! For each column, keeps the last visited edge index
-      integer :: i
-
-      col_edge = 0
-      allocate(col_path(1))
-      col_path(1) = col
-      allocate(row_path(0))
-
-      do
-         ! Depth-first search, starting from last column in the path
-         associate (current_col => col_path(size(col_path)))
-           col_edge(current_col) = col_edge(current_col) + 1
-           if (col_edge(current_col) > size(this%col_vertices(current_col)%edges)) then
-              ! We visited all edges from this column, need to backtrack
-              if (size(col_path) == 1) then
-                 ! The search failed
-                 col_unmatched_new = [ col_unmatched_new, col ]
-                 return
-              end if
-              col_path = col_path(:size(col_path)-1)
-              row_path = row_path(:size(row_path)-1)
-           else
-              associate (current_row => this%col_vertices(current_col)%edges(col_edge(current_col)))
-                if (.not. row_visited(current_row)) then
-                   row_visited(current_row) = .true.
-                   row_path = [ row_path, current_row ]
-                   if (this%row_matching(current_row) == 0) then
-                      ! We found an augmenting path, update matching and return
-                      do i = 1, size(col_path)
-                         this%row_matching(row_path(i)) = col_path(i)
-                         this%col_matching(col_path(i)) = row_path(i)
-                      end do
-                      return
-                   end if
-                   col_path = [ col_path, this%row_matching(current_row)]
-                end if
-              end associate
-           end if
-         end associate
-      end do
-    end subroutine search_augmenting_path
-  end subroutine matrix_graph_maximum_matching
-
-  ! Computes the coarse decomposition
-  ! The {row,col}_coarse arrays are filled with characters 'H', 'S' and 'V'
-  subroutine matrix_graph_coarse_decomposition(this)
-    class(matrix_graph), intent(inout) :: this
-
-    integer :: i
-
-    if (allocated(this%row_coarse) .or. allocated(this%col_coarse)) &
-         error stop "Coarse decomposition already computed"
-
-    if (.not. (allocated(this%row_matching) .and. allocated(this%col_matching))) &
-         error stop "Maximum matching not yet computed"
-
-    allocate(this%row_coarse(size(this%row_vertices)))
-    allocate(this%col_coarse(size(this%col_vertices)))
-
-    ! Initialize partition
-    this%row_coarse = 'S'
-    this%col_coarse = 'S'
-
-    ! Identify A_h
-    do i = 1, size(this%col_vertices)
-       if (this%col_matching(i) /= 0) cycle
-       this%col_coarse(i) = 'H'
-       call alternating_dfs_col(i)
-    end do
-
-    ! Identify A_v
-    do i = 1, size(this%row_vertices)
-       if (this%row_matching(i) /= 0) cycle
-       this%row_coarse(i) = 'V'
-       call alternating_dfs_row(i)
-    end do
-  contains
-    ! Depth-first search along alternating path, starting from a column
-    ! Written iteratively, to avoid stack overflow on large matrices
-    subroutine alternating_dfs_col(col)
-      integer, intent(in) :: col
-
-      integer, dimension(size(this%col_vertices)) :: col_edge
-      integer, dimension(:), allocatable :: col_path ! path visited so far
-
-      col_edge = 0
-      allocate(col_path(1))
-      col_path(1) = col
-
-      do
-         associate (current_col => col_path(size(col_path)))
-           col_edge(current_col) = col_edge(current_col) + 1
-           if (col_edge(current_col) > size(this%col_vertices(current_col)%edges)) then
-              ! We visited all edges from this column, need to backtrack
-              if (size(col_path) == 1) return
-              col_path = col_path(:size(col_path)-1)
-           else
-              associate (current_row => this%col_vertices(current_col)%edges(col_edge(current_col)))
-                if (this%row_coarse(current_row) /= 'H') then
-                   this%row_coarse(current_row) = 'H'
-                   if (this%row_matching(current_row) /= 0) then
-                      this%col_coarse(this%row_matching(current_row)) = 'H'
-                      col_path = [ col_path, this%row_matching(current_row)]
-                   end if
-                end if
-              end associate
-           end if
-         end associate
-      end do
-    end subroutine alternating_dfs_col
-
-    ! Depth-first search along alternating path, starting from a row
-    ! This is the perfect symmetric of alternating_dfs_col (swapping "col" and "row")
-    subroutine alternating_dfs_row(row)
-      integer, intent(in) :: row
-
-      integer, dimension(size(this%row_vertices)) :: row_edge
-      integer, dimension(:), allocatable :: row_path ! path visited so far
-
-      row_edge = 0
-      allocate(row_path(1))
-      row_path(1) = row
-
-      do
-         associate (current_row => row_path(size(row_path)))
-           row_edge(current_row) = row_edge(current_row) + 1
-           if (row_edge(current_row) > size(this%row_vertices(current_row)%edges)) then
-              ! We visited all edges from this row, need to backtrack
-              if (size(row_path) == 1) return
-              row_path = row_path(:size(row_path)-1)
-           else
-              associate (current_col => this%row_vertices(current_row)%edges(row_edge(current_row)))
-                if (this%col_coarse(current_col) /= 'V') then
-                   this%col_coarse(current_col) = 'V'
-                   if (this%col_matching(current_col) /= 0) then
-                      this%row_coarse(this%col_matching(current_col)) = 'V'
-                      row_path = [ row_path, this%col_matching(current_col)]
-                   end if
-                end if
-              end associate
-           end if
-        end associate
-      end do
-    end subroutine alternating_dfs_row
-  end subroutine matrix_graph_coarse_decomposition
-
-  ! Computes the coarse decomposition
-  ! Allocate and fills the this%row_fine_* and this%col_fine* arrays
-  subroutine matrix_graph_fine_decomposition(this)
-    class(matrix_graph), intent(inout) :: this
-
-    ! Index of next block to be discovered
-    integer :: block_index
-
-    ! Used in DFS (both in dfs_H_or_V and strongconnect)
-    integer, dimension(size(this%row_vertices)) :: row_edge
-    integer, dimension(size(this%col_vertices)) :: col_edge
-
-    ! For dfs_H_or_V
-    logical, dimension(size(this%row_vertices)) :: row_visited
-    logical, dimension(size(this%col_vertices)) :: col_visited
-
-    ! For strongconnect
-    integer, dimension(:), allocatable :: col_stack
-    logical, dimension(size(this%col_vertices)) :: col_on_stack
-    integer, dimension(size(this%col_vertices)) :: col_index, col_lowlink
-    integer :: index
-
-    integer :: i
-
-    if (allocated(this%fine_blocks) &
-         .or. allocated(this%row_fine_block) .or. allocated(this%col_fine_block)) &
-         error stop "Fine decomposition already computed"
-
-    if (.not. (allocated(this%row_coarse) .and. allocated(this%col_coarse))) &
-         error stop "Coarse decomposition not yet computed"
-
-    allocate(this%row_fine_block(size(this%row_vertices)))
-    allocate(this%col_fine_block(size(this%col_vertices)))
-    allocate(this%fine_blocks(0))
-    block_index = 1
-
-    row_visited = .false.
-    col_visited = .false.
-    row_edge = 0
-    col_edge = 0
-
-    ! Compute blocks in H
-    do i = 1, size(this%row_vertices)
-       if (this%row_coarse(i) == 'H' .and. .not. row_visited(i)) call dfs_H_or_V(i, .true., 'H')
-    end do
-    do i = 1, size(this%col_vertices)
-       if (this%col_coarse(i) == 'H' .and. .not. col_visited(i)) call dfs_H_or_V(i, .false., 'H')
-    end do
-
-    ! Compute blocks in S (Tarjan algorithm on graph consisting only of column
-    ! vertices, where rows have been merged with their matching columns)
-    allocate(col_stack(0))
-    col_on_stack = .false.
-    index = 0
-    col_index = -1
-    do i = 1, size(this%col_vertices)
-       if (this%col_coarse(i) == 'S' .and. col_index(i) == -1) call strongconnect(i)
-    end do
-
-    ! Compute blocks in V
-    do i = 1, size(this%row_vertices)
-       if (this%row_coarse(i) == 'V' .and. .not. row_visited(i)) call dfs_H_or_V(i, .true., 'V')
-    end do
-    do i = 1, size(this%col_vertices)
-       if (this%col_coarse(i) == 'V' .and. .not. col_visited(i)) call dfs_H_or_V(i, .false., 'V')
-    end do
-  contains
-    ! Depth-first search for identifying one block inside the H or V sub-matrix
-    subroutine dfs_H_or_V(id, is_row, coarse_type)
-      integer, intent(in) :: id            ! Start vertex ID
-      logical, intent(in) :: is_row        ! Whether start vertex is a row
-      character, intent(in) :: coarse_type ! Either 'H' or 'V'
-
-      integer, dimension(:), allocatable :: path
-      type(dm_block) :: new_block
-
-      allocate(path(1))
-      path(1) = id
-
-      if (is_row) then
-         this%row_fine_block(id) = block_index
-         new_block%row_indices = [ id ]
-         allocate(new_block%col_indices(0))
-         row_visited(id) = .true.
-      else
-         this%col_fine_block(id) = block_index
-         new_block%col_indices = [ id ]
-         allocate(new_block%row_indices(0))
-         col_visited(id) = .true.
-      end if
-
-      do
-         associate (current_id => path(size(path)))
-           if ((mod(size(path), 2) == 0) .neqv. is_row) then ! Current vertex is a row
-              row_edge(current_id) = row_edge(current_id) + 1
-              if (row_edge(current_id) > size(this%row_vertices(current_id)%edges)) then
-                 if (size(path) == 1) exit
-                 path = path(:size(path)-1)
-              else
-                 associate (next_id => this%row_vertices(current_id)%edges(row_edge(current_id)))
-                   if (this%col_coarse(next_id) == coarse_type .and. .not. col_visited(next_id)) then
-                      col_visited(next_id) = .true.
-                      path = [ path, next_id ]
-                      this%col_fine_block(next_id) = block_index
-                      new_block%col_indices = [ new_block%col_indices, next_id ]
-                   end if
-                 end associate
-              end if
-           else ! Current vertex is a column
-              col_edge(current_id) = col_edge(current_id) + 1
-              if (col_edge(current_id) > size(this%col_vertices(current_id)%edges)) then
-                 if (size(path) == 1) exit
-                 path = path(:size(path)-1)
-              else
-                 associate (next_id => this%col_vertices(current_id)%edges(col_edge(current_id)))
-                   if (this%row_coarse(next_id) == coarse_type .and. .not. row_visited(next_id)) then
-                      row_visited(next_id) = .true.
-                      path = [ path, next_id ]
-                      this%row_fine_block(next_id) = block_index
-                      new_block%row_indices = [ new_block%row_indices, next_id ]
-                   end if
-                 end associate
-              end if
-           end if
-         end associate
-      end do
-
-      new_block%coarse_type = coarse_type
-      this%fine_blocks = [ this%fine_blocks, new_block ]
-      block_index = block_index + 1
-    end subroutine dfs_H_or_V
-
-    ! Iterative version of the strongconnect routine from:
-    ! https://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm
-    subroutine strongconnect(col)
-      integer, intent(in) :: col
-
-      integer, dimension(:), allocatable :: col_path
-
-      col_index(col) = index
-      col_lowlink(col) = index
-      index = index + 1
-      col_stack = [ col_stack, col ]
-      col_on_stack(col) = .true.
-
-      allocate(col_path(1))
-      col_path(1) = col
-
-      do
-         associate (current_col => col_path(size(col_path)))
-           col_edge(current_col) = col_edge(current_col) + 1
-           if (col_edge(current_col) > size(this%col_vertices(current_col)%edges)) then
-              ! We visited all edges from this col
-              ! If this is a root node, pop the stack and generate an SCC
-              if (col_lowlink(current_col) == col_index(current_col)) then
-                 associate (current_idx => findloc_back(col_stack, current_col))
-                   associate (cols => col_stack(current_idx:))
-                     col_on_stack(cols) = .false.
-                     associate (rows => this%col_matching(cols))
-                       this%col_fine_block(cols) = block_index
-                       this%row_fine_block(rows) = block_index
-                       this%fine_blocks = [ this%fine_blocks, dm_block(rows, cols, 'S') ]
-                     end associate
-                   end associate
-                   block_index = block_index + 1
-                   col_stack = col_stack(:current_idx-1)
-                 end associate
-              end if
-
-              if (size(col_path) == 1) return
-
-              associate (previous_col => col_path(size(col_path)-1))
-                col_lowlink(previous_col) = min(col_lowlink(previous_col), col_lowlink(current_col))
-              end associate
-              col_path = col_path(:size(col_path)-1)
-           else
-              associate (next_col => this%row_matching(this%col_vertices(current_col)%edges(col_edge(current_col))))
-                if (this%col_coarse(next_col) == 'S' .and. col_index(next_col) == -1) then
-                   col_index(next_col) = index
-                   col_lowlink(next_col) = index
-                   index = index + 1
-                   col_stack = [ col_stack, next_col ]
-                   col_on_stack(next_col) = .true.
-
-                   col_path = [ col_path, next_col ]
-                else if (col_on_stack(next_col)) then
-                   col_lowlink(current_col) = min(col_lowlink(current_col), col_index(next_col))
-                end if
-              end associate
-           end if
-         end associate
-      end do
-    end subroutine strongconnect
-
-    ! Equivalent of findloc(array, val, back = .true.) for rank-1 arrays
-    ! findloc is in the F2008 standard, but only implemented since gfortran 9
-    function findloc_back(array, val)
-      integer, dimension(:), intent(in) :: array
-      integer, intent(in) :: val
-      integer :: findloc_back
-
-      do findloc_back = ubound(array,1),lbound(array,1),-1
-         if (array(findloc_back) == val) return
-      end do
-      error stop "Can’t find element"
-    end function findloc_back
-
-  end subroutine matrix_graph_fine_decomposition
-
-  ! Computes the DAG formed by fine blocks
-  subroutine matrix_graph_fine_blocks_dag(this)
-    class(matrix_graph), intent(inout) :: this
-
-    integer :: blck, i, j
-    logical, dimension(size(this%fine_blocks)) :: marked
-
-    ! Compute predecessors in the DAG
-    do blck = 1,size(this%fine_blocks)
-       marked = .false.
-       associate (row_indices => this%fine_blocks(blck)%row_indices)
-         do i = 1,size(row_indices)
-            associate (row_vertex => this%row_vertices(row_indices(i)))
-              do j = 1,size(row_vertex%edges)
-                 marked(this%col_fine_block(row_vertex%edges(j))) = .true.
-              end do
-            end associate
-         end do
-       end associate
-       marked(blck) = .false.
-       this%fine_blocks(blck)%predecessors = pack([ (i, i=1,size(this%fine_blocks)) ], marked)
-    end do
-
-    ! Compute successors in the DAG
-    do blck = 1,size(this%fine_blocks)
-       marked = .false.
-       associate (col_indices => this%fine_blocks(blck)%col_indices)
-         do i = 1,size(col_indices)
-            associate (col_vertex => this%col_vertices(col_indices(i)))
-              do j = 1,size(col_vertex%edges)
-                 marked(this%row_fine_block(col_vertex%edges(j))) = .true.
-              end do
-            end associate
-         end do
-       end associate
-       marked(blck) = .false.
-       this%fine_blocks(blck)%successors = pack([ (i, i=1,size(this%fine_blocks)) ], marked)
-    end do
-  end subroutine matrix_graph_fine_blocks_dag
-
-  ! Compute the blocks from the fine decomposition, including the DAG they form
  subroutine dm_blocks(mat, blocks)
    real(real64), dimension(:, :), intent(in) :: mat
    type(dm_block), dimension(:), allocatable, intent(out) :: blocks

-    type(matrix_graph) :: mg
+    type(c_ptr), dimension(1) :: call_rhs
+    type(c_ptr), dimension(4) :: call_lhs
+    real(real64), dimension(:, :), pointer :: mat_mx
+    real(real64), dimension(:), pointer :: p, q, r, s
+    integer :: i, j

-    call mg%init(mat)
-    call mg%maximum_matching
-    call mg%coarse_decomposition
-    call mg%fine_decomposition
-    call mg%fine_blocks_dag
+    call_rhs(1) = mxCreateDoubleMatrix(int(size(mat, 1), mwSize), int(size(mat, 2), mwSize), mxREAL)
+    mat_mx(1:size(mat,1), 1:size(mat,2)) => mxGetPr(call_rhs(1))
+    mat_mx = mat

-    call move_alloc(mg%fine_blocks, blocks)
-  end subroutine dm_blocks
+    if (mexCallMATLAB(4_c_int, call_lhs, 1_c_int, call_rhs, "dmperm") /= 0) &
+         call mexErrMsgTxt("Error calling dmperm")

-  ! Equivalent of dmperm function of MATLAB/Octave
-  subroutine dmperm(mat, row_order, col_order, row_blocks, col_blocks)
-    real(real64), dimension(:, :), intent(in) :: mat
-    integer, dimension(size(mat, 1)), intent(out) :: row_order
-    integer, dimension(size(mat, 2)), intent(out) :: col_order
-    integer, dimension(:), allocatable, intent(out) :: row_blocks, col_blocks
+    call mxDestroyArray(call_rhs(1))

-    type(matrix_graph) :: mg
-    integer :: blck, i
+    p => mxGetPr(call_lhs(1))
+    q => mxGetPr(call_lhs(2))
+    r => mxGetPr(call_lhs(3))
+    s => mxGetPr(call_lhs(4))

-    call mg%init(mat)
-    call mg%maximum_matching
-    call mg%coarse_decomposition
-    call mg%fine_decomposition
-
-    allocate(row_blocks(size(mg%fine_blocks)+1))
-    allocate(col_blocks(size(mg%fine_blocks)+1))
-
-    row_blocks(1) = 1
-    col_blocks(1) = 1
-
-    do blck = 1,size(mg%fine_blocks)
-       associate (row_indices => mg%fine_blocks(blck)%row_indices)
-         do i = 1,size(row_indices)
-            row_order(row_blocks(blck)+i-1) = row_indices(i)
-         end do
-         row_blocks(blck+1) = row_blocks(blck)+i-1
-       end associate
-       associate (col_indices => mg%fine_blocks(blck)%col_indices)
-         do i = 1,size(col_indices)
-            col_order(col_blocks(blck)+i-1) = col_indices(i)
-         end do
-         col_blocks(blck+1) = col_blocks(blck)+i-1
-       end associate
+    allocate(blocks(size(r)-1))
+    do i = 1, size(r)-1
+       allocate(blocks(i)%row_indices(int(r(i+1)-r(i))))
+       do j = 1, int(r(i+1)-r(i))
+          blocks(i)%row_indices(j) = int(p(j+int(r(i))-1))
+       end do
+       allocate(blocks(i)%col_indices(int(s(i+1)-s(i))))
+       do j = 1, int(s(i+1)-s(i))
+          blocks(i)%col_indices(j) = int(q(j+int(s(i))-1))
+       end do
    end do
-  end subroutine dmperm
+
+    call mxDestroyArray(call_lhs(1))
+    call mxDestroyArray(call_lhs(2))
+    call mxDestroyArray(call_lhs(3))
+    call mxDestroyArray(call_lhs(4))
+  end subroutine dm_blocks
 end module dulmage_mendelsohn
--- a/mex/sources/block_trust_region/matlab_fcn_closure.f08
+++ b/mex/sources/block_trust_region/matlab_fcn_closure.f08
@ -11,7 +11,7 @@
 ! be set in order to give the input values for the indices that are not passed
 ! to matlab_fcn.

-! Copyright © 2019 Dynare Team
+! Copyright © 2019-2020 Dynare Team
 !
 ! This file is part of Dynare.
 !
@ -80,12 +80,16 @@ contains
    if (mexCallMATLAB(nlhs, call_lhs, int(size(call_rhs), c_int), call_rhs, "feval") /= 0) &
         call mexErrMsgTxt("Error calling function to be solved")

+    call mxDestroyArray(call_rhs(2))
+
    fvec_all => mxGetPr(call_lhs(1))
    if (associated(f_indices)) then
       fvec = fvec_all(f_indices)
    else
       fvec = fvec_all
    end if
+    call mxDestroyArray(call_lhs(1))
+
    if (present(fjac)) then
       fjac_all(1:n_all,1:n_all) => mxGetPr(call_lhs(2))
       if (associated(x_indices) .and. associated(f_indices)) then
@ -93,6 +97,7 @@ contains
       else
          fjac = fjac_all
       end if
+       call mxDestroyArray(call_lhs(2))
    end if
  end subroutine matlab_fcn
 end module matlab_fcn_closure
--- a/mex/sources/block_trust_region/mexFunction.f08
+++ b/mex/sources/block_trust_region/mexFunction.f08
@ -1,4 +1,4 @@
-! Copyright © 2019 Dynare Team
+! Copyright © 2019-2020 Dynare Team
 !
 ! This file is part of Dynare.
 !
@ -31,31 +31,104 @@ subroutine mexFunction(nlhs, plhs, nrhs, prhs) bind(c, name='mexFunction')
  real(real64), dimension(:), allocatable, target :: x
  type(dm_block), dimension(:), allocatable, target :: blocks
  integer :: info, i
-  real(real64), parameter :: tolf = 1e-6_real64
+  real(real64) :: tolf, tolx
+  integer :: maxiter
  real(real64), dimension(:), allocatable :: fvec
  real(real64), dimension(:,:), allocatable :: fjac
+  logical :: debug, specializedunivariateblocks
+  character(len=80) :: debug_msg
+  logical(mxLogical), dimension(:), pointer :: isloggedlhs => null(), &
+       isauxdiffloggedrhs => null()
+  type(c_ptr) :: endo_names, lhs
+  logical :: fre ! True if the last block has been solved (i.e. not evaluated), so that residuals must be updated
+  integer, dimension(:), allocatable :: evaled_cols ! If fre=.false., lists the columns that have been evaluated so far without updating the residuals

-  if (nrhs < 2 .or. nlhs /= 2) then
-     call mexErrMsgTxt("Must have at least 2 inputs and exactly 2 outputs")
+  if (nrhs < 4 .or. nlhs /= 2) then
+     call mexErrMsgTxt("Must have at least 7 inputs and exactly 2 outputs")
     return
  end if

  if (.not. ((mxIsChar(prhs(1)) .and. mxGetM(prhs(1)) == 1) .or. mxIsClass(prhs(1), "function_handle"))) then
-     call mexErrMsgTxt("First argument should be a string or a function handle")
+     call mexErrMsgTxt("First argument (function) should be a string or a function handle")
     return
  end if

  if (.not. (mxIsDouble(prhs(2)) .and. (mxGetM(prhs(2)) == 1 .or. mxGetN(prhs(2)) == 1))) then
-     call mexErrMsgTxt("Second argument should be a real vector")
+     call mexErrMsgTxt("Second argument (initial guess) should be a real vector")
     return
  end if

+  if (.not. (mxIsScalar(prhs(3)) .and. mxIsNumeric(prhs(3)))) then
+     call mexErrMsgTxt("Third argument (tolf) should be a numeric scalar")
+     return
+  end if
+
+  if (.not. (mxIsScalar(prhs(4)) .and. mxIsNumeric(prhs(4)))) then
+     call mexErrMsgTxt("Fourth argument (tolx) should be a numeric scalar")
+     return
+  end if
+
+  if (.not. (mxIsScalar(prhs(5)) .and. mxIsNumeric(prhs(5)))) then
+     call mexErrMsgTxt("Fifth argument (maxiter) should be a numeric scalar")
+     return
+  end if
+
+  if (.not. (mxIsLogicalScalar(prhs(6)))) then
+     call mexErrMsgTxt("Sixth argument (debug) should be a logical scalar")
+     return
+  end if
+
+  if (.not. (mxIsStruct(prhs(7)) .and. &
+       (mxGetNumberOfFields(prhs(7)) == 0 .or. mxGetNumberOfFields(prhs(7)) == 4))) then
+     call mexErrMsgTxt("Seventh argument should be a struct with either 0 or 4 fields")
+     return
+  end if
+  specializedunivariateblocks = (mxGetNumberOfFields(prhs(7)) == 4)
+
  func => prhs(1)
-  extra_args => prhs(3:nrhs)
+  tolf = mxGetScalar(prhs(3))
+  tolx = mxGetScalar(prhs(4))
+  maxiter = int(mxGetScalar(prhs(5)))
+  debug = mxGetScalar(prhs(6)) == 1._c_double
+  extra_args => prhs(8:nrhs) ! Extra arguments to func are in argument 8 and subsequent ones
  associate (x_mat => mxGetPr(prhs(2)))
    allocate(x(size(x_mat)))
    x = x_mat
  end associate
+  if (specializedunivariateblocks) then
+     block
+       type(c_ptr) :: tmp
+       tmp = mxGetField(prhs(7), 1_mwIndex, "isloggedlhs")
+       if (.not. (c_associated(tmp) .and. mxIsLogical(tmp) .and. mxGetNumberOfElements(tmp) == size(x))) then
+          call mexErrMsgTxt("Seventh argument must have a 'isloggedlhs' field of type logical, of same size as second argument")
+          return
+       end if
+       isloggedlhs => mxGetLogicals(tmp)
+
+       tmp = mxGetField(prhs(7), 1_mwIndex, "isauxdiffloggedrhs")
+       if (.not. (c_associated(tmp) .and. mxIsLogical(tmp) .and. mxGetNumberOfElements(tmp) == size(x))) then
+          call mexErrMsgTxt("Seventh argument must have a 'isauxdiffloggedrhs' field of type &
+               &logical, of same size as second argument")
+          return
+       end if
+       isauxdiffloggedrhs => mxGetLogicals(tmp)
+
+       lhs = mxGetField(prhs(7), 1_mwIndex, "lhs")
+       if (.not. (c_associated(lhs) .and. mxIsCell(lhs) .and. mxGetNumberOfElements(lhs) == size(x))) then
+          call mexErrMsgTxt("Seventh argument must have a 'lhs' field of type cell, of same size as second argument")
+          return
+       end if
+
+       endo_names = mxGetField(prhs(7), 1_mwIndex, "endo_names")
+       if (.not. (c_associated(endo_names) .and. mxIsCell(endo_names) .and. mxGetNumberOfElements(endo_names) == size(x))) then
+          call mexErrMsgTxt("Seventh argument must have a 'endo_names' field of type cell, of same size as second argument")
+          return
+       end if
+     end block
+
+     allocate(evaled_cols(0))
+     fre = .false.
+  end if

  allocate(fvec(size(x)))
  allocate(fjac(size(x), size(x)))
@ -65,17 +138,90 @@ subroutine mexFunction(nlhs, plhs, nrhs, prhs) bind(c, name='mexFunction')
  call matlab_fcn(x, fvec, fjac)
  call dm_blocks(fjac, blocks)

+  if (debug) then
+     write (debug_msg, "('DYNARE_SOLVE (solve_algo=13|14): number of blocks = ', i0)") size(blocks)
+     call mexPrintf_trim_newline(debug_msg)
+  end if
+
  ! Solve the system, starting from bottom-rightmost block
-  x_all => x
  do i = size(blocks),1,-1
+     if (debug) then
+        write (debug_msg, "('DYNARE_SOLVE (solve_algo=13|14): solving block ', i0, ' of size ', i0)") &
+             i, size(blocks(i)%col_indices)
+        call mexPrintf_trim_newline(debug_msg)
+     end if
+
+     if (specializedunivariateblocks .and. size(blocks(i)%col_indices) == 1) then
+        if (debug) then
+           write (debug_msg, "('DYNARE_SOLVE (solve_algo=13|14): solving block ', i0, ' by evaluating RHS')") i
+           call mexPrintf_trim_newline(debug_msg)
+        end if
+        associate (eq => blocks(i)%row_indices(1), var => blocks(i)%col_indices(1))
+          if (fre .or. any(abs(fjac(eq, evaled_cols)) > 0._real64)) then
+             ! Reevaluation of the residuals is required because the current RHS depends on
+             ! variables that potentially have been updated previously.
+             nullify(x_indices, f_indices, x_all)
+             call matlab_fcn(x, fvec)
+             deallocate(evaled_cols) ! This shouldn’t be necessary, but it crashes otherwise with gfortran 8
+             allocate(evaled_cols(0))
+             fre = .false.
+          end if
+          evaled_cols = [ evaled_cols, var]
+          block
+            ! An associate() construct for lhs_eq and endo_name_var makes the
+            ! code crash (with double free) using gfortran 8. Hence use a block
+            character(kind=c_char, len=:), allocatable :: lhs_eq, endo_name_var
+            lhs_eq = mxArrayToString(mxGetCell(lhs, int(eq, mwIndex)))
+            endo_name_var = mxArrayToString(mxGetCell(endo_names, int(var, mwIndex)))
+            if (lhs_eq == endo_name_var .or. lhs_eq == "log(" // endo_name_var // ")") then
+               if (isloggedlhs(eq)) then
+                  x(var) = exp(log(x(var)) - fvec(eq))
+               else
+                  x(var) = x(var) - fvec(eq)
+               end if
+            else
+               if (debug) then
+                  write (debug_msg, "('LHS variable is not determined by RHS expression (', i0, ')')") eq
+                  call mexPrintf_trim_newline(debug_msg)
+                  write (debug_msg, "(a, ' -> ', a)") lhs_eq, endo_name_var
+                  call mexPrintf_trim_newline(debug_msg)
+               end if
+               if (lhs_eq(1:9) == "AUX_DIFF_" .or. lhs_eq(1:13) == "log(AUX_DIFF_") then
+                  if (isauxdiffloggedrhs(eq)) then
+                     x(var) = exp(log(x(var)) + fvec(eq))
+                  else
+                     x(var) = x(var) + fvec(eq)
+                  end if
+               else
+                  call mexErrMsgTxt("Algorithm solve_algo=14 cannot be used with this nonlinear problem")
+                  return
+               end if
+            end if
+          end block
+        end associate
+        cycle
+     else
+        if (debug) then
+           write (debug_msg, "('DYNARE_SOLVE (solve_algo=13|14): solving block ', i0, ' with trust region routine')") i
+           call mexPrintf_trim_newline(debug_msg)
+        end if
+     end if
+
     block
       real(real64), dimension(size(blocks(i)%col_indices)) :: x_block
       x_indices => blocks(i)%col_indices
       f_indices => blocks(i)%row_indices
+       x_all => x
+       if (size(x_indices) /= size(f_indices)) then
+          call mexErrMsgTxt("Non-square block")
+          return
+       end if
       x_block = x(x_indices)
-       call trust_region_solve(x_block, matlab_fcn, info, tolf = tolf)
+       call trust_region_solve(x_block, matlab_fcn, info, tolx, tolf, maxiter)
       x(x_indices) = x_block
     end block
+
+     fre = .true.
  end do

  ! Verify that we have a solution
@ -88,7 +234,9 @@ subroutine mexFunction(nlhs, plhs, nrhs, prhs) bind(c, name='mexFunction')
  nullify(x_indices, f_indices, x_all)
  call matlab_fcn(x, fvec)
  if (maxval(abs(fvec)) > tolf) then
-     call trust_region_solve(x, matlab_fcn, info, tolf = tolf)
+     if (debug) &
+          call mexPrintf_trim_newline("DYNARE_SOLVE (solve_algo=13|14): residuals still too large, solving for the whole model")
+     call trust_region_solve(x, matlab_fcn, info, tolx, tolf, maxiter)
  else
     info = 1
  end if
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@ -134,6 +134,7 @@ MODFILES = \
 	steady_state/walsh1_ssm_block.mod \
 	steady_state/multi_leads.mod \
 	steady_state/example1_trust_region.mod \
+	steady_state/example1_block_trust_region.mod \
 	steady_state/Gali_2015_chapter_6_4.mod \
 	steady_state_operator/standard.mod \
 	steady_state_operator/use_dll.mod \
--- a/tests/steady_state/example1_block_trust_region.mod
+++ b/tests/steady_state/example1_block_trust_region.mod
@ -0,0 +1,47 @@
+// Test block trust region nonlinear solver (solve_algo=13)
+var y, c, k, a, h, b;
+varexo e, u;
+
+parameters beta, rho, alpha, delta, theta, psi, tau;
+
+alpha = 0.36;
+rho   = 0.95;
+tau   = 0.025;
+beta  = 0.99;
+delta = 0.025;
+psi   = 0;
+theta = 2.95;
+
+phi   = 0.1;
+
+model;
+c*theta*h^(1+psi)=(1-alpha)*y;
+k = beta*(((exp(b)*c)/(exp(b(+1))*c(+1)))
+    *(exp(b(+1))*alpha*y(+1)+(1-delta)*k));
+y = exp(a)*(k(-1)^alpha)*(h^(1-alpha));
+k = exp(b)*(y-c)+(1-delta)*k(-1);
+a = rho*a(-1)+tau*b(-1) + e;
+b = tau*a(-1)+rho*b(-1) + u;
+end;
+
+initval;
+y = 1;
+c = 0.8;
+h = 0.3;
+k = 10;
+a = 0;
+b = 0;
+e = 0;
+u = 0;
+end;
+
+options_.debug = true;
+steady(solve_algo=13);
+
+shocks;
+var e; stderr 0.009;
+var u; stderr 0.009;
+var e, u = phi*0.009*0.009;
+end;
+
+stoch_simul(order=1,nomoments,irf=0);