From 9430b4e9ca3427be6edf476c357ba46e1614e642 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Villemot?= Date: Wed, 22 Jul 2020 17:57:12 +0200 Subject: [PATCH 1/6] Block trust region MEX: add debugging information --- .../block_trust_region/mexFunction.f08 | 27 ++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/mex/sources/block_trust_region/mexFunction.f08 b/mex/sources/block_trust_region/mexFunction.f08 index 7377af47c..95bfb4c80 100644 --- a/mex/sources/block_trust_region/mexFunction.f08 +++ b/mex/sources/block_trust_region/mexFunction.f08 @@ -34,9 +34,11 @@ subroutine mexFunction(nlhs, plhs, nrhs, prhs) bind(c, name='mexFunction') real(real64), parameter :: tolf = 1e-6_real64 real(real64), dimension(:), allocatable :: fvec real(real64), dimension(:,:), allocatable :: fjac + logical :: debug + character(len=80) :: debug_msg - if (nrhs < 2 .or. nlhs /= 2) then - call mexErrMsgTxt("Must have at least 2 inputs and exactly 2 outputs") + if (nrhs < 3 .or. nlhs /= 2) then + call mexErrMsgTxt("Must have at least 3 inputs and exactly 2 outputs") return end if @@ -50,8 +52,14 @@ subroutine mexFunction(nlhs, plhs, nrhs, prhs) bind(c, name='mexFunction') return end if + if (.not. (mxIsLogicalScalar(prhs(3)))) then + call mexErrMsgTxt("Third argument should be a logical scalar") + return + end if + func => prhs(1) - extra_args => prhs(3:nrhs) + debug = mxGetScalar(prhs(3)) == 1._c_double + extra_args => prhs(4:nrhs) associate (x_mat => mxGetPr(prhs(2))) allocate(x(size(x_mat))) x = x_mat @@ -65,9 +73,20 @@ subroutine mexFunction(nlhs, plhs, nrhs, prhs) bind(c, name='mexFunction') call matlab_fcn(x, fvec, fjac) call dm_blocks(fjac, blocks) + if (debug) then + write (debug_msg, "('DYNARE_SOLVE (solve_algo=13|14): number of blocks = ', i0)") size(blocks) + call mexPrintf_trim_newline(debug_msg) + end if + ! Solve the system, starting from bottom-rightmost block x_all => x do i = size(blocks),1,-1 + if (debug) then + write (debug_msg, "('DYNARE_SOLVE (solve_algo=13|14): solving block ', i0, ' of size ', i0)") & + i, size(blocks(i)%col_indices) + call mexPrintf_trim_newline(debug_msg) + end if + block real(real64), dimension(size(blocks(i)%col_indices)) :: x_block x_indices => blocks(i)%col_indices @@ -88,6 +107,8 @@ subroutine mexFunction(nlhs, plhs, nrhs, prhs) bind(c, name='mexFunction') nullify(x_indices, f_indices, x_all) call matlab_fcn(x, fvec) if (maxval(abs(fvec)) > tolf) then + if (debug) & + call mexPrintf_trim_newline("DYNARE_SOLVE (solve_algo=13|14): residuals still too large, solving for the whole model") call trust_region_solve(x, matlab_fcn, info, tolf = tolf) else info = 1 From adf1fdb009a87ef54cf8f5a28f5a9ceb056c1c90 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Villemot?= Date: Wed, 22 Jul 2020 18:01:31 +0200 Subject: [PATCH 2/6] Block trust region MEX: add safety check for squareness of blocks --- mex/sources/block_trust_region/mexFunction.f08 | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/mex/sources/block_trust_region/mexFunction.f08 b/mex/sources/block_trust_region/mexFunction.f08 index 95bfb4c80..d375c4250 100644 --- a/mex/sources/block_trust_region/mexFunction.f08 +++ b/mex/sources/block_trust_region/mexFunction.f08 @@ -91,6 +91,10 @@ subroutine mexFunction(nlhs, plhs, nrhs, prhs) bind(c, name='mexFunction') real(real64), dimension(size(blocks(i)%col_indices)) :: x_block x_indices => blocks(i)%col_indices f_indices => blocks(i)%row_indices + if (size(x_indices) /= size(f_indices)) then + call mexErrMsgTxt("Non-square block") + return + end if x_block = x(x_indices) call trust_region_solve(x_block, matlab_fcn, info, tolf = tolf) x(x_indices) = x_block From 0b41d17374488f1357ec9cce0e0a9b36aac790ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Villemot?= Date: Mon, 27 Jul 2020 17:34:13 +0200 Subject: [PATCH 3/6] dynare_solve: fix display of number of dmperm blocks --- matlab/dynare_solve.m | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/matlab/dynare_solve.m b/matlab/dynare_solve.m index 3bb7ecc03..079bc1429 100644 --- a/matlab/dynare_solve.m +++ b/matlab/dynare_solve.m @@ -251,7 +251,7 @@ elseif ismember(options.solve_algo, [2, 12, 4, 14]) [j1,j2,r,s] = dmperm(fjac); JAC = abs(fjac(j1,j2))>0; if options.debug - disp(['DYNARE_SOLVE (solve_algo=2|4|12|14): number of blocks = ' num2str(length(r))]); + disp(['DYNARE_SOLVE (solve_algo=2|4|12|14): number of blocks = ' num2str(length(r)-1)]); end l = 0; fre = false; From 35a162c6a6cebd9e5d2b9ede9db58f9442ea6809 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Villemot?= Date: Mon, 7 Sep 2020 16:13:59 +0200 Subject: [PATCH 4/6] Block trust region MEX: fix memory leak in MATLAB function closure --- mex/sources/block_trust_region/matlab_fcn_closure.f08 | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/mex/sources/block_trust_region/matlab_fcn_closure.f08 b/mex/sources/block_trust_region/matlab_fcn_closure.f08 index 9565d8f74..d6fc00ea8 100644 --- a/mex/sources/block_trust_region/matlab_fcn_closure.f08 +++ b/mex/sources/block_trust_region/matlab_fcn_closure.f08 @@ -11,7 +11,7 @@ ! be set in order to give the input values for the indices that are not passed ! to matlab_fcn. -! Copyright © 2019 Dynare Team +! Copyright © 2019-2020 Dynare Team ! ! This file is part of Dynare. ! @@ -80,12 +80,16 @@ contains if (mexCallMATLAB(nlhs, call_lhs, int(size(call_rhs), c_int), call_rhs, "feval") /= 0) & call mexErrMsgTxt("Error calling function to be solved") + call mxDestroyArray(call_rhs(2)) + fvec_all => mxGetPr(call_lhs(1)) if (associated(f_indices)) then fvec = fvec_all(f_indices) else fvec = fvec_all end if + call mxDestroyArray(call_lhs(1)) + if (present(fjac)) then fjac_all(1:n_all,1:n_all) => mxGetPr(call_lhs(2)) if (associated(x_indices) .and. associated(f_indices)) then @@ -93,6 +97,7 @@ contains else fjac = fjac_all end if + call mxDestroyArray(call_lhs(2)) end if end subroutine matlab_fcn end module matlab_fcn_closure From 7e21bf2a10189ef4c1d8673a7f736c294d23d4ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Villemot?= Date: Tue, 8 Sep 2020 16:23:32 +0200 Subject: [PATCH 5/6] =?UTF-8?q?Block=20trust=20region=20MEX:=20use=20MATLA?= =?UTF-8?q?B=E2=80=99s=20dmperm=20for=20the=20Dulmage-Mendelsohn=20decompo?= =?UTF-8?q?sition?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It turns out that MATLAB’s implementation is significantly faster than my own Fortran implementation. --- .../block_trust_region/dulmage_mendelsohn.f08 | 585 ++---------------- 1 file changed, 35 insertions(+), 550 deletions(-) diff --git a/mex/sources/block_trust_region/dulmage_mendelsohn.f08 b/mex/sources/block_trust_region/dulmage_mendelsohn.f08 index 45e48d82a..56122bedb 100644 --- a/mex/sources/block_trust_region/dulmage_mendelsohn.f08 +++ b/mex/sources/block_trust_region/dulmage_mendelsohn.f08 @@ -1,16 +1,7 @@ -! Implementation of the Dulmage-Mendelsohn decomposition -! -! Closely follows the description of Pothen and Fan (1990), “Computing the -! Block Triangular Form of a Sparse Matrix”, ACM Transactions on Mathematical -! Software, Vol. 16, No. 4, pp. 303–324 -! -! In addition, also computes the directed acyclic graph (DAG) formed by the -! fine blocks, in order to optimize the parallel resolution of a linear system. -! The last (i.e. lower-right) block has no predecessor (since this is an -! *upper* block triangular form), and the other blocks may have predecessors if -! they use variables computed by blocks further on the right. +! Wrapper around MATLAB’s dmperm to compute the Dulmage-Mendelsohn +! decomposition -! Copyright © 2019 Dynare Team +! Copyright © 2020 Dynare Team ! ! This file is part of Dynare. ! @@ -29,559 +20,53 @@ module dulmage_mendelsohn use iso_fortran_env + use matlab_mex implicit none - private - public :: dm_block, dmperm, dm_blocks - ! Represents a block in the fine DM decomposition type :: dm_block integer, dimension(:), allocatable :: row_indices, col_indices - character :: coarse_type ! Place in the coarse decomposition, either 'H', 'S' or 'V' - integer, dimension(:), allocatable :: predecessors, successors ! Predecessors and successors in the DAG formed by fine DM blocks end type dm_block - - type :: vertex - integer, dimension(:), allocatable :: edges - integer :: id ! Index of row of column represented by this vertex - end type vertex - - type :: matrix_graph - type(vertex), dimension(:), allocatable :: row_vertices, col_vertices - integer, dimension(:), allocatable :: row_matching, col_matching ! Maximum matching - character, dimension(:), allocatable :: row_coarse, col_coarse ! Coarse decomposition - type(dm_block), dimension(:), allocatable :: fine_blocks ! Fine decomposition - integer, dimension(:), allocatable :: row_fine_block, col_fine_block ! Maps rows/cols to their block index - contains - procedure :: init => matrix_graph_init - procedure :: maximum_matching => matrix_graph_maximum_matching - procedure :: coarse_decomposition => matrix_graph_coarse_decomposition - procedure :: fine_decomposition => matrix_graph_fine_decomposition - procedure :: fine_blocks_dag => matrix_graph_fine_blocks_dag - end type matrix_graph - contains - ! Initialize a matrix_graph object, given a matrix - subroutine matrix_graph_init(this, mat) - class(matrix_graph), intent(inout) :: this - real(real64), dimension(:, :), intent(in) :: mat - - integer :: i, j - - if (size(mat, 1) < size(mat, 2)) & - error stop "Matrix has less rows than columns; consider transposing it" - - ! Create row vertices - allocate(this%row_vertices(size(mat, 1))) - do i = 1, size(mat, 1) - this%row_vertices(i)%edges = pack([ (j, j=1,size(mat, 2)) ], mat(i, :) /= 0_real64) - this%row_vertices(i)%id = i - end do - - ! Create column vertices - allocate(this%col_vertices(size(mat, 2))) - do i = 1, size(mat, 2) - this%col_vertices(i)%edges = pack([ (j, j=1, size(mat, 1)) ], mat(:, i) /= 0_real64) - this%col_vertices(i)%id = i - end do - end subroutine matrix_graph_init - - ! Computes the maximum matching for this graph - subroutine matrix_graph_maximum_matching(this) - class(matrix_graph), intent(inout) :: this - - logical, dimension(size(this%row_vertices)) :: row_visited - integer, dimension(:), allocatable :: col_unmatched, col_unmatched_new - integer :: i, j - - if (allocated(this%row_matching) .or. allocated(this%col_matching)) & - error stop "Maximum matching already computed" - - allocate(this%row_matching(size(this%row_vertices))) - allocate(this%col_matching(size(this%col_vertices))) - - this%row_matching = 0 - this%col_matching = 0 - allocate(col_unmatched(0)) - - ! Compute cheap matching - do i = 1, size(this%col_vertices) - match_col: do j = 1, size(this%col_vertices(i)%edges) - associate (v => this%col_vertices(i)%edges(j)) - if (this%row_matching(v) == 0) then - this%row_matching(v) = i - this%col_matching(i) = v - exit match_col - end if - end associate - end do match_col - if (this%col_matching(i) == 0) col_unmatched = [ col_unmatched, i ] - end do - - ! Augment matching - allocate(col_unmatched_new(0)) - do - row_visited = .false. - do i = 1, size(col_unmatched) - call search_augmenting_path(col_unmatched(i)) - end do - if (size(col_unmatched) == size(col_unmatched_new)) exit - call move_alloc(from=col_unmatched_new, to=col_unmatched) - allocate(col_unmatched_new(0)) - end do - contains - ! Depth-first search for an augmenting path - ! The algorithm is written iteratively (and not recursively), because - ! otherwise the stack could overflow on large matrices - subroutine search_augmenting_path(col) - integer, intent(in) :: col - - integer, dimension(:), allocatable :: row_path, col_path ! Path visited so far - integer, dimension(size(this%col_vertices)) :: col_edge ! For each column, keeps the last visited edge index - integer :: i - - col_edge = 0 - allocate(col_path(1)) - col_path(1) = col - allocate(row_path(0)) - - do - ! Depth-first search, starting from last column in the path - associate (current_col => col_path(size(col_path))) - col_edge(current_col) = col_edge(current_col) + 1 - if (col_edge(current_col) > size(this%col_vertices(current_col)%edges)) then - ! We visited all edges from this column, need to backtrack - if (size(col_path) == 1) then - ! The search failed - col_unmatched_new = [ col_unmatched_new, col ] - return - end if - col_path = col_path(:size(col_path)-1) - row_path = row_path(:size(row_path)-1) - else - associate (current_row => this%col_vertices(current_col)%edges(col_edge(current_col))) - if (.not. row_visited(current_row)) then - row_visited(current_row) = .true. - row_path = [ row_path, current_row ] - if (this%row_matching(current_row) == 0) then - ! We found an augmenting path, update matching and return - do i = 1, size(col_path) - this%row_matching(row_path(i)) = col_path(i) - this%col_matching(col_path(i)) = row_path(i) - end do - return - end if - col_path = [ col_path, this%row_matching(current_row)] - end if - end associate - end if - end associate - end do - end subroutine search_augmenting_path - end subroutine matrix_graph_maximum_matching - - ! Computes the coarse decomposition - ! The {row,col}_coarse arrays are filled with characters 'H', 'S' and 'V' - subroutine matrix_graph_coarse_decomposition(this) - class(matrix_graph), intent(inout) :: this - - integer :: i - - if (allocated(this%row_coarse) .or. allocated(this%col_coarse)) & - error stop "Coarse decomposition already computed" - - if (.not. (allocated(this%row_matching) .and. allocated(this%col_matching))) & - error stop "Maximum matching not yet computed" - - allocate(this%row_coarse(size(this%row_vertices))) - allocate(this%col_coarse(size(this%col_vertices))) - - ! Initialize partition - this%row_coarse = 'S' - this%col_coarse = 'S' - - ! Identify A_h - do i = 1, size(this%col_vertices) - if (this%col_matching(i) /= 0) cycle - this%col_coarse(i) = 'H' - call alternating_dfs_col(i) - end do - - ! Identify A_v - do i = 1, size(this%row_vertices) - if (this%row_matching(i) /= 0) cycle - this%row_coarse(i) = 'V' - call alternating_dfs_row(i) - end do - contains - ! Depth-first search along alternating path, starting from a column - ! Written iteratively, to avoid stack overflow on large matrices - subroutine alternating_dfs_col(col) - integer, intent(in) :: col - - integer, dimension(size(this%col_vertices)) :: col_edge - integer, dimension(:), allocatable :: col_path ! path visited so far - - col_edge = 0 - allocate(col_path(1)) - col_path(1) = col - - do - associate (current_col => col_path(size(col_path))) - col_edge(current_col) = col_edge(current_col) + 1 - if (col_edge(current_col) > size(this%col_vertices(current_col)%edges)) then - ! We visited all edges from this column, need to backtrack - if (size(col_path) == 1) return - col_path = col_path(:size(col_path)-1) - else - associate (current_row => this%col_vertices(current_col)%edges(col_edge(current_col))) - if (this%row_coarse(current_row) /= 'H') then - this%row_coarse(current_row) = 'H' - if (this%row_matching(current_row) /= 0) then - this%col_coarse(this%row_matching(current_row)) = 'H' - col_path = [ col_path, this%row_matching(current_row)] - end if - end if - end associate - end if - end associate - end do - end subroutine alternating_dfs_col - - ! Depth-first search along alternating path, starting from a row - ! This is the perfect symmetric of alternating_dfs_col (swapping "col" and "row") - subroutine alternating_dfs_row(row) - integer, intent(in) :: row - - integer, dimension(size(this%row_vertices)) :: row_edge - integer, dimension(:), allocatable :: row_path ! path visited so far - - row_edge = 0 - allocate(row_path(1)) - row_path(1) = row - - do - associate (current_row => row_path(size(row_path))) - row_edge(current_row) = row_edge(current_row) + 1 - if (row_edge(current_row) > size(this%row_vertices(current_row)%edges)) then - ! We visited all edges from this row, need to backtrack - if (size(row_path) == 1) return - row_path = row_path(:size(row_path)-1) - else - associate (current_col => this%row_vertices(current_row)%edges(row_edge(current_row))) - if (this%col_coarse(current_col) /= 'V') then - this%col_coarse(current_col) = 'V' - if (this%col_matching(current_col) /= 0) then - this%row_coarse(this%col_matching(current_col)) = 'V' - row_path = [ row_path, this%col_matching(current_col)] - end if - end if - end associate - end if - end associate - end do - end subroutine alternating_dfs_row - end subroutine matrix_graph_coarse_decomposition - - ! Computes the coarse decomposition - ! Allocate and fills the this%row_fine_* and this%col_fine* arrays - subroutine matrix_graph_fine_decomposition(this) - class(matrix_graph), intent(inout) :: this - - ! Index of next block to be discovered - integer :: block_index - - ! Used in DFS (both in dfs_H_or_V and strongconnect) - integer, dimension(size(this%row_vertices)) :: row_edge - integer, dimension(size(this%col_vertices)) :: col_edge - - ! For dfs_H_or_V - logical, dimension(size(this%row_vertices)) :: row_visited - logical, dimension(size(this%col_vertices)) :: col_visited - - ! For strongconnect - integer, dimension(:), allocatable :: col_stack - logical, dimension(size(this%col_vertices)) :: col_on_stack - integer, dimension(size(this%col_vertices)) :: col_index, col_lowlink - integer :: index - - integer :: i - - if (allocated(this%fine_blocks) & - .or. allocated(this%row_fine_block) .or. allocated(this%col_fine_block)) & - error stop "Fine decomposition already computed" - - if (.not. (allocated(this%row_coarse) .and. allocated(this%col_coarse))) & - error stop "Coarse decomposition not yet computed" - - allocate(this%row_fine_block(size(this%row_vertices))) - allocate(this%col_fine_block(size(this%col_vertices))) - allocate(this%fine_blocks(0)) - block_index = 1 - - row_visited = .false. - col_visited = .false. - row_edge = 0 - col_edge = 0 - - ! Compute blocks in H - do i = 1, size(this%row_vertices) - if (this%row_coarse(i) == 'H' .and. .not. row_visited(i)) call dfs_H_or_V(i, .true., 'H') - end do - do i = 1, size(this%col_vertices) - if (this%col_coarse(i) == 'H' .and. .not. col_visited(i)) call dfs_H_or_V(i, .false., 'H') - end do - - ! Compute blocks in S (Tarjan algorithm on graph consisting only of column - ! vertices, where rows have been merged with their matching columns) - allocate(col_stack(0)) - col_on_stack = .false. - index = 0 - col_index = -1 - do i = 1, size(this%col_vertices) - if (this%col_coarse(i) == 'S' .and. col_index(i) == -1) call strongconnect(i) - end do - - ! Compute blocks in V - do i = 1, size(this%row_vertices) - if (this%row_coarse(i) == 'V' .and. .not. row_visited(i)) call dfs_H_or_V(i, .true., 'V') - end do - do i = 1, size(this%col_vertices) - if (this%col_coarse(i) == 'V' .and. .not. col_visited(i)) call dfs_H_or_V(i, .false., 'V') - end do - contains - ! Depth-first search for identifying one block inside the H or V sub-matrix - subroutine dfs_H_or_V(id, is_row, coarse_type) - integer, intent(in) :: id ! Start vertex ID - logical, intent(in) :: is_row ! Whether start vertex is a row - character, intent(in) :: coarse_type ! Either 'H' or 'V' - - integer, dimension(:), allocatable :: path - type(dm_block) :: new_block - - allocate(path(1)) - path(1) = id - - if (is_row) then - this%row_fine_block(id) = block_index - new_block%row_indices = [ id ] - allocate(new_block%col_indices(0)) - row_visited(id) = .true. - else - this%col_fine_block(id) = block_index - new_block%col_indices = [ id ] - allocate(new_block%row_indices(0)) - col_visited(id) = .true. - end if - - do - associate (current_id => path(size(path))) - if ((mod(size(path), 2) == 0) .neqv. is_row) then ! Current vertex is a row - row_edge(current_id) = row_edge(current_id) + 1 - if (row_edge(current_id) > size(this%row_vertices(current_id)%edges)) then - if (size(path) == 1) exit - path = path(:size(path)-1) - else - associate (next_id => this%row_vertices(current_id)%edges(row_edge(current_id))) - if (this%col_coarse(next_id) == coarse_type .and. .not. col_visited(next_id)) then - col_visited(next_id) = .true. - path = [ path, next_id ] - this%col_fine_block(next_id) = block_index - new_block%col_indices = [ new_block%col_indices, next_id ] - end if - end associate - end if - else ! Current vertex is a column - col_edge(current_id) = col_edge(current_id) + 1 - if (col_edge(current_id) > size(this%col_vertices(current_id)%edges)) then - if (size(path) == 1) exit - path = path(:size(path)-1) - else - associate (next_id => this%col_vertices(current_id)%edges(col_edge(current_id))) - if (this%row_coarse(next_id) == coarse_type .and. .not. row_visited(next_id)) then - row_visited(next_id) = .true. - path = [ path, next_id ] - this%row_fine_block(next_id) = block_index - new_block%row_indices = [ new_block%row_indices, next_id ] - end if - end associate - end if - end if - end associate - end do - - new_block%coarse_type = coarse_type - this%fine_blocks = [ this%fine_blocks, new_block ] - block_index = block_index + 1 - end subroutine dfs_H_or_V - - ! Iterative version of the strongconnect routine from: - ! https://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm - subroutine strongconnect(col) - integer, intent(in) :: col - - integer, dimension(:), allocatable :: col_path - - col_index(col) = index - col_lowlink(col) = index - index = index + 1 - col_stack = [ col_stack, col ] - col_on_stack(col) = .true. - - allocate(col_path(1)) - col_path(1) = col - - do - associate (current_col => col_path(size(col_path))) - col_edge(current_col) = col_edge(current_col) + 1 - if (col_edge(current_col) > size(this%col_vertices(current_col)%edges)) then - ! We visited all edges from this col - ! If this is a root node, pop the stack and generate an SCC - if (col_lowlink(current_col) == col_index(current_col)) then - associate (current_idx => findloc_back(col_stack, current_col)) - associate (cols => col_stack(current_idx:)) - col_on_stack(cols) = .false. - associate (rows => this%col_matching(cols)) - this%col_fine_block(cols) = block_index - this%row_fine_block(rows) = block_index - this%fine_blocks = [ this%fine_blocks, dm_block(rows, cols, 'S') ] - end associate - end associate - block_index = block_index + 1 - col_stack = col_stack(:current_idx-1) - end associate - end if - - if (size(col_path) == 1) return - - associate (previous_col => col_path(size(col_path)-1)) - col_lowlink(previous_col) = min(col_lowlink(previous_col), col_lowlink(current_col)) - end associate - col_path = col_path(:size(col_path)-1) - else - associate (next_col => this%row_matching(this%col_vertices(current_col)%edges(col_edge(current_col)))) - if (this%col_coarse(next_col) == 'S' .and. col_index(next_col) == -1) then - col_index(next_col) = index - col_lowlink(next_col) = index - index = index + 1 - col_stack = [ col_stack, next_col ] - col_on_stack(next_col) = .true. - - col_path = [ col_path, next_col ] - else if (col_on_stack(next_col)) then - col_lowlink(current_col) = min(col_lowlink(current_col), col_index(next_col)) - end if - end associate - end if - end associate - end do - end subroutine strongconnect - - ! Equivalent of findloc(array, val, back = .true.) for rank-1 arrays - ! findloc is in the F2008 standard, but only implemented since gfortran 9 - function findloc_back(array, val) - integer, dimension(:), intent(in) :: array - integer, intent(in) :: val - integer :: findloc_back - - do findloc_back = ubound(array,1),lbound(array,1),-1 - if (array(findloc_back) == val) return - end do - error stop "Can’t find element" - end function findloc_back - - end subroutine matrix_graph_fine_decomposition - - ! Computes the DAG formed by fine blocks - subroutine matrix_graph_fine_blocks_dag(this) - class(matrix_graph), intent(inout) :: this - - integer :: blck, i, j - logical, dimension(size(this%fine_blocks)) :: marked - - ! Compute predecessors in the DAG - do blck = 1,size(this%fine_blocks) - marked = .false. - associate (row_indices => this%fine_blocks(blck)%row_indices) - do i = 1,size(row_indices) - associate (row_vertex => this%row_vertices(row_indices(i))) - do j = 1,size(row_vertex%edges) - marked(this%col_fine_block(row_vertex%edges(j))) = .true. - end do - end associate - end do - end associate - marked(blck) = .false. - this%fine_blocks(blck)%predecessors = pack([ (i, i=1,size(this%fine_blocks)) ], marked) - end do - - ! Compute successors in the DAG - do blck = 1,size(this%fine_blocks) - marked = .false. - associate (col_indices => this%fine_blocks(blck)%col_indices) - do i = 1,size(col_indices) - associate (col_vertex => this%col_vertices(col_indices(i))) - do j = 1,size(col_vertex%edges) - marked(this%row_fine_block(col_vertex%edges(j))) = .true. - end do - end associate - end do - end associate - marked(blck) = .false. - this%fine_blocks(blck)%successors = pack([ (i, i=1,size(this%fine_blocks)) ], marked) - end do - end subroutine matrix_graph_fine_blocks_dag - - ! Compute the blocks from the fine decomposition, including the DAG they form subroutine dm_blocks(mat, blocks) real(real64), dimension(:, :), intent(in) :: mat type(dm_block), dimension(:), allocatable, intent(out) :: blocks - type(matrix_graph) :: mg + type(c_ptr), dimension(1) :: call_rhs + type(c_ptr), dimension(4) :: call_lhs + real(real64), dimension(:, :), pointer :: mat_mx + real(real64), dimension(:), pointer :: p, q, r, s + integer :: i, j - call mg%init(mat) - call mg%maximum_matching - call mg%coarse_decomposition - call mg%fine_decomposition - call mg%fine_blocks_dag + call_rhs(1) = mxCreateDoubleMatrix(int(size(mat, 1), mwSize), int(size(mat, 2), mwSize), mxREAL) + mat_mx(1:size(mat,1), 1:size(mat,2)) => mxGetPr(call_rhs(1)) + mat_mx = mat - call move_alloc(mg%fine_blocks, blocks) - end subroutine dm_blocks + if (mexCallMATLAB(4_c_int, call_lhs, 1_c_int, call_rhs, "dmperm") /= 0) & + call mexErrMsgTxt("Error calling dmperm") - ! Equivalent of dmperm function of MATLAB/Octave - subroutine dmperm(mat, row_order, col_order, row_blocks, col_blocks) - real(real64), dimension(:, :), intent(in) :: mat - integer, dimension(size(mat, 1)), intent(out) :: row_order - integer, dimension(size(mat, 2)), intent(out) :: col_order - integer, dimension(:), allocatable, intent(out) :: row_blocks, col_blocks + call mxDestroyArray(call_rhs(1)) - type(matrix_graph) :: mg - integer :: blck, i + p => mxGetPr(call_lhs(1)) + q => mxGetPr(call_lhs(2)) + r => mxGetPr(call_lhs(3)) + s => mxGetPr(call_lhs(4)) - call mg%init(mat) - call mg%maximum_matching - call mg%coarse_decomposition - call mg%fine_decomposition - - allocate(row_blocks(size(mg%fine_blocks)+1)) - allocate(col_blocks(size(mg%fine_blocks)+1)) - - row_blocks(1) = 1 - col_blocks(1) = 1 - - do blck = 1,size(mg%fine_blocks) - associate (row_indices => mg%fine_blocks(blck)%row_indices) - do i = 1,size(row_indices) - row_order(row_blocks(blck)+i-1) = row_indices(i) - end do - row_blocks(blck+1) = row_blocks(blck)+i-1 - end associate - associate (col_indices => mg%fine_blocks(blck)%col_indices) - do i = 1,size(col_indices) - col_order(col_blocks(blck)+i-1) = col_indices(i) - end do - col_blocks(blck+1) = col_blocks(blck)+i-1 - end associate + allocate(blocks(size(r)-1)) + do i = 1, size(r)-1 + allocate(blocks(i)%row_indices(int(r(i+1)-r(i)))) + do j = 1, int(r(i+1)-r(i)) + blocks(i)%row_indices(j) = int(p(j+int(r(i))-1)) + end do + allocate(blocks(i)%col_indices(int(s(i+1)-s(i)))) + do j = 1, int(s(i+1)-s(i)) + blocks(i)%col_indices(j) = int(q(j+int(s(i))-1)) + end do end do - end subroutine dmperm + + call mxDestroyArray(call_lhs(1)) + call mxDestroyArray(call_lhs(2)) + call mxDestroyArray(call_lhs(3)) + call mxDestroyArray(call_lhs(4)) + end subroutine dm_blocks end module dulmage_mendelsohn From 865ab47fa9d4794ea52bf9596309b92c1b96d029 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Villemot?= Date: Thu, 16 Jul 2020 18:20:07 +0200 Subject: [PATCH 6/6] Provide block_trust_region MEX under solve_algo 13 and 14 - block trust region solver now available under solve_algo=13 It is essentially the same as solve_algo=4, except that Jacobian by finite difference is not handled. A test file is added for that case - block trust region solver with shortcut for equations that can be evaluated is now available under solve_algo=14 (in replacement of the pure-MATLAB solver) Closes: Enterprise/dynare#3 --- matlab/dynare_solve.m | 27 +++- matlab/steady_.m | 4 +- mex/build/block_trust_region.am | 1 + .../block_trust_region/mexFunction.f08 | 151 ++++++++++++++++-- tests/Makefile.am | 1 + .../example1_block_trust_region.mod | 47 ++++++ 6 files changed, 208 insertions(+), 23 deletions(-) create mode 100644 tests/steady_state/example1_block_trust_region.mod diff --git a/matlab/dynare_solve.m b/matlab/dynare_solve.m index 079bc1429..ba76c3ffc 100644 --- a/matlab/dynare_solve.m +++ b/matlab/dynare_solve.m @@ -232,13 +232,13 @@ elseif options.solve_algo==9 [x, errorflag] = trust_region(f, x, 1:nn, 1:nn, jacobian_flag, options.gstep, ... tolf, tolx, ... maxit, options.debug, arguments{:}); -elseif ismember(options.solve_algo, [2, 12, 4, 14]) +elseif ismember(options.solve_algo, [2, 12, 4]) if ismember(options.solve_algo, [2, 12]) solver = @solve1; else solver = @trust_region; end - specializedunivariateblocks = ismember(options.solve_algo, [12, 14]); + specializedunivariateblocks = options.solve_algo == 12; if ~jacobian_flag fjac = zeros(nn,nn) ; dh = max(abs(x), options.gstep(1)*ones(nn,1))*eps^(1/3); @@ -251,19 +251,19 @@ elseif ismember(options.solve_algo, [2, 12, 4, 14]) [j1,j2,r,s] = dmperm(fjac); JAC = abs(fjac(j1,j2))>0; if options.debug - disp(['DYNARE_SOLVE (solve_algo=2|4|12|14): number of blocks = ' num2str(length(r)-1)]); + disp(['DYNARE_SOLVE (solve_algo=2|4|12): number of blocks = ' num2str(length(r)-1)]); end l = 0; fre = false; for i=length(r)-1:-1:1 blocklength = r(i+1)-r(i); if options.debug - dprintf('DYNARE_SOLVE (solve_algo=2|4|12|14): solving block %u of size %u.', i, blocklength); + dprintf('DYNARE_SOLVE (solve_algo=2|4|12): solving block %u of size %u.', i, blocklength); end j = r(i):r(i+1)-1; if specializedunivariateblocks if options.debug - dprintf('DYNARE_SOLVE (solve_algo=2|4|12|14): solving block %u by evaluating RHS.', i); + dprintf('DYNARE_SOLVE (solve_algo=2|4|12): solving block %u by evaluating RHS.', i); end if isequal(blocklength, 1) if i. -if options_.solve_algo < 0 || options_.solve_algo > 12 - error('STEADY: solve_algo must be between 0 and 12') +if options_.solve_algo < 0 || options_.solve_algo > 14 + error('STEADY: solve_algo must be between 0 and 14') end if ~options_.bytecode && ~options_.block && options_.solve_algo > 4 && ... diff --git a/mex/build/block_trust_region.am b/mex/build/block_trust_region.am index cb72f64a9..0a3e3f8e0 100644 --- a/mex/build/block_trust_region.am +++ b/mex/build/block_trust_region.am @@ -11,6 +11,7 @@ nodist_block_trust_region_SOURCES = \ BUILT_SOURCES = $(nodist_block_trust_region_SOURCES) CLEANFILES = $(nodist_block_trust_region_SOURCES) +dulmage_mendelsohn.o: matlab_mex.mod dulmage_mendelsohn.mod: dulmage_mendelsohn.o matlab_fcn_closure.mod: matlab_fcn_closure.o diff --git a/mex/sources/block_trust_region/mexFunction.f08 b/mex/sources/block_trust_region/mexFunction.f08 index d375c4250..35d9e7d7a 100644 --- a/mex/sources/block_trust_region/mexFunction.f08 +++ b/mex/sources/block_trust_region/mexFunction.f08 @@ -1,4 +1,4 @@ -! Copyright © 2019 Dynare Team +! Copyright © 2019-2020 Dynare Team ! ! This file is part of Dynare. ! @@ -31,39 +31,104 @@ subroutine mexFunction(nlhs, plhs, nrhs, prhs) bind(c, name='mexFunction') real(real64), dimension(:), allocatable, target :: x type(dm_block), dimension(:), allocatable, target :: blocks integer :: info, i - real(real64), parameter :: tolf = 1e-6_real64 + real(real64) :: tolf, tolx + integer :: maxiter real(real64), dimension(:), allocatable :: fvec real(real64), dimension(:,:), allocatable :: fjac - logical :: debug + logical :: debug, specializedunivariateblocks character(len=80) :: debug_msg + logical(mxLogical), dimension(:), pointer :: isloggedlhs => null(), & + isauxdiffloggedrhs => null() + type(c_ptr) :: endo_names, lhs + logical :: fre ! True if the last block has been solved (i.e. not evaluated), so that residuals must be updated + integer, dimension(:), allocatable :: evaled_cols ! If fre=.false., lists the columns that have been evaluated so far without updating the residuals - if (nrhs < 3 .or. nlhs /= 2) then - call mexErrMsgTxt("Must have at least 3 inputs and exactly 2 outputs") + if (nrhs < 4 .or. nlhs /= 2) then + call mexErrMsgTxt("Must have at least 7 inputs and exactly 2 outputs") return end if if (.not. ((mxIsChar(prhs(1)) .and. mxGetM(prhs(1)) == 1) .or. mxIsClass(prhs(1), "function_handle"))) then - call mexErrMsgTxt("First argument should be a string or a function handle") + call mexErrMsgTxt("First argument (function) should be a string or a function handle") return end if if (.not. (mxIsDouble(prhs(2)) .and. (mxGetM(prhs(2)) == 1 .or. mxGetN(prhs(2)) == 1))) then - call mexErrMsgTxt("Second argument should be a real vector") + call mexErrMsgTxt("Second argument (initial guess) should be a real vector") return end if - if (.not. (mxIsLogicalScalar(prhs(3)))) then - call mexErrMsgTxt("Third argument should be a logical scalar") + if (.not. (mxIsScalar(prhs(3)) .and. mxIsNumeric(prhs(3)))) then + call mexErrMsgTxt("Third argument (tolf) should be a numeric scalar") return end if + if (.not. (mxIsScalar(prhs(4)) .and. mxIsNumeric(prhs(4)))) then + call mexErrMsgTxt("Fourth argument (tolx) should be a numeric scalar") + return + end if + + if (.not. (mxIsScalar(prhs(5)) .and. mxIsNumeric(prhs(5)))) then + call mexErrMsgTxt("Fifth argument (maxiter) should be a numeric scalar") + return + end if + + if (.not. (mxIsLogicalScalar(prhs(6)))) then + call mexErrMsgTxt("Sixth argument (debug) should be a logical scalar") + return + end if + + if (.not. (mxIsStruct(prhs(7)) .and. & + (mxGetNumberOfFields(prhs(7)) == 0 .or. mxGetNumberOfFields(prhs(7)) == 4))) then + call mexErrMsgTxt("Seventh argument should be a struct with either 0 or 4 fields") + return + end if + specializedunivariateblocks = (mxGetNumberOfFields(prhs(7)) == 4) + func => prhs(1) - debug = mxGetScalar(prhs(3)) == 1._c_double - extra_args => prhs(4:nrhs) + tolf = mxGetScalar(prhs(3)) + tolx = mxGetScalar(prhs(4)) + maxiter = int(mxGetScalar(prhs(5))) + debug = mxGetScalar(prhs(6)) == 1._c_double + extra_args => prhs(8:nrhs) ! Extra arguments to func are in argument 8 and subsequent ones associate (x_mat => mxGetPr(prhs(2))) allocate(x(size(x_mat))) x = x_mat end associate + if (specializedunivariateblocks) then + block + type(c_ptr) :: tmp + tmp = mxGetField(prhs(7), 1_mwIndex, "isloggedlhs") + if (.not. (c_associated(tmp) .and. mxIsLogical(tmp) .and. mxGetNumberOfElements(tmp) == size(x))) then + call mexErrMsgTxt("Seventh argument must have a 'isloggedlhs' field of type logical, of same size as second argument") + return + end if + isloggedlhs => mxGetLogicals(tmp) + + tmp = mxGetField(prhs(7), 1_mwIndex, "isauxdiffloggedrhs") + if (.not. (c_associated(tmp) .and. mxIsLogical(tmp) .and. mxGetNumberOfElements(tmp) == size(x))) then + call mexErrMsgTxt("Seventh argument must have a 'isauxdiffloggedrhs' field of type & + &logical, of same size as second argument") + return + end if + isauxdiffloggedrhs => mxGetLogicals(tmp) + + lhs = mxGetField(prhs(7), 1_mwIndex, "lhs") + if (.not. (c_associated(lhs) .and. mxIsCell(lhs) .and. mxGetNumberOfElements(lhs) == size(x))) then + call mexErrMsgTxt("Seventh argument must have a 'lhs' field of type cell, of same size as second argument") + return + end if + + endo_names = mxGetField(prhs(7), 1_mwIndex, "endo_names") + if (.not. (c_associated(endo_names) .and. mxIsCell(endo_names) .and. mxGetNumberOfElements(endo_names) == size(x))) then + call mexErrMsgTxt("Seventh argument must have a 'endo_names' field of type cell, of same size as second argument") + return + end if + end block + + allocate(evaled_cols(0)) + fre = .false. + end if allocate(fvec(size(x))) allocate(fjac(size(x), size(x))) @@ -79,7 +144,6 @@ subroutine mexFunction(nlhs, plhs, nrhs, prhs) bind(c, name='mexFunction') end if ! Solve the system, starting from bottom-rightmost block - x_all => x do i = size(blocks),1,-1 if (debug) then write (debug_msg, "('DYNARE_SOLVE (solve_algo=13|14): solving block ', i0, ' of size ', i0)") & @@ -87,18 +151,77 @@ subroutine mexFunction(nlhs, plhs, nrhs, prhs) bind(c, name='mexFunction') call mexPrintf_trim_newline(debug_msg) end if + if (specializedunivariateblocks .and. size(blocks(i)%col_indices) == 1) then + if (debug) then + write (debug_msg, "('DYNARE_SOLVE (solve_algo=13|14): solving block ', i0, ' by evaluating RHS')") i + call mexPrintf_trim_newline(debug_msg) + end if + associate (eq => blocks(i)%row_indices(1), var => blocks(i)%col_indices(1)) + if (fre .or. any(abs(fjac(eq, evaled_cols)) > 0._real64)) then + ! Reevaluation of the residuals is required because the current RHS depends on + ! variables that potentially have been updated previously. + nullify(x_indices, f_indices, x_all) + call matlab_fcn(x, fvec) + deallocate(evaled_cols) ! This shouldn’t be necessary, but it crashes otherwise with gfortran 8 + allocate(evaled_cols(0)) + fre = .false. + end if + evaled_cols = [ evaled_cols, var] + block + ! An associate() construct for lhs_eq and endo_name_var makes the + ! code crash (with double free) using gfortran 8. Hence use a block + character(kind=c_char, len=:), allocatable :: lhs_eq, endo_name_var + lhs_eq = mxArrayToString(mxGetCell(lhs, int(eq, mwIndex))) + endo_name_var = mxArrayToString(mxGetCell(endo_names, int(var, mwIndex))) + if (lhs_eq == endo_name_var .or. lhs_eq == "log(" // endo_name_var // ")") then + if (isloggedlhs(eq)) then + x(var) = exp(log(x(var)) - fvec(eq)) + else + x(var) = x(var) - fvec(eq) + end if + else + if (debug) then + write (debug_msg, "('LHS variable is not determined by RHS expression (', i0, ')')") eq + call mexPrintf_trim_newline(debug_msg) + write (debug_msg, "(a, ' -> ', a)") lhs_eq, endo_name_var + call mexPrintf_trim_newline(debug_msg) + end if + if (lhs_eq(1:9) == "AUX_DIFF_" .or. lhs_eq(1:13) == "log(AUX_DIFF_") then + if (isauxdiffloggedrhs(eq)) then + x(var) = exp(log(x(var)) + fvec(eq)) + else + x(var) = x(var) + fvec(eq) + end if + else + call mexErrMsgTxt("Algorithm solve_algo=14 cannot be used with this nonlinear problem") + return + end if + end if + end block + end associate + cycle + else + if (debug) then + write (debug_msg, "('DYNARE_SOLVE (solve_algo=13|14): solving block ', i0, ' with trust region routine')") i + call mexPrintf_trim_newline(debug_msg) + end if + end if + block real(real64), dimension(size(blocks(i)%col_indices)) :: x_block x_indices => blocks(i)%col_indices f_indices => blocks(i)%row_indices + x_all => x if (size(x_indices) /= size(f_indices)) then call mexErrMsgTxt("Non-square block") return end if x_block = x(x_indices) - call trust_region_solve(x_block, matlab_fcn, info, tolf = tolf) + call trust_region_solve(x_block, matlab_fcn, info, tolx, tolf, maxiter) x(x_indices) = x_block end block + + fre = .true. end do ! Verify that we have a solution @@ -113,7 +236,7 @@ subroutine mexFunction(nlhs, plhs, nrhs, prhs) bind(c, name='mexFunction') if (maxval(abs(fvec)) > tolf) then if (debug) & call mexPrintf_trim_newline("DYNARE_SOLVE (solve_algo=13|14): residuals still too large, solving for the whole model") - call trust_region_solve(x, matlab_fcn, info, tolf = tolf) + call trust_region_solve(x, matlab_fcn, info, tolx, tolf, maxiter) else info = 1 end if diff --git a/tests/Makefile.am b/tests/Makefile.am index 9da3c18dc..b8333e142 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -134,6 +134,7 @@ MODFILES = \ steady_state/walsh1_ssm_block.mod \ steady_state/multi_leads.mod \ steady_state/example1_trust_region.mod \ + steady_state/example1_block_trust_region.mod \ steady_state/Gali_2015_chapter_6_4.mod \ steady_state_operator/standard.mod \ steady_state_operator/use_dll.mod \ diff --git a/tests/steady_state/example1_block_trust_region.mod b/tests/steady_state/example1_block_trust_region.mod new file mode 100644 index 000000000..54643e083 --- /dev/null +++ b/tests/steady_state/example1_block_trust_region.mod @@ -0,0 +1,47 @@ +// Test block trust region nonlinear solver (solve_algo=13) +var y, c, k, a, h, b; +varexo e, u; + +parameters beta, rho, alpha, delta, theta, psi, tau; + +alpha = 0.36; +rho = 0.95; +tau = 0.025; +beta = 0.99; +delta = 0.025; +psi = 0; +theta = 2.95; + +phi = 0.1; + +model; +c*theta*h^(1+psi)=(1-alpha)*y; +k = beta*(((exp(b)*c)/(exp(b(+1))*c(+1))) + *(exp(b(+1))*alpha*y(+1)+(1-delta)*k)); +y = exp(a)*(k(-1)^alpha)*(h^(1-alpha)); +k = exp(b)*(y-c)+(1-delta)*k(-1); +a = rho*a(-1)+tau*b(-1) + e; +b = tau*a(-1)+rho*b(-1) + u; +end; + +initval; +y = 1; +c = 0.8; +h = 0.3; +k = 10; +a = 0; +b = 0; +e = 0; +u = 0; +end; + +options_.debug = true; +steady(solve_algo=13); + +shocks; +var e; stderr 0.009; +var u; stderr 0.009; +var e, u = phi*0.009*0.009; +end; + +stoch_simul(order=1,nomoments,irf=0);