Implements a cycle reduction Fortran routine and the associated test.

The Fortran routine replaces the former Matlab code in matlab/cycle_reduction.m
2022-09-28 11:53:22 +02:00 · 2022-09-28 11:53:22 +02:00 · d17204cc84
parent 7af14aaa0d
commit d17204cc84
14 changed files with 424 additions and 7 deletions
--- a/matlab/missing/mex/cycle_reduction/cycle_reduction.m
+++ b/matlab/missing/mex/cycle_reduction/cycle_reduction.m
--- a/mex/build/cycle_reduction.am
+++ b/mex/build/cycle_reduction.am
@ -0,0 +1,17 @@
+mex_PROGRAMS = cycle_reduction 
+
+nodist_cycle_reduction_SOURCES = \
+	mexFunction.f08 \
+	matlab_mex.F08 \
+	blas_lapack.F08
+
+BUILT_SOURCES = $(nodist_cycle_reduction_SOURCES)
+CLEANFILES = $(nodist_cycle_reduction_SOURCES)
+
+mexFunction.o: matlab_mex.mod blas.mod lapack.mod
+
+%.f08: $(top_srcdir)/../../sources/cycle_reduction/%.f08
+	$(LN_S) -f $< $@
+
+%.F08: $(top_srcdir)/../../sources/cycle_reduction/%.F08
+	$(LN_S) -f $< $@
--- a/mex/build/matlab/Makefile.am
+++ b/mex/build/matlab/Makefile.am
@ -1,6 +1,6 @@
 ACLOCAL_AMFLAGS = -I ../../../m4

-SUBDIRS = mjdgges kronecker bytecode block_kalman_filter sobol perfect_foresight_problem num_procs block_trust_region disclyap_fast libkordersim local_state_space_iterations folded_to_unfolded_dr k_order_simul k_order_mean
+SUBDIRS = mjdgges kronecker bytecode block_kalman_filter sobol perfect_foresight_problem num_procs block_trust_region disclyap_fast libkordersim local_state_space_iterations folded_to_unfolded_dr k_order_simul k_order_mean cycle_reduction

 # libdynare++ must come before gensylv and k_order_perturbation
 if ENABLE_MEX_DYNAREPLUSPLUS
--- a/mex/build/matlab/configure.ac
+++ b/mex/build/matlab/configure.ac
@ -169,6 +169,7 @@ AC_CONFIG_FILES([Makefile
                 perfect_foresight_problem/Makefile
                 num_procs/Makefile
                 block_trust_region/Makefile
-                 disclyap_fast/Makefile])
+                 disclyap_fast/Makefile
+                 cycle_reduction/Makefile])

 AC_OUTPUT
--- a/mex/build/matlab/cycle_reduction/Makefile.am
+++ b/mex/build/matlab/cycle_reduction/Makefile.am
@ -0,0 +1,2 @@
+include ../mex.am
+include ../../cycle_reduction.am
--- a/mex/build/octave/Makefile.am
+++ b/mex/build/octave/Makefile.am
@ -1,6 +1,6 @@
 ACLOCAL_AMFLAGS = -I ../../../m4

-SUBDIRS = mjdgges kronecker bytecode block_kalman_filter sobol perfect_foresight_problem num_procs block_trust_region disclyap_fast libkordersim local_state_space_iterations folded_to_unfolded_dr k_order_simul k_order_mean
+SUBDIRS = mjdgges kronecker bytecode block_kalman_filter sobol perfect_foresight_problem num_procs block_trust_region disclyap_fast libkordersim local_state_space_iterations folded_to_unfolded_dr k_order_simul k_order_mean cycle_reduction

 # libdynare++ must come before gensylv and k_order_perturbation
 if ENABLE_MEX_DYNAREPLUSPLUS
--- a/mex/build/octave/configure.ac
+++ b/mex/build/octave/configure.ac
@ -172,6 +172,7 @@ AC_CONFIG_FILES([Makefile
                 perfect_foresight_problem/Makefile
                 num_procs/Makefile
                 block_trust_region/Makefile
-                 disclyap_fast/Makefile])
+                 disclyap_fast/Makefile
+                 cycle_reduction/Makefile])

 AC_OUTPUT
--- a/mex/build/octave/cycle_reduction/Makefile.am
+++ b/mex/build/octave/cycle_reduction/Makefile.am
@ -0,0 +1,3 @@
+EXEEXT = .mex
+include ../mex.am
+include ../../cycle_reduction.am
--- a/mex/sources/Makefile.am
+++ b/mex/sources/Makefile.am
@ -25,7 +25,8 @@ EXTRA_DIST = \
 	perfect_foresight_problem \
 	num_procs \
 	block_trust_region \
-	disclyap_fast
+	disclyap_fast \
+	cycle_reduction

 clean-local:
 	rm -rf `find mex/sources -name *.o`
--- a/mex/sources/blas_lapack.F08
+++ b/mex/sources/blas_lapack.F08
@ -93,4 +93,16 @@ module lapack
       integer(blint), intent(out) :: info
     end subroutine dpotrf
  end interface
+
+  interface
+     function dlange(norm, m, n, a, lda, work)
+       import :: blint, real64
+       character, intent(in) :: norm
+       integer(blint), intent(in) :: m, n, lda
+       real(real64), dimension(*), intent(in) :: a
+       real(real64), dimension(*), intent(out) :: work
+       real(real64) :: dlange
+     end function dlange
+  end interface
+  
 end module lapack
--- a/mex/sources/cycle_reduction/mexFunction.f08
+++ b/mex/sources/cycle_reduction/mexFunction.f08
@ -0,0 +1,233 @@
+! Copyright © 2022 Dynare Team
+!
+! This file is part of Dynare.
+!
+! Dynare is free software: you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation, either version 3 of the License, or
+! (at your option) any later version.
+!
+! Dynare is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License
+! along with Dynare.  If not, see <https://www.gnu.org/licenses/>.
+
+! Implements the cyclic reduction algorithm described in
+! D.A. Bini, G. Latouche, B. Meini (2002), "Solving matrix polynomial equations arising in queueing problems", Linear Algebra and its Applications 340, pp. 222-244
+! D.A. Bini, B. Meini (1996), "On the solution of a nonlinear matrix equation arising in queueing problems", SIAM J. Matrix Anal. Appl. 17, pp. 906-926.
+
+module reduction
+   use matlab_mex
+   use lapack
+   use blas
+   implicit none
+
+contains
+   ! Matlab-like norm function calling lapack DLANGE routine
+   real(real64) function norm(A, c)
+      real(real64), dimension(:,:), intent(in) :: A
+      character, intent(in) :: c
+      real(real64), dimension(:), allocatable :: work
+      integer(blint) :: nrow
+      nrow = int(size(A,1),blint)
+      norm = dlange(c, nrow, int(size(A,2), blint), &
+                   &A, nrow, work)
+   end function norm
+
+   ! Updating a matrix using blas DGEMM
+   ! C <- alpha*A*B + beta*C
+   subroutine updatemat(alpha, A, B, beta, C)
+      real(real64), dimension(:,:), intent(in) :: A, B      
+      real(real64), dimension(:,:), intent(inout) :: C 
+      real(real64), intent(in) :: alpha, beta
+      integer(blint) :: m, n, k
+      m = int(size(A,1), blint)
+      n = int(size(B,2), blint)
+      k = int(size(A,2), blint)
+      call dgemm("N", "N", m, n, k, alpha, A, m, B, &
+                 k, beta, C, m)
+   end subroutine
+
+   ! Computing inv(A)*B using lapack DGESV
+   ! B <- inv(A)*B
+   subroutine invA_B(A, B, ipiv, info)
+      real(real64), dimension(:,:), intent(inout) :: A
+      real(real64), dimension(:,:), intent(inout) :: B
+      integer(blint), dimension(:), intent(inout) :: ipiv
+      integer(blint), intent(inout) :: info
+      integer(blint) :: m, n
+      m = int(size(A,1), blint)
+      n = int(size(B,2), blint)
+      call dgesv(m, n, A, m, ipiv, B, m, info)
+   end subroutine invA_B
+
+   ! Cycle reduction algorithm
+   subroutine cycle_reduction(A0, A1, A2, X, cvg_tol, check, info)
+      real(real64), dimension(:,:), intent(in) :: A0, A1, A2
+      real(real64), dimension(:,:), intent(inout) :: X
+      real(real64), intent(in) :: cvg_tol
+      logical, intent(in) :: check
+      real(c_double), dimension(2), intent(inout) :: info
+      
+      real(real64), dimension(:,:), allocatable :: A02, &
+      Q0, Q2, Ahat, invA1_A02, A1i, A0_tmp, A1_tmp
+      integer :: it, n, dn, max_it
+      integer(blint) :: info_inv
+      integer(blint), dimension(:), allocatable :: ipiv
+      real(real64) :: residual, crit
+      character(kind=c_char, len=10) :: cvg_tol_str, residual_str
+
+      Ahat = A1
+      A1i = A1
+      n = size(A0,1)
+      dn = 2*n
+      allocate(A02(n,dn), ipiv(n), invA1_A02(n,dn), Q0(n,dn), Q2(n,dn))
+      A02(:,1:n) = A0
+      A02(:,n+1:dn) = A2
+      it = 0
+      max_it = 300
+loop: do
+         ! Computing [A0;A2]*(A1\[A0 A2]) 
+         A1_tmp = A1i
+         invA1_A02 = A02
+         call invA_B(A1_tmp, invA1_A02, ipiv, info_inv)
+         call updatemat(1._real64, A02(:,1:n), invA1_A02, 0._real64, Q0)
+         call updatemat(1._real64, A02(:,n+1:dn), invA1_A02, 0._real64, Q2)
+         ! Updating A02, A1 and Ahat
+         A1i = A1i - Q0(:,n+1:dn) - Q2(:,1:n)
+         A02(:,1:n) = -Q0(:,1:n)
+         A02(:,n+1:dn) = -Q2(:,n+1:dn)
+         Ahat = Ahat-Q2(:,1:n)
+         crit = norm(A02(:,1:n),"1")
+         ! Checking for stopping conditions
+         if (crit < cvg_tol) then
+            if (norm(A02(:,n+1:dn), "1") < cvg_tol) then
+               exit loop
+            end if
+         elseif (it == max_it) then
+            info(1) = 401._c_double
+            info(2) = real(log(norm(A1i,"1")), c_double)
+            exit loop
+         elseif (isnan(crit) .or. (info_inv /= 0_blint)) then
+            info(1) = 402._c_double
+            info(2) = real(log(norm(A1i,"1")), c_double)
+            exit loop
+         end if
+         it = it + 1
+      end do loop
+
+      ! Computing X = -Ahat\A0
+      X = -A0
+      call invA_B(Ahat, X, ipiv, info_inv)
+
+      ! Checking residuals if necessary
+      if (check) then
+         ! A1_tmp <- A2*X + A1
+         A1_tmp = A1
+         call updatemat(1._real64, A2, X, 1._real64, A1_tmp)
+         ! A0_tmp <- A1_tmp*X + A0
+         A0_tmp = A0
+         call updatemat(1._real64, A1_tmp, X, 1._real64, A0_tmp)
+         residual = norm(A0_tmp, "1")
+         if (residual>cvg_tol) then
+            info(1) = 403._c_double
+            info(2) = log(residual)
+            write (cvg_tol_str,"(es8.2)") cvg_tol
+            write (residual_str,"(es8.2)") residual
+            call mexErrMsgTxt("The norm of the residual is "&
+                          &// trim(residual_str) // &
+                          &", whereas the tolerance criterion is " &
+                           // trim(cvg_tol_str) // "." )
+         end if
+      end if
+   end subroutine cycle_reduction
+
+end module reduction
+
+subroutine mexFunction(nlhs, plhs, nrhs, prhs) bind(c, name='mexFunction')
+   use reduction 
+   use matlab_mex
+   implicit none   
+
+   type(c_ptr), dimension(*), intent(in), target :: prhs
+   type(c_ptr), dimension(*), intent(out) :: plhs
+   integer(c_int), intent(in), value :: nlhs, nrhs
+
+   integer :: i, n
+   character(kind=c_char, len=2) :: num2str 
+   real(real64) :: cvg_tol
+   real(real64), dimension(:,:), pointer,  contiguous :: A0, A1, A2, X
+   real(real64), dimension(2) :: info
+   logical :: check
+
+   ! 0. Checking the consistency and validity of input arguments
+   if (nrhs < 4) then
+      call mexErrMsgTxt("Must have at least 4 inputs")
+   end if
+   if (nrhs > 5) then
+      call mexErrMsgTxt("Too many input arguments")
+   end if
+   if (nlhs > 2) then
+      call mexErrMsgTxt("Too many output arguments")
+   end if
+   
+   do i=1,3
+      if (.not. (c_associated(prhs(i)) .and. mxIsDouble(prhs(i)) .and. & 
+          (.not. mxIsComplex(prhs(i))) .and. (.not. mxIsSparse(prhs(i))))) then
+            write (num2str,"(i2)") i
+            call mexErrMsgTxt("Argument" // trim(num2str) // " should be a real dense matrix")
+      end if
+   end do
+   if (.not. (c_associated(prhs(4)) .and. mxIsScalar(prhs(4)) .and. &
+       mxIsNumeric(prhs(4)))) then
+      call mexErrMsgTxt("Argument 4 should be a numeric scalar")
+   end if
+
+   check = .false.
+   if (nrhs == 5) then
+      if (.not. (c_associated(prhs(5)))) then
+         call mexErrMsgTxt("Argument 5 should be a Matlab object")
+      else
+         if (.not. (mxIsEmpty(prhs(5)))) then
+            check = .true.
+         end if
+      end if
+   end if
+
+   n = int(mxGetM(prhs(1)))            ! Order of the considered matrices
+   if ((n /= mxGetN(prhs(1))) &        ! Number of columns of A0
+      &.or. (n /= mxGetM(prhs(2))) &   ! Number of lines of A1
+      &.or. (n /= mxGetN(prhs(2))) &   ! Number of columns of A1
+      &.or. (n /= mxGetM(prhs(3))) &   ! Number of lines of A2
+      &.or. (n /= mxGetN(prhs(3))) &   ! Number of columns of A2
+      ) then  
+      call mexErrMsgTxt("Input dimension mismatch")
+   end if
+   
+   ! 1. Storing the relevant information in Fortran format
+   A0(1:n,1:n) => mxGetPr(prhs(1))
+   A1(1:n,1:n) => mxGetPr(prhs(2))
+   A2(1:n,1:n) => mxGetPr(prhs(3))
+   cvg_tol = mxGetScalar(prhs(4))
+   info = [0._c_double,0._c_double]
+
+   plhs(1) = mxCreateDoubleMatrix(int(n, mwSize), int(n, mwSize), mxREAL)
+   X(1:n,1:n) => mxGetPr(plhs(1))
+
+   ! 2. Calling the Cycle Reduction algorithm
+   call cycle_reduction(A0, A1, A2, X, cvg_tol, check, info)
+
+   ! 3. Editing the information output if necessary
+   if (nlhs == 2) then
+      if (info(1) == 0.) then
+         plhs(2) = mxCreateDoubleScalar(0._c_double)
+      else
+         plhs(2) = mxCreateDoubleMatrix(1_mwSize, 2_mwSize, mxREAL)
+         mxGetPr(plhs(2)) = info
+      end if
+   end if
+
+end subroutine mexFunction
--- a/mex/sources/matlab_mex.F08
+++ b/mex/sources/matlab_mex.F08
@ -105,6 +105,11 @@ module matlab_mat
       type(c_ptr), intent(in), value :: pm
     end function mxGetN

+     logical(c_bool) function mxIsEmpty(pm) bind(c, name="mxIsEmpty"//API_VER)
+       use iso_c_binding
+       type(c_ptr), intent(in), value :: pm
+     end function mxIsEmpty
+
     !! Create, Query, and Access Data Types

     ! Numeric types
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@ -1223,7 +1223,8 @@ M_TRS_FILES += 	run_block_byte_tests_matlab.m.trs \
 		test_aggregate_routine_1_2.m.trs \
 		test_aggregate_routine_1_2_3.m.trs \
 		run_kronecker_tests.m.trs \
-		nonlinearsolvers.m.trs
+		nonlinearsolvers.m.trs \
+		cyclereduction.m.trs

 M_XFAIL_TRS_FILES = $(patsubst %.mod, %.m.trs, $(XFAIL_MODFILES))

@ -1235,7 +1236,8 @@ O_TRS_FILES += 	run_block_byte_tests_octave.o.trs \
 		run_all_unitary_tests.o.trs \
 		histval_initval_file_unit_tests.o.trs \
 		run_kronecker_tests.o.trs \
-		nonlinearsolvers.o.trs
+		nonlinearsolvers.o.trs \
+		cyclereduction.o.trs

 O_XFAIL_TRS_FILES = $(patsubst %.mod, %.o.trs, $(XFAIL_MODFILES))

--- a/tests/cyclereduction.m
+++ b/tests/cyclereduction.m
@ -0,0 +1,140 @@
+debug = false;
+
+if debug
+    [top_test_dir, ~, ~] = fileparts(mfilename('fullpath'));
+else
+    top_test_dir = getenv('TOP_TEST_DIR');
+end
+
+addpath([top_test_dir filesep '..' filesep 'matlab']);
+
+if ~debug
+    % Test Dynare Version
+    if ~strcmp(dynare_version(), getenv('DYNARE_VERSION'))
+        error('Incorrect version of Dynare is being tested')
+    end
+end
+
+dynare_config;
+
+NumberOfTests = 0;
+testFailed = 0;
+
+if ~debug
+    skipline()
+    disp('***  TESTING: cyclereduction.m ***');
+end
+
+matlab_cr_path = [top_test_dir filesep '..' filesep 'matlab' filesep 'missing' filesep 'mex' filesep 'cycle_reduction'];
+addpath(matlab_cr_path);
+cycle_reduction_matlab = @cycle_reduction;
+rmpath(matlab_cr_path);
+
+if isoctave
+   addpath([top_test_dir filesep '..' filesep 'mex' filesep 'octave']);
+else
+   addpath([top_test_dir filesep '..' filesep 'mex' filesep 'matlab']);
+end
+
+t0 = clock;
+
+% Set the dimension of the problem to be solved.
+n = 500;
+% Set the equation to be solved
+A = eye(n);
+B = diag(30*ones(n,1)); B(1,1) = 20; B(end,end) = 20; B = B - diag(10*ones(n-1,1),-1); B = B - diag(10*ones(n-1,1),1);
+C = diag(15*ones(n,1)); C = C - diag(5*ones(n-1,1),-1); C = C - diag(5*ones(n-1,1),1);
+cvg_tol = 1e-12;
+X1 = zeros(n,n);
+X2 = zeros(n,n);
+
+% 1. Solve the equation with the Matlab cycle reduction algorithm
+NumberOfTests = NumberOfTests+1;
+tElapsed1 = 0.;
+try
+   tic; [X1,info] = cycle_reduction_matlab(C,B,A,cvg_tol,[0.]); tElapsed1 = toc;
+   disp(['Elapsed time for the Matlab cycle reduction algorithm is: ' num2str(tElapsed1) ' (n=' int2str(n) ').'])
+   R = C+B*X1+A*X1*X1;
+   if (max(abs(R(:))) > cvg_tol)
+      testFailed = testFailed+1;
+      if debug
+         dprintf('Matlab cycle_reduction solution is wrong')
+      end
+   end
+catch
+   testFailed = testFailed+1;
+   if debug
+      dprintf('Matlab cycle_reduction failed')
+   end
+end
+
+% 2. Solve the equation with the Fortran cycle reduction algorithm
+NumberOfTests = NumberOfTests+1;
+tElapsed2 = 0.;
+try
+   tic; [X2,info] = cycle_reduction(C,B,A,cvg_tol,[0.]); tElapsed2 = toc;
+   disp(['Elapsed time for the Fortran cycle reduction algorithm is: ' num2str(tElapsed2) ' (n=' int2str(n) ').'])
+   R = C+B*X2+A*X2*X2;
+   if (max(abs(R(:))) > cvg_tol)
+      testFailed = testFailed+1;
+      if debug
+         dprintf('Fortran cycle_reduction solution is wrong')
+      end
+   end
+catch
+   testFailed = testFailed+1;
+   if debug
+      dprintf('Fortran cycle_reduction failed')
+   end
+end
+
+% 3. Compare solutions of the Fortran and Matlab routines
+NumberOfTests = NumberOfTests+1;
+if (max(abs(X1(:) - X2(:))) > cvg_tol)
+   testFailed = testFailed+1;
+   if debug
+      dprintf('Fortran and Matlab cycle reduction solutions differ');
+   end
+end
+
+% Compare the Fortran and Matlab execution time
+if debug
+   if tElapsed1<tElapsed2
+      skipline()
+      dprintf('Matlab cyclic reduction is %5.2f times faster than its Fortran counterpart.', tElapsed2/tElapsed1)
+      skipline()
+   else
+      skipline()
+      dprintf('Fortran cyclic reduction is %5.2f times faster than its Matlab counterpart.', tElapsed1/tElapsed2)
+      skipline()
+   end
+end
+
+t1 = clock;
+
+if ~debug
+    cd(getenv('TOP_TEST_DIR'));
+else
+    dprintf('FAILED tests: %i', testFailed)
+end
+
+if  isoctave
+    fid = fopen('cyclereduction.o.trs', 'w+');
+else
+    fid = fopen('cyclereduction.m.trs', 'w+');
+end
+if testFailed
+    fprintf(fid,':test-result: FAIL\n');
+else
+    fprintf(fid,':test-result: PASS\n');
+end
+fprintf(fid,':number-tests: %i\n', NumberOfTests);
+fprintf(fid,':number-failed-tests: %i\n', testFailed);
+fprintf(fid,':list-of-passed-tests: cyclereduction.m\n');
+fprintf(fid,':elapsed-time: %f\n', etime(t1, t0));
+fclose(fid);
+
+if ~debug
+    exit;
+end
+