Parallelizes local_state_space_iteration_fortran
parent
3f00b51950
commit
5f828e241f
|
@ -7,7 +7,9 @@ nodist_libkordersim_a_SOURCES = \
|
|||
sort.f08 \
|
||||
partitions.f08 \
|
||||
simulation.f08 \
|
||||
struct.f08
|
||||
struct.f08 \
|
||||
pthread.F08 \
|
||||
pparticle.f08
|
||||
|
||||
BUILT_SOURCES = $(nodist_libkordersim_a_SOURCES)
|
||||
CLEANFILES = $(nodist_libkordersim_a_SOURCES)
|
||||
|
@ -25,5 +27,8 @@ partitions.mod: partitions.o
|
|||
simulation.o: partitions.mod lapack.mod
|
||||
simulation.mod: simulation.o
|
||||
|
||||
pparticle.o: simulation.mod matlab_mex.mod
|
||||
pparticle.mod: pparticle.o
|
||||
|
||||
%.f08: $(top_srcdir)/../../sources/libkordersim/%.f08
|
||||
$(LN_S) -f $< $@
|
||||
|
|
|
@ -1,11 +1,11 @@
|
|||
mex_PROGRAMS = local_state_space_iteration_fortran
|
||||
|
||||
local_state_space_iteration_fortran_FCFLAGS = $(AM_FCFLAGS) -I../libkordersim
|
||||
local_state_space_iteration_fortran_FCFLAGS = $(AM_FCFLAGS) -I../libkordersim -pthread
|
||||
|
||||
nodist_local_state_space_iteration_fortran_SOURCES = \
|
||||
mexFunction.f08
|
||||
|
||||
local_state_space_iteration_fortran_LDADD = ../libkordersim/libkordersim.a
|
||||
local_state_space_iteration_fortran_LDADD = ../libkordersim/libkordersim.a
|
||||
|
||||
BUILT_SOURCES = $(nodist_local_state_space_iteration_fortran_SOURCES)
|
||||
CLEANFILES = $(nodist_local_state_space_iteration_fortran_SOURCES)
|
||||
|
|
|
@ -56,6 +56,9 @@ AM_PROG_AR
|
|||
|
||||
AX_CXX11_THREAD
|
||||
|
||||
# Get the size of a pthread_t instance, needed by pthread.F08
|
||||
AC_CHECK_SIZEOF([pthread_t], [], [#include <pthread.h>])
|
||||
|
||||
# Check for dlopen(), needed by k_order_perturbation DLL
|
||||
AC_CHECK_LIB([dl], [dlopen], [LIBADD_DLOPEN="-ldl"], [])
|
||||
AC_SUBST([LIBADD_DLOPEN])
|
||||
|
|
|
@ -32,7 +32,7 @@ clean-local:
|
|||
cd $(top_srcdir)/../../matlab && rm -f $(PROGRAMS); \
|
||||
fi
|
||||
|
||||
# Rules for the Fortran 2008 interface to MEX and BLAS/LAPACK functions
|
||||
# Rules for the Fortran 2008 interface to MEX, BLAS/LAPACK and pthread functions
|
||||
matlab_mat.mod: matlab_mex.o
|
||||
matlab_mex.mod: matlab_mex.o
|
||||
|
||||
|
@ -44,3 +44,8 @@ lapack.mod: blas_lapack.o
|
|||
|
||||
blas_lapack.F08: $(top_srcdir)/../../sources/blas_lapack.F08
|
||||
$(LN_S) -f $< $@
|
||||
|
||||
pthread.mod: pthread.o
|
||||
|
||||
pthread.F08: $(top_srcdir)/../../sources/pthread.F08
|
||||
$(LN_S) -f $< $@
|
||||
|
|
|
@ -52,6 +52,9 @@ AM_PROG_AR
|
|||
|
||||
AX_CXX11_THREAD
|
||||
|
||||
# Get the size of a pthread_t instance, needed by pthread.F08
|
||||
AC_CHECK_SIZEOF([pthread_t], [], [#include <pthread.h>])
|
||||
|
||||
# Check for dlopen(), needed by k_order_perturbation DLL
|
||||
AC_CHECK_LIB([dl], [dlopen], [LIBADD_DLOPEN="-ldl"], [])
|
||||
AC_SUBST([LIBADD_DLOPEN])
|
||||
|
|
|
@ -40,7 +40,7 @@ clean-local:
|
|||
cd $(top_srcdir)/../../octave && rm -f $(PROGRAMS); \
|
||||
fi
|
||||
|
||||
# Rules for the Fortran 2008 interface to MEX and BLAS/LAPACK functions
|
||||
# Rules for the Fortran 2008 interface to MEX, BLAS/LAPACK and pthread functions
|
||||
matlab_mat.mod: matlab_mex.o
|
||||
matlab_mex.mod: matlab_mex.o
|
||||
|
||||
|
@ -52,3 +52,8 @@ lapack.mod: blas_lapack.o
|
|||
|
||||
blas_lapack.F08: $(top_srcdir)/../../sources/blas_lapack.F08
|
||||
$(LN_S) -f $< $@
|
||||
|
||||
pthread.mod: pthread.o
|
||||
|
||||
pthread.F08: $(top_srcdir)/../../sources/pthread.F08
|
||||
$(LN_S) -f $< $@
|
||||
|
|
|
@ -0,0 +1,78 @@
|
|||
! Copyright © 2021 Dynare Team
|
||||
!
|
||||
! This file is part of Dynare.
|
||||
!
|
||||
! Dynare is free software: you can redistribute it and/or modify
|
||||
! it under the terms of the GNU General Public License as published by
|
||||
! the Free Software Foundation, either version 3 of the License, or
|
||||
! (at your option) any later version.
|
||||
!
|
||||
! Dynare is distributed in the hope that it will be useful,
|
||||
! but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
! GNU General Public License for more details.
|
||||
!
|
||||
! You should have received a copy of the GNU General Public License
|
||||
! along with Dynare. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
! Routines and data structures for multithreading over particles in local_state_space_iteration_fortran
|
||||
module pparticle
|
||||
use iso_c_binding
|
||||
use simulation
|
||||
use matlab_mex
|
||||
|
||||
implicit none
|
||||
|
||||
type tdata
|
||||
integer :: nm, nys, endo_nbr, nvar, order, nrestricted, nparticles
|
||||
real(real64), allocatable :: yhat(:,:), e(:,:), ynext(:,:), ys_reordered(:), restrict_var_list(:)
|
||||
type(pol), dimension(:), allocatable :: udr
|
||||
end type tdata
|
||||
|
||||
type(tdata) :: thread_data
|
||||
|
||||
contains
|
||||
|
||||
subroutine thread_eval(arg) bind(c)
|
||||
type(c_ptr), intent(in), value :: arg
|
||||
integer, pointer :: im
|
||||
integer :: i, j, start, end, q, r, ind
|
||||
type(horner), dimension(:), allocatable :: h
|
||||
real(real64), dimension(:), allocatable :: dyu
|
||||
|
||||
! Checking that the thread number got passed as argument
|
||||
if (.not. c_associated(arg)) then
|
||||
call mexErrMsgTxt("No argument was passed to thread_eval")
|
||||
end if
|
||||
call c_f_pointer(arg, im)
|
||||
|
||||
! Allocating local arrays
|
||||
allocate(h(0:thread_data%order), dyu(thread_data%nvar))
|
||||
do i=0, thread_data%order
|
||||
allocate(h(i)%c(thread_data%endo_nbr, thread_data%nvar**i))
|
||||
end do
|
||||
|
||||
! Specifying bounds for the curent thread
|
||||
q = thread_data%nparticles / thread_data%nm
|
||||
r = mod(thread_data%nparticles, thread_data%nm)
|
||||
start = (im-1)*q+1
|
||||
if (im < thread_data%nm) then
|
||||
end = start+q-1
|
||||
else
|
||||
end = thread_data%nparticles
|
||||
end if
|
||||
|
||||
! Using the Horner algorithm to evaluate the decision rule at the chosen yhat and epsilon
|
||||
do j=start,end
|
||||
dyu(1:thread_data%nys) = thread_data%yhat(:,j)
|
||||
dyu(thread_data%nys+1:) = thread_data%e(:,j)
|
||||
call eval(h, dyu, thread_data%udr, thread_data%endo_nbr, thread_data%nvar, thread_data%order)
|
||||
do i=1,thread_data%nrestricted
|
||||
ind = int(thread_data%restrict_var_list(i))
|
||||
thread_data%ynext(i,j) = h(0)%c(ind,1) + thread_data%ys_reordered(ind)
|
||||
end do
|
||||
end do
|
||||
|
||||
end subroutine thread_eval
|
||||
|
||||
end module pparticle
|
|
@ -30,21 +30,21 @@ subroutine mexFunction(nlhs, plhs, nrhs, prhs) bind(c, name='mexFunction')
|
|||
use iso_c_binding
|
||||
use struct
|
||||
use matlab_mex
|
||||
use partitions
|
||||
use simulation
|
||||
use pthread
|
||||
use pparticle
|
||||
implicit none
|
||||
|
||||
type(c_ptr), dimension(*), intent(in), target :: prhs
|
||||
type(c_ptr), dimension(*), intent(out) :: plhs
|
||||
integer(c_int), intent(in), value :: nlhs, nrhs
|
||||
type(c_ptr) :: M_mx, options_mx, dr_mx, yhat_mx, epsilon_mx, udr_mx, tmp
|
||||
type(pol), dimension(:), allocatable, target :: udr
|
||||
integer :: order, nstatic, npred, nboth, nfwrd, exo_nbr, endo_nbr, nparticles, nys, nvar, nrestricted
|
||||
real(real64), dimension(:), allocatable :: ys_reordered, dyu
|
||||
type(pol), dimension(:), allocatable :: udr
|
||||
integer :: order, nstatic, npred, nboth, nfwrd, exo_nbr, endo_nbr, nparticles, nys, nvar, nrestricted, nm
|
||||
real(real64), dimension(:), pointer, contiguous :: order_var, ys, restrict_var_list
|
||||
real(real64), dimension(:,:), allocatable :: yhat, e, ynext, ynext_all
|
||||
type(horner), dimension(:), allocatable :: h
|
||||
integer :: i, j, m, n
|
||||
real(real64), allocatable :: yhat(:,:), e(:,:), ynext(:,:), ys_reordered(:)
|
||||
integer :: i, m, n, rc
|
||||
type(c_pthread_t), allocatable :: threads(:)
|
||||
integer, allocatable, target :: routines(:)
|
||||
character(kind=c_char, len=10) :: fieldname
|
||||
|
||||
yhat_mx = prhs(1)
|
||||
|
@ -114,6 +114,13 @@ subroutine mexFunction(nlhs, plhs, nrhs, prhs) bind(c, name='mexFunction')
|
|||
restrict_var_list => mxGetPr(restrict_var_list_mx)
|
||||
end associate
|
||||
|
||||
associate (thread_mx => mxGetField(options_mx, 1_mwIndex, "threads"))
|
||||
if (.not. c_associated(thread_mx)) then
|
||||
call mexErrMsgTxt("Cannot find `threads' in options_")
|
||||
end if
|
||||
nm = get_int_field(thread_mx, "local_state_space_iteration_fortran")
|
||||
end associate
|
||||
|
||||
nparticles = int(mxGetN(yhat_mx));
|
||||
if (int(mxGetN(epsilon_mx)) /= nparticles) then
|
||||
call mexErrMsgTxt("epsilon and yhat don't have the same number of columns")
|
||||
|
@ -125,11 +132,12 @@ subroutine mexFunction(nlhs, plhs, nrhs, prhs) bind(c, name='mexFunction')
|
|||
call mexErrMsgTxt("epsilon should be a double precision matrix with exo_nbr rows")
|
||||
end if
|
||||
|
||||
allocate(yhat(nys, nparticles), e(exo_nbr, nparticles), ynext(nrestricted, nparticles), ynext_all(endo_nbr, nparticles))
|
||||
allocate(yhat(nys, nparticles), e(exo_nbr, nparticles), ynext(nrestricted, nparticles))
|
||||
yhat = reshape(mxGetPr(yhat_mx), [nys, nparticles])
|
||||
e = reshape(mxGetPr(epsilon_mx), [exo_nbr, nparticles])
|
||||
|
||||
allocate(h(0:order), udr(0:order))
|
||||
|
||||
allocate(udr(0:order))
|
||||
do i = 0, order
|
||||
write (fieldname, '(a2, i1)') "g_", i
|
||||
tmp = mxGetField(udr_mx, 1_mwIndex, trim(fieldname))
|
||||
|
@ -138,23 +146,45 @@ subroutine mexFunction(nlhs, plhs, nrhs, prhs) bind(c, name='mexFunction')
|
|||
end if
|
||||
m = int(mxGetM(tmp))
|
||||
n = int(mxGetN(tmp))
|
||||
allocate(udr(i)%g(m,n), h(i)%c(endo_nbr, nvar**i))
|
||||
allocate(udr(i)%g(m,n))
|
||||
udr(i)%g(1:m,1:n) = reshape(mxGetPr(tmp), [m,n])
|
||||
end do
|
||||
|
||||
! Using the Horner algorithm to evaluate the decision rule at the chosen yhat and epsilon
|
||||
allocate(dyu(nvar))
|
||||
do j=1,nparticles
|
||||
dyu(1:nys) = yhat(:,j)
|
||||
dyu(nys+1:) = e(:,j)
|
||||
call eval(h, dyu, udr, endo_nbr, nvar, order)
|
||||
ynext_all(:,j) = h(0)%c(:,1) + ys_reordered
|
||||
do i=1,nrestricted
|
||||
ynext(i,j) = ynext_all(int(restrict_var_list(i)),j)
|
||||
end do
|
||||
end do
|
||||
! Initializing the global structure containing
|
||||
! useful information for threads
|
||||
thread_data%nm = nm
|
||||
thread_data%nys = nys
|
||||
thread_data%endo_nbr = endo_nbr
|
||||
thread_data%nvar = nvar
|
||||
thread_data%order = order
|
||||
thread_data%nrestricted = nrestricted
|
||||
thread_data%nparticles = nparticles
|
||||
thread_data%yhat = yhat
|
||||
thread_data%e = e
|
||||
thread_data%ynext = ynext
|
||||
thread_data%udr = udr
|
||||
thread_data%ys_reordered = ys_reordered
|
||||
thread_data%restrict_var_list = restrict_var_list
|
||||
|
||||
allocate(threads(nm), routines(nm))
|
||||
routines = [ (i, i = 1, nm) ]
|
||||
|
||||
if (nm == 1) then
|
||||
call thread_eval(c_loc(routines(1)))
|
||||
else
|
||||
! Creating the threads
|
||||
do i = 1, nm
|
||||
rc = c_pthread_create(threads(i), c_null_ptr, c_funloc(thread_eval), c_loc(routines(i)))
|
||||
end do
|
||||
|
||||
! Joining the threads
|
||||
do i = 1, nm
|
||||
rc = c_pthread_join(threads(i), c_loc(routines(i)))
|
||||
end do
|
||||
end if
|
||||
|
||||
! Returning the result
|
||||
plhs(1) = mxCreateDoubleMatrix(int(size(restrict_var_list), mwSize), int(nparticles, mwSize), mxREAL)
|
||||
mxGetPr(plhs(1)) = reshape(ynext, [size(ynext)])
|
||||
|
||||
mxGetPr(plhs(1)) = reshape(thread_data%ynext, [size(thread_data%ynext)])
|
||||
|
||||
end subroutine mexFunction
|
||||
|
|
|
@ -0,0 +1,56 @@
|
|||
! Copyright © 2022 Dynare Team
|
||||
!
|
||||
! This file is part of Dynare.
|
||||
!
|
||||
! Dynare is free software: you can redistribute it and/or modify
|
||||
! it under the terms of the GNU General Public License as published by
|
||||
! the Free Software Foundation, either version 3 of the License, or
|
||||
! (at your option) any later version.
|
||||
!
|
||||
! Dynare is distributed in the hope that it will be useful,
|
||||
! but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
! GNU General Public License for more details.
|
||||
!
|
||||
! You should have received a copy of the GNU General Public License
|
||||
! along with Dynare. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
! Wrapper around pthread_create and pthread_join C routines for POSIX multithreading
|
||||
module pthread
|
||||
use iso_c_binding
|
||||
implicit none
|
||||
private
|
||||
|
||||
public :: c_pthread_create
|
||||
public :: c_pthread_join
|
||||
|
||||
! SIZEOF_PTHREAD_T is set by the AC_CHECK_SIZEOF routine
|
||||
integer, parameter :: pthread_size = SIZEOF_PTHREAD_T
|
||||
|
||||
type, bind(c), public :: c_pthread_t
|
||||
private
|
||||
integer(kind=c_char) :: hidden(pthread_size)
|
||||
end type c_pthread_t
|
||||
|
||||
interface
|
||||
|
||||
function c_pthread_create(thread, attr, start_routine, arg) bind(c, name='pthread_create')
|
||||
import :: c_int, c_ptr, c_funptr, c_pthread_t
|
||||
implicit none
|
||||
type(c_pthread_t), intent(inout) :: thread
|
||||
type(c_ptr), intent(in), value :: attr
|
||||
type(c_funptr), intent(in), value :: start_routine
|
||||
type(c_ptr), intent(in), value :: arg
|
||||
integer(kind=c_int) :: c_pthread_create
|
||||
end function c_pthread_create
|
||||
|
||||
function c_pthread_join(thread, value_ptr) bind(c, name='pthread_join')
|
||||
import :: c_int, c_ptr, c_pthread_t
|
||||
implicit none
|
||||
type(c_pthread_t), intent(in), value :: thread
|
||||
type(c_ptr), intent(in) :: value_ptr
|
||||
integer(kind=c_int) :: c_pthread_join
|
||||
end function c_pthread_join
|
||||
|
||||
end interface
|
||||
end module pthread
|
|
@ -618,7 +618,8 @@ PARTICLEFILES = \
|
|||
particle/dsge_base2.mod \
|
||||
particle/first_spec.mod \
|
||||
particle/first_spec_MCMC.mod \
|
||||
particle/local_state_space_iteration_k_test.mod
|
||||
particle/local_state_space_iteration_k_test.mod \
|
||||
particle/local_it_k_test_parallel.mod
|
||||
|
||||
|
||||
XFAIL_MODFILES = ramst_xfail.mod \
|
||||
|
@ -928,6 +929,9 @@ discretionary_policy/Gali_2015_chapter_3_nonlinear.o.trs: discretionary_policy/G
|
|||
particle/local_state_space_iteration_k_test.m.trs: particle/first_spec.m.trs
|
||||
particle/local_state_space_iteration_k_test.o.trs: particle/first_spec.o.trs
|
||||
|
||||
particle/local_it_k_test_parallel.m.trs: particle/first_spec.m.trs
|
||||
particle/local_it_k_test_parallel.o.trs: particle/first_spec.o.trs
|
||||
|
||||
pruning/AS_pruned_state_space_red_shock.m.trs: pruning/AnSchorfheide_pruned_state_space.m.trs
|
||||
pruning/AS_pruned_state_space_red_shock.o.trs: pruning/AnSchorfheide_pruned_state_space.o.trs
|
||||
|
||||
|
|
|
@ -0,0 +1,58 @@
|
|||
/*
|
||||
Tests that the parallelized version of local_state_space_iteration_k and local_state_space_iteration_fortran
|
||||
(for k=3) return the same results.
|
||||
|
||||
This file must be run after first_spec.mod (both are based on the same model).
|
||||
*/
|
||||
|
||||
@#include "first_spec_common.inc"
|
||||
|
||||
varobs q ca;
|
||||
|
||||
shocks;
|
||||
var eeps = 0.04^2;
|
||||
var nnu = 0.03^2;
|
||||
var q = 0.01^2;
|
||||
var ca = 0.01^2;
|
||||
end;
|
||||
|
||||
// Initialize various structures
|
||||
estimation(datafile='my_data.mat',order=2,mode_compute=0,mh_replic=0,filter_algorithm=sis,nonlinear_filter_initialization=2
|
||||
,cova_compute=0 %tell program that no covariance matrix was computed
|
||||
);
|
||||
|
||||
stoch_simul(order=2, periods=200, irf=0, k_order_solver);
|
||||
|
||||
// Really perform the test
|
||||
|
||||
nparticles = options_.particle.number_of_particles;
|
||||
nsims = 1e6/nparticles;
|
||||
|
||||
/* We generate particles using realistic distributions (though this is not
|
||||
strictly needed) */
|
||||
state_idx = oo_.dr.order_var((M_.nstatic+1):(M_.nstatic+M_.npred+M_.nboth));
|
||||
yhat = chol(oo_.var(state_idx,state_idx))*randn(M_.npred+M_.nboth, nparticles);
|
||||
epsilon = chol(M_.Sigma_e)*randn(M_.exo_nbr, nparticles);
|
||||
|
||||
stoch_simul(order=3, periods=200, irf=0, k_order_solver);
|
||||
|
||||
options_.threads.local_state_space_iteration_k = 2;
|
||||
options_.threads.local_state_space_iteration_fortran = 2;
|
||||
|
||||
tStart1 = tic; for i=1:nsims, ynext1 = local_state_space_iteration_k(yhat, epsilon, oo_.dr, M_, options_); end, tElapsed1 = toc(tStart1);
|
||||
|
||||
tStart2 = tic; [udr] = folded_to_unfolded_dr(oo_.dr, M_, options_); for i=1:nsims, ynext2 = local_state_space_iteration_fortran(yhat, epsilon, oo_.dr, M_, options_, udr); end, tElapsed2 = toc(tStart2);
|
||||
|
||||
if max(max(abs(ynext1-ynext2))) > 1e-14
|
||||
error('Inconsistency between local_state_space_iteration_k and local_state_space_iteration_fortran')
|
||||
end
|
||||
|
||||
if tElapsed2<tElapsed1
|
||||
skipline()
|
||||
dprintf('local_state_space_iteration_fortran is %5.2f times faster than local_state_space_iteration_k', tElapsed1/tElapsed2)
|
||||
skipline()
|
||||
else
|
||||
skipline()
|
||||
dprintf('local_state_space_iteration_fortran is %5.2f times slower than local_state_space_iteration_k', tElapsed2/tElapsed1)
|
||||
skipline()
|
||||
end
|
|
@ -1,6 +1,8 @@
|
|||
/*
|
||||
Tests that local_state_space_iteration_2 and local_state_space_iteration_k
|
||||
(for k=2) return the same results.
|
||||
Tests that :
|
||||
(i) local_state_space_iteration_2, local_state_space_iteration_k and local_state_space_iteration_fortran
|
||||
(for k=2) return the same results
|
||||
(ii) local_state_space_iteration_k and local_state_space_iteration_fortran return the same results for k=3 without parallelization.
|
||||
|
||||
This file must be run after first_spec.mod (both are based on the same model).
|
||||
*/
|
||||
|
@ -35,6 +37,9 @@ yhat = chol(oo_.var(state_idx,state_idx))*randn(M_.npred+M_.nboth, nparticles);
|
|||
epsilon = chol(M_.Sigma_e)*randn(M_.exo_nbr, nparticles);
|
||||
|
||||
dr = oo_.dr;
|
||||
options_.threads.local_state_space_iteration_fortran = 1;
|
||||
options_.threads.local_state_space_iteration_k = 1;
|
||||
|
||||
|
||||
// “rf” stands for “Reduced Form”
|
||||
rf_ghx = dr.ghx(dr.restrict_var_list, :);
|
||||
|
@ -86,7 +91,7 @@ tStart31 = tic; for i=1:nsims, ynext31 = local_state_space_iteration_k(yhat, eps
|
|||
tStart32 = tic; [udr] = folded_to_unfolded_dr(oo_.dr, M_, options_); for i=1:nsims, ynext32 = local_state_space_iteration_fortran(yhat, epsilon, oo_.dr, M_, options_, udr); end, tElapsed32 = toc(tStart32);
|
||||
|
||||
if max(max(abs(ynext31-ynext32))) > 1e-14
|
||||
error('Inconsistency between local_state_space_iteration_2 and local_state_space_iteration_fortran')
|
||||
error('Inconsistency between local_state_space_iteration_k and local_state_space_iteration_fortran')
|
||||
end
|
||||
|
||||
if tElapsed32<tElapsed31
|
||||
|
|
Loading…
Reference in New Issue