Local state-space iteration at order 3: multi-thread 3rd-order version with and without pruning
parent
982ec2e66d
commit
c6d5c48ff7
|
@ -8,8 +8,8 @@ nodist_libkordersim_a_SOURCES = \
|
|||
partitions.f08 \
|
||||
simulation.f08 \
|
||||
struct.f08 \
|
||||
pthread.F08 \
|
||||
pparticle.f08
|
||||
pthread.F08
|
||||
|
||||
|
||||
BUILT_SOURCES = $(nodist_libkordersim_a_SOURCES)
|
||||
CLEANFILES = $(nodist_libkordersim_a_SOURCES)
|
||||
|
@ -27,8 +27,5 @@ partitions.mod: partitions.o
|
|||
simulation.o: partitions.mod lapack.mod
|
||||
simulation.mod: simulation.o
|
||||
|
||||
pparticle.o: simulation.mod matlab_mex.mod
|
||||
pparticle.mod: pparticle.o
|
||||
|
||||
%.f08: $(top_srcdir)/../../sources/libkordersim/%.f08
|
||||
$(LN_S) -f $< $@
|
||||
|
|
|
@ -1,21 +1,27 @@
|
|||
mex_PROGRAMS = local_state_space_iteration_2 local_state_space_iteration_k
|
||||
mex_PROGRAMS = local_state_space_iteration_2 local_state_space_iteration_3 local_state_space_iteration_k
|
||||
|
||||
nodist_local_state_space_iteration_2_SOURCES = local_state_space_iteration_2.cc
|
||||
nodist_local_state_space_iteration_3_SOURCES = local_state_space_iteration_3.f08
|
||||
nodist_local_state_space_iteration_k_SOURCES = local_state_space_iteration_k.f08
|
||||
|
||||
local_state_space_iteration_2_CXXFLAGS = $(AM_CXXFLAGS) -fopenmp
|
||||
local_state_space_iteration_2_LDFLAGS = $(AM_LDFLAGS) $(OPENMP_LDFLAGS)
|
||||
|
||||
local_state_space_iteration_3_FCFLAGS = $(AM_FCFLAGS) -I../libkordersim -pthread
|
||||
local_state_space_iteration_3_LDADD = ../libkordersim/libkordersim.a
|
||||
|
||||
local_state_space_iteration_k_FCFLAGS = $(AM_FCFLAGS) -I../libkordersim -pthread
|
||||
local_state_space_iteration_k_LDADD = ../libkordersim/libkordersim.a
|
||||
|
||||
BUILT_SOURCES = $(nodist_local_state_space_iteration_2_SOURCES) \
|
||||
$(nodist_local_state_space_iteration_k_SOURCES)
|
||||
$(nodist_local_state_space_iteration_3_SOURCES) \
|
||||
$(nodist_local_state_space_iteration_k_SOURCES)
|
||||
CLEANFILES = $(nodist_local_state_space_iteration_2_SOURCES) \
|
||||
$(nodist_local_state_space_iteration_k_SOURCES)
|
||||
|
||||
%.cc: $(top_srcdir)/../../sources/local_state_space_iterations/%.cc
|
||||
$(LN_S) -f $< $@
|
||||
$(nodist_local_state_space_iteration_3_SOURCES) \
|
||||
$(nodist_local_state_space_iteration_k_SOURCES)
|
||||
|
||||
%.f08: $(top_srcdir)/../../sources/local_state_space_iterations/%.f08
|
||||
$(LN_S) -f $< $@
|
||||
|
||||
%.cc: $(top_srcdir)/../../sources/local_state_space_iterations/%.cc
|
||||
$(LN_S) -f $< $@
|
|
@ -6,6 +6,7 @@ EXTRA_DIST = \
|
|||
blas_lapack.F08 \
|
||||
defines.F08 \
|
||||
matlab_mex.F08 \
|
||||
pthread.F08 \
|
||||
mjdgges \
|
||||
kronecker \
|
||||
bytecode \
|
||||
|
|
|
@ -22,15 +22,16 @@
|
|||
module partitions
|
||||
use pascal
|
||||
use sort
|
||||
use iso_fortran_env
|
||||
implicit none
|
||||
|
||||
! index represents the aforementioned (α₁,…,αₘ) objects
|
||||
type index
|
||||
integer, dimension(:), allocatable :: ind
|
||||
integer, dimension(:), allocatable :: coor
|
||||
end type index
|
||||
|
||||
interface index
|
||||
module procedure :: init_index
|
||||
module procedure :: init_index, init_index_vec, init_index_int
|
||||
end interface index
|
||||
|
||||
! a dictionary that matches folded indices with folded offsets
|
||||
|
@ -56,18 +57,36 @@ module partitions
|
|||
|
||||
contains
|
||||
|
||||
! Constructor for the index type
|
||||
type(index) function init_index(d, ind)
|
||||
! Constructors for the index type
|
||||
! Simply allocates the index with the size provided as input
|
||||
type(index) function init_index(d)
|
||||
integer, intent(in) :: d
|
||||
integer, dimension(d), intent(in) :: ind
|
||||
allocate(init_index%ind(d))
|
||||
init_index%ind = ind
|
||||
allocate(init_index%coor(d))
|
||||
end function init_index
|
||||
|
||||
! Creates an index with the vector provided as inputs
|
||||
type(index) function init_index_vec(ind)
|
||||
integer, dimension(:), intent(in) :: ind
|
||||
allocate(init_index_vec%coor(size(ind)))
|
||||
init_index_vec%coor = ind
|
||||
end function init_index_vec
|
||||
|
||||
! Creates the index with a given size
|
||||
! and fills it with a given integer
|
||||
type(index) function init_index_int(d, m)
|
||||
integer, intent(in) :: d, m
|
||||
integer :: i
|
||||
allocate(init_index_int%coor(d))
|
||||
do i=1,d
|
||||
init_index_int%coor(i) = m
|
||||
end do
|
||||
end function init_index_int
|
||||
|
||||
! Operators for the index type
|
||||
! Comparison for the index type. Returns true if the two indices are different
|
||||
type(logical) function diff_indices(i1,i2)
|
||||
type(index), intent(in) :: i1, i2
|
||||
if (size(i1%ind) /= size(i2%ind) .or. any(i1%ind /= i2%ind)) then
|
||||
if (size(i1%coor) /= size(i2%coor) .or. any(i1%coor /= i2%coor)) then
|
||||
diff_indices = .true.
|
||||
else
|
||||
diff_indices = .false.
|
||||
|
@ -91,7 +110,7 @@ contains
|
|||
integer :: i
|
||||
i = 1
|
||||
if (d>1) then
|
||||
do while ((i < d) .and. (idx%ind(i+1) == idx%ind(1)))
|
||||
do while ((i < d) .and. (idx%coor(i+1) == idx%coor(1)))
|
||||
i = i+1
|
||||
end do
|
||||
end if
|
||||
|
@ -99,11 +118,10 @@ contains
|
|||
end function get_prefix_length
|
||||
|
||||
! Gets the folded index associated with an unfolded index
|
||||
type(index) function u_index_to_f_index(idx, d)
|
||||
type(index) function u_index_to_f_index(idx)
|
||||
type(index), intent(in) :: idx
|
||||
integer, intent(in) :: d
|
||||
u_index_to_f_index = index(d, idx%ind)
|
||||
call sort_int(u_index_to_f_index%ind)
|
||||
u_index_to_f_index = index(idx%coor)
|
||||
call sort_int(u_index_to_f_index%coor)
|
||||
end function u_index_to_f_index
|
||||
|
||||
! Converts the offset of an unfolded tensor to the associated unfolded tensor index
|
||||
|
@ -112,11 +130,11 @@ contains
|
|||
type(index) function u_offset_to_u_index(j, n, d)
|
||||
integer, intent(in) :: j, n, d ! offset, number of variables and dimensions respectively
|
||||
integer :: i, tmp, r
|
||||
allocate(u_offset_to_u_index%ind(d))
|
||||
allocate(u_offset_to_u_index%coor(d))
|
||||
tmp = j-1 ! We substract 1 as j ∈ {1, ..., n} so that tmp ∈ {0, ..., n-1} and our modular operations work
|
||||
do i=d,1,-1
|
||||
r = mod(tmp, n)
|
||||
u_offset_to_u_index%ind(i) = r
|
||||
u_offset_to_u_index%coor(i) = r
|
||||
tmp = (tmp-r)/n
|
||||
end do
|
||||
end function u_offset_to_u_index
|
||||
|
@ -136,11 +154,26 @@ contains
|
|||
j = 1
|
||||
else
|
||||
prefix = get_prefix_length(idx,d)
|
||||
tmp = index(d-prefix, idx%ind(prefix+1:) - idx%ind(1))
|
||||
j = get(d, n+d-1, p) - get(d, n-idx%ind(1)+d-1, p) + f_index_to_f_offset(tmp, n-idx%ind(1), d-prefix, p)
|
||||
tmp = index(idx%coor(prefix+1:) - idx%coor(1))
|
||||
j = get(d, n+d-1, p) - get(d, n-idx%coor(1)+d-1, p) + f_index_to_f_offset(tmp, n-idx%coor(1), d-prefix, p)
|
||||
end if
|
||||
end function f_index_to_f_offset
|
||||
|
||||
|
||||
! Returns the unfolded tensor offset associated with an unfolded tensor index
|
||||
! Written in a recursive way, the unfolded offset off(α₁,…,αₘ) associated with the
|
||||
! index (α₁,…,αₘ) with αᵢ ∈ {1, ..., n} verifies
|
||||
! off(α₁,…,αₘ) = n*off(α₁,…,αₘ₋₁) + αₘ
|
||||
integer function u_index_to_u_offset(idx, n, d)
|
||||
type(index), intent(in) :: idx ! unfolded index
|
||||
integer, intent(in) :: n, d ! number of variables and dimensions
|
||||
integer :: j
|
||||
u_index_to_u_offset = 0
|
||||
do j=1,d
|
||||
u_index_to_u_offset = n*u_index_to_u_offset + idx%coor(j)-1
|
||||
end do
|
||||
u_index_to_u_offset = u_index_to_u_offset + 1
|
||||
end function u_index_to_u_offset
|
||||
|
||||
! Function that searches a value in an array of a given length
|
||||
type(integer) function find(a, v, l)
|
||||
integer, intent(in) :: l ! length of the array
|
||||
|
@ -180,7 +213,7 @@ contains
|
|||
c = dict(n, d, p)
|
||||
do j=1,n**d
|
||||
tmp = u_offset_to_u_index(j,n,d)
|
||||
tmp = u_index_to_f_index(tmp, d)
|
||||
tmp = u_index_to_f_index(tmp)
|
||||
found = find(c%indices, tmp, c%pr)
|
||||
if (found == 0) then
|
||||
c%pr = c%pr+1
|
||||
|
@ -193,7 +226,128 @@ contains
|
|||
end do
|
||||
end subroutine fill_folded_indices
|
||||
|
||||
end module partitions
|
||||
! ! Specialized code for local_state_space_iteration_3
|
||||
! ! Considering the folded tensor gᵥᵥ, for each folded offset,
|
||||
! ! fills (i) the corresponding index, (ii) the corresponding
|
||||
! ! unfolded offset in the corresponding unfolded tensor
|
||||
! ! and (iii) the number of equivalent unfolded indices the folded index
|
||||
! ! associated with the folded offset represents
|
||||
! subroutine index_2(indices, uoff, neq, q)
|
||||
! integer, intent(in) :: q ! size of v
|
||||
! integer, dimension(:), intent(inout) :: uoff, neq ! list of corresponding unfolded offsets and number of equivalent unfolded indices
|
||||
! type(index), dimension(:), intent(inout) :: indices ! list of folded indices
|
||||
! integer :: m, j
|
||||
! m = q*(q+1)/2 ! total number of folded indices : ⎛q+2-1⎞
|
||||
! ! ⎝ 2 ⎠
|
||||
! uoff(1) = 1
|
||||
! neq(1) = 1
|
||||
! ! offsets such that j ∈ { 2, ..., q } are associated with
|
||||
! ! indices (1, α), α ∈ { 2, ..., q }
|
||||
! do j=2,q
|
||||
! neq(j) = 2
|
||||
! end do
|
||||
! end subroutine index_2
|
||||
|
||||
! In order to list folded indices α = (α₁,…,αₘ) with αᵢ ∈ { 1, ..., n },
|
||||
! at least 2 algorithms exist: a recursive one and an iterative one.
|
||||
! The recursive algorithm list_folded_indices(n,m,q) that returns
|
||||
! the list of all folded indices α = (α₁,…,αₘ) with αᵢ ∈ { 1+q, ..., n+q } works as follows:
|
||||
! if n=0, return an empty list
|
||||
! else if m=0, return the list containing the sole zero-sized index
|
||||
! otherwise,
|
||||
! return the concatenation of ([1+q, ℓ] for ℓ ∈ list_folded_indices(n,m-1,q))
|
||||
! and list_folded_indices(n-1,m,1,q+1)]
|
||||
! A call to list_folded_indices(n,m,0) then returns the list
|
||||
! of folded indices α = (α₁,…,αₘ) with αᵢ ∈ { 1, ..., n }
|
||||
! The problem with recursive functions is that the compiler may manage poorly
|
||||
! the stack, which slows down the function's execution
|
||||
! recursive function list_folded_indices(n, m, q) result(list)
|
||||
! integer :: n, m, q
|
||||
! type(index), allocatable, dimension(:) :: list, temp
|
||||
! integer :: j
|
||||
! if (m==0) then
|
||||
! list = [index(0)]
|
||||
! elseif (n == 0) then
|
||||
! allocate(list(0))
|
||||
! else
|
||||
! temp = list_folded_indices(n,m-1,q)
|
||||
! list = [(index([1+q,temp(j)%coor]), j=1, size(temp)), list_folded_indices(n-1,m,q+1)]
|
||||
! end if
|
||||
! end function list_folded_indices
|
||||
|
||||
! Considering the folded tensor gᵥᵐ, for each folded offset,
|
||||
! fills the lists of (i) the corresponding index, (ii) the corresponding
|
||||
! unfolded offset in the corresponding unfolded tensor
|
||||
! and (iii) the number of equivalent unfolded indices the folded index
|
||||
! (associated with the folded offset) represents
|
||||
! The algorithm to get the folded index associated with a folded offset
|
||||
! relies on the definition of the lexicographic order.
|
||||
! Considering α = (α₁,…,αₘ) with αᵢ ∈ { 1, ..., n },
|
||||
! the next index α' is such that there exists i that verifies
|
||||
! αⱼ = αⱼ' for all j < i, αᵢ' > αᵢ. Note that all the coordinates
|
||||
! αᵢ', ... , αₘ' need to be as small as the lexicographic order allows
|
||||
! for α' to immediately follow α.
|
||||
! Suppose j is the latest incremented coordinate:
|
||||
! if αⱼ < n, then αⱼ' = αⱼ + 1
|
||||
! otherwise αⱼ = n, set αₖ' = αⱼ₋₁ + 1 for all k ≥ j-1
|
||||
! if αⱼ₋₁ = n, set j := j-1
|
||||
! otherwise, set j := m
|
||||
! The algorithm to count the number of equivalent unfolded indices
|
||||
! works as follows. A folded index can be written as α = (x₁, ..., x₁, ..., xₚ, ..., xₚ)
|
||||
! such that x₁ < x₂ < ... < xₚ. Denote kᵢ the number of coordinates equal to xᵢ.
|
||||
! The number of unfolded indices equivalent to α is c(α) = ⎛ d ⎞
|
||||
! ⎝ k₁, k₂, ..., kₚ ⎠
|
||||
! Suppose j is the latest incremented coordinate.
|
||||
! If αⱼ < n, then αⱼ' = αⱼ + 1, k(αⱼ) := k(αⱼ)-1, k(αⱼ') := k(αⱼ')+1.
|
||||
! In this case, c(α') = c(α)*(k(αⱼ)+1)/k(αⱼ')
|
||||
! otherwise, αⱼ = n: set αₖ' = αⱼ₋₁ + 1 for all k ≥ j-1,
|
||||
! k(αⱼ₋₁) := k(αⱼ₋₁)-1, k(n) := 0, k(αⱼ₋₁') = m-(j-1)+1
|
||||
! In this case, we compute c(α') with the multinomial formula above
|
||||
! Finally, the algorithm that returns the unfolded offset of a given folded index works
|
||||
! as follows. Suppose j is the latest incremented coordinate and off(α) is the unfolded offset
|
||||
! associated with index α:
|
||||
! if αⱼ < n, then αⱼ' = αⱼ + 1 and off(α') = off(α)+1
|
||||
! otherwise, αⱼ = n: set αₖ' = αⱼ₋₁ + 1 for all k ≥ j-1
|
||||
! and off(α') can be computed using the u_index_to_u_offset routine
|
||||
subroutine folded_offset_loop(ind, nbeq, off, n, m, p)
|
||||
type(index), dimension(:), intent(inout) :: ind ! list of indices
|
||||
integer, dimension(:), intent(inout) :: nbeq, off ! lists of numbers of equivalent indices and of offsets
|
||||
integer, intent(in) :: n, m
|
||||
type(pascal_triangle), intent(in) :: p
|
||||
integer :: j, lastinc, k(n)
|
||||
ind(1) = index(m, 1)
|
||||
nbeq(1) = 1
|
||||
k = 0
|
||||
k(1) = m
|
||||
off(1) = 1
|
||||
j = 2
|
||||
lastinc = m
|
||||
do while (j <= size(ind))
|
||||
ind(j) = index(ind(j-1)%coor)
|
||||
if (ind(j-1)%coor(lastinc) == n) then
|
||||
ind(j)%coor(lastinc-1:m) = ind(j-1)%coor(lastinc-1)+1
|
||||
k(ind(j-1)%coor(lastinc-1)) = k(ind(j-1)%coor(lastinc-1))-1
|
||||
k(n) = 0
|
||||
k(ind(j)%coor(lastinc-1)) = m - (lastinc-1) + 1
|
||||
nbeq(j) = multinomial(k,m,p)
|
||||
off(j) = u_index_to_u_offset(ind(j), n, m)
|
||||
if (ind(j)%coor(m) == n) then
|
||||
lastinc = lastinc-1
|
||||
else
|
||||
lastinc = m
|
||||
end if
|
||||
else
|
||||
ind(j)%coor(lastinc) = ind(j-1)%coor(lastinc)+1
|
||||
k(ind(j)%coor(lastinc)) = k(ind(j)%coor(lastinc))+1
|
||||
nbeq(j) = nbeq(j-1)*k(ind(j-1)%coor(lastinc))/k(ind(j)%coor(lastinc))
|
||||
k(ind(j-1)%coor(lastinc)) = k(ind(j-1)%coor(lastinc))-1
|
||||
off(j) = off(j-1)+1
|
||||
end if
|
||||
j = j+1
|
||||
end do
|
||||
end subroutine folded_offset_loop
|
||||
|
||||
end module partitions
|
||||
|
||||
! gfortran -o partitions partitions.f08 pascal.f08 sort.f08
|
||||
! ./partitions
|
||||
|
@ -203,8 +357,11 @@ contains
|
|||
! implicit none
|
||||
! type(index) :: uidx, fidx, i1, i2
|
||||
! integer, dimension(:), allocatable :: folded
|
||||
! integer :: i, uj, n, d
|
||||
! integer :: i, uj, n, d, j, nb_folded_idcs
|
||||
! type(pascal_triangle) :: p
|
||||
! type(index), dimension(:), allocatable :: list_folded_idcs
|
||||
! integer, dimension(:), allocatable :: nbeq, off
|
||||
|
||||
! ! Unfolded indices and offsets
|
||||
! ! 0,0,0 1 1,0,0 10 2,0,0 19
|
||||
! ! 0,0,1 2 1,0,1 11 2,0,1 20
|
||||
|
@ -231,15 +388,15 @@ contains
|
|||
|
||||
! ! u_offset_to_u_index
|
||||
! uidx = u_offset_to_u_index(uj,n,d)
|
||||
! print '(3i2)', (uidx%ind(i), i=1,d) ! should display 0 2 1
|
||||
! print '(3i2)', (uidx%coor(i), i=1,d) ! should display 0 2 1
|
||||
|
||||
! ! f_index_to_f_offset
|
||||
! fidx = u_index_to_f_index(uidx, d)
|
||||
! fidx = u_index_to_f_index(uidx)
|
||||
! print '(i2)', f_index_to_f_offset(fidx, n, d, p) ! should display 5
|
||||
|
||||
! ! /=
|
||||
! i1 = index(5, (/1,2,3,4,5/))
|
||||
! i2 = index(5, (/1,2,3,4,6/))
|
||||
! i1 = index((/1,2,3,4,5/))
|
||||
! i2 = index((/1,2,3,4,6/))
|
||||
! if (i1 /= i2) then
|
||||
! print *, "Same!"
|
||||
! else
|
||||
|
@ -247,10 +404,25 @@ contains
|
|||
! end if
|
||||
|
||||
! ! fill_folded_indices
|
||||
! allocate(folded(n**d))
|
||||
! call fill_folded_indices(folded,n,d)
|
||||
! print *, "Matching offsets unfolded -> folded"
|
||||
! print '(1000i4)', (i, i=1,n**d)
|
||||
! print '(1000i4)', (folded(i), i=1,n**d)
|
||||
! ! allocate(folded(n**d))
|
||||
! ! call fill_folded_indices(folded,n,d,p)
|
||||
! ! print *, "Matching offsets unfolded -> folded"
|
||||
! ! print '(1000i4)', (i, i=1,n**d)
|
||||
! ! print '(1000i4)', (folded(i), i=1,n**d)
|
||||
|
||||
! n = 3
|
||||
! d = 3
|
||||
! p = pascal_triangle(n+d-1)
|
||||
! nb_folded_idcs = get(d,n+d-1,p)
|
||||
! ! recursive list_folded_indices
|
||||
! ! list_folded_idcs = list_folded_indices(n, d, 0)
|
||||
! ! print '(4i2)', ((list_folded_idcs(i)%coor(j), j=1,d), i=1,nb_folded_idcs)
|
||||
|
||||
! ! iterative list_folded_indices
|
||||
! allocate(list_folded_idcs(nb_folded_idcs), nbeq(nb_folded_idcs), off(nb_folded_idcs))
|
||||
! call folded_offset_loop(list_folded_idcs, nbeq, off, n, d, p)
|
||||
! print '(3i2)', ((list_folded_idcs(i)%coor(j), j=1,d), i=1,nb_folded_idcs)
|
||||
! print '(i3)', (nbeq(i), i=1,nb_folded_idcs)
|
||||
! print '(i4)', (off(i), i=1,nb_folded_idcs)
|
||||
|
||||
! end program test
|
|
@ -60,13 +60,28 @@ contains
|
|||
|
||||
! Returns ⎛n⎞ stored in pascal_triangle p
|
||||
! ⎝k⎠
|
||||
|
||||
type(integer) function get(k,n,p)
|
||||
integer function get(k,n,p)
|
||||
integer, intent(in) :: k, n
|
||||
type(pascal_triangle), intent(in) :: p
|
||||
get = p%lines(n)%coeffs(k+1)
|
||||
end function get
|
||||
|
||||
! Returns ⎛ d ⎞
|
||||
! ⎝ k₁, k₂, ..., kₙ ⎠
|
||||
integer function multinomial(k,d,p)
|
||||
integer, intent(in) :: k(:), d
|
||||
type(pascal_triangle), intent(in) :: p
|
||||
integer :: s, i
|
||||
s = d
|
||||
multinomial = 1
|
||||
i = 1
|
||||
do while (s > 0)
|
||||
multinomial = multinomial*get(k(i), s, p)
|
||||
s = s-k(i)
|
||||
i = i+1
|
||||
end do
|
||||
end function
|
||||
|
||||
end module pascal
|
||||
|
||||
! gfortran -o pascal pascal.f08
|
||||
|
@ -86,4 +101,10 @@ end module pascal
|
|||
! end if
|
||||
! end do
|
||||
! end do
|
||||
|
||||
! d = 3
|
||||
! p = pascal_triangle(d)
|
||||
! print '(i2)', multinomial([1,2,3], d, p) ! should print 60
|
||||
! print '(i2)', multinomial([0,0,0,3], d, p) ! should print 20
|
||||
|
||||
! end program test
|
|
@ -1,78 +0,0 @@
|
|||
! Copyright © 2021-2022 Dynare Team
|
||||
!
|
||||
! This file is part of Dynare.
|
||||
!
|
||||
! Dynare is free software: you can redistribute it and/or modify
|
||||
! it under the terms of the GNU General Public License as published by
|
||||
! the Free Software Foundation, either version 3 of the License, or
|
||||
! (at your option) any later version.
|
||||
!
|
||||
! Dynare is distributed in the hope that it will be useful,
|
||||
! but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
! GNU General Public License for more details.
|
||||
!
|
||||
! You should have received a copy of the GNU General Public License
|
||||
! along with Dynare. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
! Routines and data structures for multithreading over particles in local_state_space_iteration_k
|
||||
module pparticle
|
||||
use iso_c_binding
|
||||
use simulation
|
||||
use matlab_mex
|
||||
|
||||
implicit none
|
||||
|
||||
type tdata
|
||||
integer :: nm, nys, endo_nbr, nvar, order, nrestricted, nparticles
|
||||
real(real64), allocatable :: yhat(:,:), e(:,:), ynext(:,:), ys_reordered(:), restrict_var_list(:)
|
||||
type(pol), dimension(:), allocatable :: udr
|
||||
end type tdata
|
||||
|
||||
type(tdata) :: thread_data
|
||||
|
||||
contains
|
||||
|
||||
subroutine thread_eval(arg) bind(c)
|
||||
type(c_ptr), intent(in), value :: arg
|
||||
integer, pointer :: im
|
||||
integer :: i, j, start, end, q, r, ind
|
||||
type(horner), dimension(:), allocatable :: h
|
||||
real(real64), dimension(:), allocatable :: dyu
|
||||
|
||||
! Checking that the thread number got passed as argument
|
||||
if (.not. c_associated(arg)) then
|
||||
call mexErrMsgTxt("No argument was passed to thread_eval")
|
||||
end if
|
||||
call c_f_pointer(arg, im)
|
||||
|
||||
! Allocating local arrays
|
||||
allocate(h(0:thread_data%order), dyu(thread_data%nvar))
|
||||
do i=0, thread_data%order
|
||||
allocate(h(i)%c(thread_data%endo_nbr, thread_data%nvar**i))
|
||||
end do
|
||||
|
||||
! Specifying bounds for the curent thread
|
||||
q = thread_data%nparticles / thread_data%nm
|
||||
r = mod(thread_data%nparticles, thread_data%nm)
|
||||
start = (im-1)*q+1
|
||||
if (im < thread_data%nm) then
|
||||
end = start+q-1
|
||||
else
|
||||
end = thread_data%nparticles
|
||||
end if
|
||||
|
||||
! Using the Horner algorithm to evaluate the decision rule at the chosen yhat and epsilon
|
||||
do j=start,end
|
||||
dyu(1:thread_data%nys) = thread_data%yhat(:,j)
|
||||
dyu(thread_data%nys+1:) = thread_data%e(:,j)
|
||||
call eval(h, dyu, thread_data%udr, thread_data%endo_nbr, thread_data%nvar, thread_data%order)
|
||||
do i=1,thread_data%nrestricted
|
||||
ind = int(thread_data%restrict_var_list(i))
|
||||
thread_data%ynext(i,j) = h(0)%c(ind,1) + thread_data%ys_reordered(ind)
|
||||
end do
|
||||
end do
|
||||
|
||||
end subroutine thread_eval
|
||||
|
||||
end module pparticle
|
|
@ -0,0 +1,580 @@
|
|||
! Copyright © 2022 Dynare Team
|
||||
!
|
||||
! This file is part of Dynare.
|
||||
!
|
||||
! Dynare is free software: you can redistribute it and/or modify
|
||||
! it under the terms of the GNU General Public License as published by
|
||||
! the Free Software Foundation, either version 3 of the License, or
|
||||
! (at your option) any later version.
|
||||
!
|
||||
! Dynare is distributed in the hope that it will be useful,
|
||||
! but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
! GNU General Public License for more details.
|
||||
!
|
||||
! You should have received a copy of the GNU General Public License
|
||||
! along with Dynare. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
! Routines and data structures for multithreading over particles in local_state_space_iteration_3
|
||||
module pparticle_3
|
||||
use matlab_mex
|
||||
use partitions
|
||||
|
||||
implicit none
|
||||
|
||||
type tdata_3
|
||||
integer :: n, m, s, q, numthreads, xx_size, uu_size, xxx_size, uuu_size
|
||||
real(real64), pointer, contiguous :: yhat3(:,:), &
|
||||
&e(:,:), ghx(:,:), ghu(:,:), constant(:), ghxu(:,:), ghxx(:,:), &
|
||||
&ghuu(:,:), ghxxx(:,:), ghuuu(:,:), ghxxu(:,:), ghxuu(:,:), ghxss(:,:), &
|
||||
&ghuss(:,:), ss(:), y3(:,:)
|
||||
real(real64), pointer :: yhat2(:,:), yhat1(:,:), y2(:,:), y1(:,:)
|
||||
type(index), pointer, contiguous :: xx_idcs(:), uu_idcs(:), &
|
||||
&xxx_idcs(:), uuu_idcs(:)
|
||||
end type tdata_3
|
||||
|
||||
type(tdata_3) :: td3
|
||||
|
||||
contains
|
||||
|
||||
! Fills y3 as y3 = ybar + ½ghss + ghx·ŷ+ghu·ε + ½ghxx·ŷ⊗ŷ + ½ghuu·ε⊗ε +
|
||||
! ghxu·ŷ⊗ε + (1/6)·ghxxx ŷ⊗ŷ⊗ŷ + (1/6)·ghuuu·ε⊗ε⊗ε +
|
||||
! (3/6)·ghxxu·ŷ⊗ŷ⊗ε + (3/6)·ghxuu·ŷ⊗ε⊗ε +
|
||||
! (3/6)·ghxss·ŷ + (3/6)·ghuss·ε
|
||||
! in td3
|
||||
subroutine thread_eval_3(arg) bind(c)
|
||||
type(c_ptr), intent(in), value :: arg
|
||||
integer, pointer :: ithread
|
||||
integer :: is, im, j, k, start, end, q, r
|
||||
|
||||
! Checking that the thread number got passed as argument
|
||||
if (.not. c_associated(arg)) then
|
||||
call mexErrMsgTxt("No argument was passed to thread_eval_3")
|
||||
end if
|
||||
call c_f_pointer(arg, ithread)
|
||||
|
||||
! Specifying bounds for the curent thread
|
||||
q = td3%s / td3%numthreads
|
||||
r = mod(td3%s, td3%numthreads)
|
||||
start = (ithread-1)*q+1
|
||||
if (ithread < td3%numthreads) then
|
||||
end = start+q-1
|
||||
else
|
||||
end = td3%s
|
||||
end if
|
||||
|
||||
do is=start,end
|
||||
do im=1,td3%m
|
||||
! y3 = ybar + ½ghss
|
||||
td3%y3(im,is) = td3%constant(im)
|
||||
! y3 += ghx·ŷ+(3/6)·ghxss·ŷ + first n folded indices for ½ghxx·ŷ⊗ŷ
|
||||
! + first n folded indices for (1/6)ghxxx·ŷ⊗ŷ⊗ŷ
|
||||
do j=1,td3%n
|
||||
td3%y3(im,is) = td3%y3(im,is)+&
|
||||
&(0.5*td3%ghxss(j,im)+td3%ghx(j,im))*td3%yhat3(j,is)+&
|
||||
&(0.5*td3%ghxx(j,im)+(1./6.)*td3%ghxxx(j,im)*td3%yhat3(1, is))*&
|
||||
&td3%yhat3(1, is)*td3%yhat3(j,is)
|
||||
! y3 += ghxu·ŷ⊗ε
|
||||
! + first n*q folded indices of (3/6)·ghxxu·ŷ⊗ŷ⊗ε
|
||||
do k=1,td3%q
|
||||
td3%y3(im,is) = td3%y3(im,is) + &
|
||||
&(td3%ghxu(td3%q*(j-1)+k,im)+&
|
||||
&0.5*td3%ghxxu(td3%q*(j-1)+k,im)*td3%yhat3(1, is))*&
|
||||
&td3%yhat3(j, is)*td3%e(k, is)
|
||||
end do
|
||||
end do
|
||||
! y3 += ghu·ε+(3/6)·ghuss·ε + first q folded indices of ½ghuu·ε⊗ε
|
||||
! + first q folded indices for (1/6)·ghuuu·ε⊗ε⊗ε
|
||||
! + first n*q folded indices of (3/6)·ghxuu·ŷ⊗ε⊗ε
|
||||
do j=1,td3%q
|
||||
td3%y3(im,is) = td3%y3(im,is) + &
|
||||
&(0.5*td3%ghuss(j,im)+td3%ghu(j,im))*td3%e(j,is) + &
|
||||
&(0.5*td3%ghuu(j,im)+(1./6.)*td3%ghuuu(j,im)*&
|
||||
&td3%e(1, is))*td3%e(1, is)*td3%e(j, is)
|
||||
do k=1,td3%n
|
||||
td3%y3(im,is) = td3%y3(im,is) + &
|
||||
&0.5*td3%ghxuu(td3%uu_size*(k-1)+j,im)*&
|
||||
&td3%yhat3(k, is)*td3%e(1, is)*td3%e(j, is)
|
||||
end do
|
||||
end do
|
||||
! y3 += remaining ½ghxx·ŷ⊗ŷ terms
|
||||
! + the next terms starting from n+1 up to xx_size
|
||||
! of (1/6)ghxxx·ŷ⊗ŷ⊗ŷ
|
||||
! + remaining terms of (3/6)·ghxxu·ŷ⊗ŷ⊗ε
|
||||
do j=td3%n+1,td3%xx_size
|
||||
td3%y3(im,is) = td3%y3(im,is) + &
|
||||
&(0.5*td3%ghxx(j,im)+(1./6.)*td3%ghxxx(j,im)*td3%yhat3(1, is))*&
|
||||
&td3%yhat3(td3%xx_idcs(j)%coor(1), is)*&
|
||||
&td3%yhat3(td3%xx_idcs(j)%coor(2), is)
|
||||
do k=1,td3%q
|
||||
td3%y3(im,is) = td3%y3(im,is)+&
|
||||
&0.5*td3%ghxxu(td3%q*(j-1)+k,im)*&
|
||||
&td3%yhat3(td3%xx_idcs(j)%coor(1), is)*&
|
||||
&td3%yhat3(td3%xx_idcs(j)%coor(2), is)*&
|
||||
&td3%e(k, is)
|
||||
end do
|
||||
end do
|
||||
! y3 += remaining ½ghuu·ε⊗ε terms
|
||||
! + the next uu_size terms starting from q+1
|
||||
! of (1/6)·ghuuu·ε⊗ε⊗ε
|
||||
! + remaining terms of (3/6)·ghxuu·ŷ⊗ε⊗ε
|
||||
do j=td3%q+1,td3%uu_size
|
||||
td3%y3(im,is) = td3%y3(im,is) + &
|
||||
&(0.5*td3%ghuu(j,im)+(1./6.)*td3%ghuuu(j,im)*td3%e(1, is))*&
|
||||
&td3%e(td3%uu_idcs(j)%coor(1), is)*&
|
||||
&td3%e(td3%uu_idcs(j)%coor(2), is)
|
||||
do k=1,td3%n
|
||||
td3%y3(im,is) = td3%y3(im,is) + &
|
||||
&0.5*td3%ghxuu(td3%uu_size*(k-1)+j,im)*&
|
||||
&td3%yhat3(k, is)*&
|
||||
&td3%e(td3%uu_idcs(j)%coor(1), is)*&
|
||||
&td3%e(td3%uu_idcs(j)%coor(2), is)
|
||||
end do
|
||||
end do
|
||||
! y3 += remaining (1/6)·ghxxx·ŷ⊗ŷ⊗ŷ terms
|
||||
do j=td3%xx_size+1,td3%xxx_size
|
||||
td3%y3(im,is) = td3%y3(im,is)+&
|
||||
&(1./6.)*td3%ghxxx(j,im)*&
|
||||
&td3%yhat3(td3%xxx_idcs(j)%coor(1), is)*&
|
||||
&td3%yhat3(td3%xxx_idcs(j)%coor(2), is)*&
|
||||
&td3%yhat3(td3%xxx_idcs(j)%coor(3), is)
|
||||
end do
|
||||
! y3 += remaining (1/6)ghuuu·ε⊗ε⊗ε terms
|
||||
do j=td3%uu_size+1,td3%uuu_size
|
||||
td3%y3(im,is) = td3%y3(im,is) + &
|
||||
&(1./6.)*td3%ghuuu(j,im)*&
|
||||
&td3%e(td3%uuu_idcs(j)%coor(1), is)*&
|
||||
&td3%e(td3%uuu_idcs(j)%coor(2), is)*&
|
||||
&td3%e(td3%uuu_idcs(j)%coor(3), is)
|
||||
end do
|
||||
|
||||
end do
|
||||
end do
|
||||
|
||||
end subroutine thread_eval_3
|
||||
|
||||
! Fills y1 and y2 as
|
||||
! y1 = ybar + ghx·ŷ1 + ghu·ε
|
||||
! y2 = ybar + ½ghss + ghx·ŷ2 + ghu·ε + ½ghxx·ŷ1⊗ŷ1 + ½ghuu·ε⊗ε + ghxu·ŷ1⊗ε
|
||||
! y3 = ybar + ghx·ŷ3 + ghu·ε + ghxx·ŷ1⊗ŷ2 + ghuu·ε⊗ε + ghxu·ŷ1⊗ε + ghxu·ŷ2⊗ε
|
||||
! + (1/6)·ghxxx·ŷ1⊗ŷ1⊗ŷ1 + (1/6)·ghuuu·ε⊗ε⊗ε + (3/6)·ghxxu·ŷ1⊗ŷ1⊗ε
|
||||
! + (3/6)·ghxuu·ŷ1⊗ε⊗ε + (3/6)·ghxss·ŷ1 + (3/6)·ghuss·ε
|
||||
! in td3
|
||||
subroutine thread_eval_3_pruning(arg) bind(c)
|
||||
type(c_ptr), intent(in), value :: arg
|
||||
integer, pointer :: ithread
|
||||
integer :: is, im, j, k, start, end, q, r
|
||||
real(real64) :: x, y
|
||||
|
||||
! Checking that the thread number got passed as argument
|
||||
if (.not. c_associated(arg)) then
|
||||
call mexErrMsgTxt("No argument was passed to thread_eval")
|
||||
end if
|
||||
call c_f_pointer(arg, ithread)
|
||||
|
||||
! Specifying bounds for the curent thread
|
||||
q = td3%s / td3%numthreads
|
||||
r = mod(td3%s, td3%numthreads)
|
||||
start = (ithread-1)*q+1
|
||||
if (ithread < td3%numthreads) then
|
||||
end = start+q-1
|
||||
else
|
||||
end = td3%s
|
||||
end if
|
||||
|
||||
do is=start,end
|
||||
do im=1,td3%m
|
||||
! y1 = ybar
|
||||
! y2 = ybar + ½ghss
|
||||
! y3 = ybar
|
||||
td3%y1(im,is) = td3%ss(im)
|
||||
td3%y2(im,is) = td3%constant(im)
|
||||
td3%y3(im,is) = td3%ss(im)
|
||||
! y1 += ghx·ŷ1
|
||||
! y2 += ghx·ŷ2 + first n folded indices for ½ghxx·ŷ1⊗ŷ1
|
||||
! y3 += ghx·ŷ3 +(3/6)·ghxss·ŷ
|
||||
! + first n folded indices for ghxx·ŷ1⊗ŷ2
|
||||
! + first n folded indices for (1/6)ghxxx·ŷ1⊗ŷ1⊗ŷ1
|
||||
do j=1,td3%n
|
||||
td3%y1(im,is) = td3%y1(im,is) + td3%ghx(j,im)*td3%yhat1(j,is)
|
||||
td3%y2(im,is) = td3%y2(im,is) + td3%ghx(j,im)*td3%yhat2(j,is) +&
|
||||
&0.5*td3%ghxx(j,im)*td3%yhat1(1, is)*td3%yhat1(j, is)
|
||||
td3%y3(im,is) = td3%y3(im,is) + td3%ghx(j,im)*td3%yhat3(j,is) +&
|
||||
&0.5*td3%ghxss(j,im)*td3%yhat1(j,is) +&
|
||||
&td3%ghxx(j,im)*td3%yhat1(1, is)*td3%yhat2(j, is)+&
|
||||
&(1./6.)*td3%ghxxx(j,im)*td3%yhat1(1, is)*&
|
||||
&td3%yhat1(1, is)*td3%yhat1(j,is)
|
||||
! y2 += + ghxu·ŷ1⊗ε
|
||||
! y3 += + ghxu·ŷ1⊗ε + ghxu·ŷ2⊗ε
|
||||
! + first n*q folded indices of (3/6)·ghxxu·ŷ1⊗ŷ1⊗ε
|
||||
do k=1,td3%q
|
||||
td3%y2(im,is) = td3%y2(im,is)+&
|
||||
&td3%ghxu(td3%q*(j-1)+k,im)*&
|
||||
&td3%yhat1(j, is)*td3%e(k, is)
|
||||
td3%y3(im,is) = td3%y3(im,is)+&
|
||||
&td3%ghxu(td3%q*(j-1)+k,im)*&
|
||||
&(td3%yhat1(j, is)+td3%yhat2(j, is))*td3%e(k, is)+&
|
||||
&0.5*td3%ghxxu(td3%q*(j-1)+k,im)*td3%yhat1(1, is)*&
|
||||
&td3%yhat1(j, is)*td3%e(k, is)
|
||||
end do
|
||||
end do
|
||||
! y1 += +ghu·ε
|
||||
! y2 += +ghu·ε + first q folded indices for ½ghuu·ε⊗ε
|
||||
! y3 += +ghu·ε + first q folded indices for ghuu·ε⊗ε
|
||||
! + first n*q folded indices of (3/6)·ghxuu·ŷ1⊗ε⊗ε
|
||||
! + first n folded indices of (1/6)·ghuuu·ε⊗ε⊗ε
|
||||
do j=1,td3%q
|
||||
x = td3%ghu(j,im)*td3%e(j,is)
|
||||
y = td3%ghuu(j,im)*td3%e(1, is)*td3%e(j, is)
|
||||
td3%y1(im,is) = td3%y1(im,is) + x
|
||||
td3%y2(im,is) = td3%y2(im,is) + x + 0.5*y
|
||||
td3%y3(im,is) = td3%y3(im,is) + x + y +&
|
||||
&td3%ghuss(j,im)*td3%e(j,is)+&
|
||||
&(1./6.)*td3%ghuuu(j,im)*td3%e(1, is)*td3%e(1, is)*&
|
||||
&td3%e(j, is)
|
||||
do k=1,td3%n
|
||||
td3%y3(im,is) = td3%y3(im,is) + &
|
||||
&0.5*td3%ghxuu(td3%uu_size*(k-1)+j,im)*&
|
||||
&td3%yhat1(k, is)*td3%e(1, is)*td3%e(j, is)
|
||||
end do
|
||||
end do
|
||||
! y2 += remaining ½ghxx·ŷ1⊗ŷ1 terms
|
||||
! y3 += remaining ghxx·ŷ1⊗ŷ2 terms
|
||||
! + the next terms starting from n+1 up to xx_size
|
||||
! of (1/6)ghxxx·ŷ1⊗ŷ1⊗ŷ1
|
||||
! + remaining terms of (3/6)·ghxxu·ŷ1⊗ŷ1⊗ε
|
||||
do j=td3%n+1,td3%xx_size
|
||||
td3%y2(im,is) = td3%y2(im,is) + &
|
||||
&0.5*td3%ghxx(j,im)*&
|
||||
&td3%yhat1(td3%xx_idcs(j)%coor(1), is)*&
|
||||
&td3%yhat1(td3%xx_idcs(j)%coor(2), is)
|
||||
td3%y3(im,is) = td3%y3(im,is) + &
|
||||
&td3%ghxx(j,im)*&
|
||||
&td3%yhat1(td3%xx_idcs(j)%coor(1), is)*&
|
||||
&td3%yhat2(td3%xx_idcs(j)%coor(2), is)+&
|
||||
&(1./6.)*td3%ghxxx(j,im)*td3%yhat1(1, is)*&
|
||||
&td3%yhat1(td3%xx_idcs(j)%coor(1), is)*&
|
||||
&td3%yhat1(td3%xx_idcs(j)%coor(2), is)
|
||||
do k=1,td3%n
|
||||
td3%y3(im,is) = td3%y3(im,is) + &
|
||||
&0.5*td3%ghxxu(td3%q*(j-1)+k,im)*&
|
||||
&td3%yhat1(td3%xx_idcs(j)%coor(1), is)*&
|
||||
&td3%yhat1(td3%xx_idcs(j)%coor(2), is)*&
|
||||
&td3%e(k, is)
|
||||
end do
|
||||
end do
|
||||
! y2 += remaining ½ghuu·ε⊗ε terms
|
||||
! y3 += remaining ghuu·ε⊗ε terms
|
||||
! + remaining terms of (3/6)·ghxuu·ŷ⊗ε⊗ε
|
||||
! + the next uu_size terms starting from q+1
|
||||
! of (1/6)·ghuuu·ε⊗ε⊗ε
|
||||
do j=td3%q+1,td3%uu_size
|
||||
x = td3%ghuu(j,im)*&
|
||||
&td3%e(td3%uu_idcs(j)%coor(1), is)*&
|
||||
&td3%e(td3%uu_idcs(j)%coor(2), is)
|
||||
td3%y2(im,is) = td3%y2(im,is)+0.5*x
|
||||
td3%y3(im,is) = td3%y3(im,is)+x+&
|
||||
&(1./6.)*td3%ghuuu(j,im)*td3%e(1, is)*&
|
||||
&td3%e(td3%uu_idcs(j)%coor(1), is)*&
|
||||
&td3%e(td3%uu_idcs(j)%coor(2), is)
|
||||
do k=1,td3%n
|
||||
td3%y3(im,is) = td3%y3(im,is) + &
|
||||
&0.5*td3%ghxuu(td3%uu_size*(k-1)+j,im)*&
|
||||
&td3%yhat1(k, is)*&
|
||||
&td3%e(td3%uu_idcs(j)%coor(1), is)*&
|
||||
&td3%e(td3%uu_idcs(j)%coor(2), is)
|
||||
end do
|
||||
end do
|
||||
! y3 += remaining (1/6)·ghxxx·ŷ⊗ŷ⊗ŷ terms
|
||||
do j=td3%xx_size+1,td3%xxx_size
|
||||
td3%y3(im,is) = td3%y3(im,is)+&
|
||||
&(1./6.)*td3%ghxxx(j,im)*&
|
||||
&td3%yhat1(td3%xxx_idcs(j)%coor(1), is)*&
|
||||
&td3%yhat1(td3%xxx_idcs(j)%coor(2), is)*&
|
||||
&td3%yhat1(td3%xxx_idcs(j)%coor(3), is)
|
||||
end do
|
||||
! y3 += remaining (1/6)ghuuu·ε⊗ε⊗ε terms
|
||||
do j=td3%uu_size+1,td3%uuu_size
|
||||
td3%y3(im,is) = td3%y3(im,is) + &
|
||||
&(1./6.)*td3%ghuuu(j,im)*&
|
||||
&td3%e(td3%uuu_idcs(j)%coor(1), is)*&
|
||||
&td3%e(td3%uuu_idcs(j)%coor(2), is)*&
|
||||
&td3%e(td3%uuu_idcs(j)%coor(3), is)
|
||||
end do
|
||||
end do
|
||||
end do
|
||||
|
||||
end subroutine thread_eval_3_pruning
|
||||
|
||||
end module pparticle_3
|
||||
|
||||
! The code of the local_state_space_iteration_3 routine
|
||||
! Input:
|
||||
! prhs[1] yhat3 [double] n×s array, time t particles.
|
||||
! prhs[2] e [double] q×s array, time t innovations.
|
||||
! prhs[3] ghx [double] m×n array, first order reduced form.
|
||||
! prhs[4] ghu [double] m×q array, first order reduced form.
|
||||
! prhs[5] constant [double] m×1 array, deterministic steady state +
|
||||
! third order correction for the union of
|
||||
! the states and observed variables.
|
||||
! prhs[6] ghxx [double] m×n² array, second order reduced form.
|
||||
! prhs[7] ghuu [double] m×q² array, second order reduced form.
|
||||
! prhs[8] ghxu [double] m×nq array, second order reduced form.
|
||||
! prhs[9] ghxxx [double] m×n array, third order reduced form.
|
||||
! prhs[10] ghuuu [double] m×q array, third order reduced form.
|
||||
! prhs[11] ghxxu [double] m×n²q array, third order reduced form.
|
||||
! prhs[12] ghxuu [double] m×nq² array, third order reduced form.
|
||||
! prhs[13] ghxss [double] m×n array, third order reduced form.
|
||||
! prhs[14] ghuss [double] m×q array, third order reduced form.
|
||||
! prhs[15] yhat2 [double] [OPTIONAL] 2n×s array, time t particles up to 2nd order pruning additional latent variables. The first n rows concern the pruning first-order latent variables, while the last n rows concern the pruning 2nd-order latent variables
|
||||
! prhs[16] ss [double] [OPTIONAL] m×1 array, steady state for the union of the states and the observed variables (needed for the pruning mode).
|
||||
! prhs[15 or 17] [double] num of threads
|
||||
!
|
||||
! Output:
|
||||
! plhs[1] y3 [double] m×s array, time t+1 particles.
|
||||
! plhs[2] y2 [double] 2m×s array, time t+1 particles for the pruning latent variables up to 2nd order. The first m rows concern the pruning first-order latent variables, while the last m rows concern the pruning 2nd-order latent variables
|
||||
subroutine mexFunction(nlhs, plhs, nrhs, prhs) bind(c, name='mexFunction')
|
||||
use iso_c_binding
|
||||
use matlab_mex
|
||||
use pascal
|
||||
use partitions
|
||||
use pthread
|
||||
use pparticle_3
|
||||
implicit none
|
||||
|
||||
type(c_ptr), dimension(*), intent(in), target :: prhs
|
||||
type(c_ptr), dimension(*), intent(out) :: plhs
|
||||
integer(c_int), intent(in), value :: nlhs, nrhs
|
||||
integer :: n, m, s, q, numthreads
|
||||
real(real64), pointer, contiguous :: ghx(:,:), ghu(:,:), ghxx(:,:), &
|
||||
&ghuu(:,:), ghxu(:,:), ghxxx(:,:), ghuuu(:,:), ghxxu(:,:), &
|
||||
&ghxuu(:,:), ghxss(:,:), ghuss(:,:), yhatlat(:,:), ylat(:,:)
|
||||
integer :: i, j, k, xx_size, uu_size, xxx_size, uuu_size, rc
|
||||
character(kind=c_char, len=10) :: arg_nber
|
||||
type(pascal_triangle) :: p
|
||||
integer, allocatable :: xx_nbeq(:), xxx_nbeq(:), &
|
||||
&uu_nbeq(:), uuu_nbeq(:), xx_off(:), uu_off(:), &
|
||||
&xxx_off(:), uuu_off(:)
|
||||
type(c_pthread_t), allocatable :: threads(:)
|
||||
integer, allocatable, target :: routines(:)
|
||||
|
||||
! 0. Checking the consistency and validity of input arguments
|
||||
if (nrhs /= 15 .and. nrhs /= 17) then
|
||||
call mexErrMsgTxt("Must have exactly 15 inputs or 18 inputs")
|
||||
end if
|
||||
|
||||
if (nlhs > 2) then
|
||||
call mexErrMsgTxt("Too many output arguments.")
|
||||
end if
|
||||
|
||||
do i=1, max(14, nrhs-1)
|
||||
if (.not. (c_associated(prhs(i)) .and. mxIsDouble(prhs(i)) .and. &
|
||||
(.not. mxIsComplex(prhs(i))) .and. (.not. mxIsSparse(prhs(i))))) then
|
||||
write (arg_nber,"(i2)") i
|
||||
call mexErrMsgTxt("Argument " // trim(arg_nber) // " should be a real dense matrix")
|
||||
end if
|
||||
end do
|
||||
|
||||
i = max(15,nrhs)
|
||||
if (.not. (c_associated(prhs(i)) .and. mxIsScalar(prhs(i)) .and. &
|
||||
mxIsNumeric(prhs(i)))) then
|
||||
write (arg_nber,"(i2)") i
|
||||
call mexErrMsgTxt("Argument " // trim(arg_nber) // " should be a numeric scalar")
|
||||
end if
|
||||
numthreads = int(mxGetScalar(prhs(i)))
|
||||
if (numthreads <= 0) then
|
||||
write (arg_nber,"(i2)") i
|
||||
call mexErrMsgTxt("Argument " // trim(arg_nber) // " should be a positive integer")
|
||||
end if
|
||||
td3%numthreads = numthreads
|
||||
|
||||
n = int(mxGetM(prhs(1))) ! Number of states.
|
||||
s = int(mxGetN(prhs(1))) ! Number of particles.
|
||||
q = int(mxGetM(prhs(2))) ! Number of innovations.
|
||||
m = int(mxGetM(prhs(3))) ! Number of elements in the union of states and observed variables.
|
||||
td3%n = n
|
||||
td3%s = s
|
||||
td3%q = q
|
||||
td3%m = m
|
||||
|
||||
if ((s /= mxGetN(prhs(2))) & ! Number of columns for epsilon
|
||||
&.or. (n /= mxGetN(prhs(3))) & ! Number of columns for ghx
|
||||
&.or. (m /= mxGetM(prhs(4))) & ! Number of rows for ghu
|
||||
&.or. (q /= mxGetN(prhs(4))) & ! Number of columns for ghu
|
||||
&.or. (m /= mxGetM(prhs(5))) & ! Number of rows for 2nd order constant correction + deterministic steady state
|
||||
&.or. (m /= mxGetM(prhs(6))) & ! Number of rows for ghxx
|
||||
&.or. (n*n /= mxGetN(prhs(6))) & ! Number of columns for ghxx
|
||||
&.or. (m /= mxGetM(prhs(7))) & ! Number of rows for ghuu
|
||||
&.or. (q*q /= mxGetN(prhs(7))) & ! Number of columns for ghuu
|
||||
&.or. (m /= mxGetM(prhs(8))) & ! Number of rows for ghxu
|
||||
&.or. (n*q /= mxGetN(prhs(8))) & ! Number of columns for ghxu
|
||||
&.or. (m /= mxGetM(prhs(9))) & ! Number of rows for ghxxx
|
||||
&.or. (n*n*n /= mxGetN(prhs(9))) & ! Number of columns for ghxxx
|
||||
&.or. (m /= mxGetM(prhs(10))) & ! Number of rows for ghuuu
|
||||
&.or. (q*q*q /= mxGetN(prhs(10))) & ! Number of columns for ghuuu
|
||||
&.or. (m /= mxGetM(prhs(11))) & ! Number of rows for ghxxu
|
||||
&.or. (n*n*q /= mxGetN(prhs(11))) & ! Number of columns for ghxxu
|
||||
&.or. (m /= mxGetM(prhs(12))) & ! Number of rows for ghxuu
|
||||
&.or. (n*q*q /= mxGetN(prhs(12))) & ! Number of columns for ghxuu
|
||||
&.or. (m /= mxGetM(prhs(13))) & ! Number of rows for ghxss
|
||||
&.or. (n /= mxGetN(prhs(13))) & ! Number of columns for ghxss
|
||||
&.or. (m /= mxGetM(prhs(14))) & ! Number of rows for ghuss
|
||||
&.or. (q /= mxGetN(prhs(14))) & ! Number of columns for ghuss
|
||||
&.or. ((nrhs == 17) & ! With pruning optional inputs
|
||||
&.and. ((2*n /= mxGetM(prhs(15))) & ! Number of rows for yhat2
|
||||
&.or. (s /= mxGetN(prhs(15))) & ! Number of columns for yhat2
|
||||
&.or. (m /= mxGetM(prhs(16)))))) then ! Number of rows for ss
|
||||
! &) then
|
||||
call mexErrMsgTxt("Input dimension mismatch")
|
||||
end if
|
||||
|
||||
! 1. Getting relevant information to take advantage of symmetries
|
||||
! There are symmetries in the ghxx, ghuu, ghxxx, ghuuu, ghxxu and ghxuu terms
|
||||
! that we may exploit to avoid unnecessarily repeating operations in matrix-vector
|
||||
! multiplications, e.g in ghxx·ŷ⊗ŷ.
|
||||
! In matrix-vector multiplications such as ghxx·ŷ⊗ŷ, we loop through all the folded offsets
|
||||
! and thus need for each one of them :
|
||||
! (i) the corresponding folded index, e.g (α₁,α₂), α₁≤α₂ for ghxx
|
||||
! (i) the corresponding offset in the unfolded matrix
|
||||
! (ii) the corresponding number of equivalent unfolded indices (1 if α₁=α₂, 2 otherwise)
|
||||
! It is better to compute these beforehand as it avoids repeating the calculation for
|
||||
! each particle. The `folded_offset_loop` routine carries out this operation.
|
||||
|
||||
p = pascal_triangle(max(n,q)+3-1)
|
||||
xx_size = get(2,n+2-1,p)
|
||||
uu_size = get(2,q+2-1,p)
|
||||
xxx_size = get(3,n+3-1,p)
|
||||
uuu_size = get(3,q+3-1,p)
|
||||
|
||||
td3%xx_size = xx_size
|
||||
td3%uu_size = uu_size
|
||||
td3%xxx_size = xxx_size
|
||||
td3%uuu_size = uuu_size
|
||||
|
||||
allocate(td3%xx_idcs(xx_size), td3%uu_idcs(uu_size), &
|
||||
&td3%xxx_idcs(xxx_size), td3%uuu_idcs(uuu_size), &
|
||||
&xx_off(xx_size), uu_off(uu_size), &
|
||||
&xxx_off(xxx_size), uuu_off(uuu_size), &
|
||||
&xx_nbeq(xx_size), uu_nbeq(uu_size), &
|
||||
&xxx_nbeq(xxx_size), uuu_nbeq(uuu_size))
|
||||
|
||||
call folded_offset_loop(td3%xx_idcs, xx_nbeq, &
|
||||
&xx_off, n, 2, p)
|
||||
call folded_offset_loop(td3%uu_idcs, uu_nbeq, &
|
||||
&uu_off, q, 2, p)
|
||||
call folded_offset_loop(td3%xxx_idcs, xxx_nbeq, &
|
||||
&xxx_off, n, 3, p)
|
||||
call folded_offset_loop(td3%uuu_idcs, uuu_nbeq, &
|
||||
&uuu_off, q, 3, p)
|
||||
|
||||
! 1. Storing the relevant input variables in Fortran
|
||||
td3%yhat3(1:n,1:s) => mxGetPr(prhs(1))
|
||||
td3%e(1:q,1:s) => mxGetPr(prhs(2))
|
||||
ghx(1:m,1:n) => mxGetPr(prhs(3))
|
||||
ghu(1:m,1:q) => mxGetPr(prhs(4))
|
||||
td3%constant => mxGetPr(prhs(5))
|
||||
ghxx(1:m,1:n*n) => mxGetPr(prhs(6))
|
||||
ghuu(1:m,1:q*q) => mxGetPr(prhs(7))
|
||||
ghxu(1:m,1:n*q) => mxGetPr(prhs(8))
|
||||
ghxxx(1:m,1:n*n*n) => mxGetPr(prhs(9))
|
||||
ghuuu(1:m,1:q*q*q) => mxGetPr(prhs(10))
|
||||
ghxxu(1:m,1:n*n*q) => mxGetPr(prhs(11))
|
||||
ghxuu(1:m,1:n*q*q) => mxGetPr(prhs(12))
|
||||
ghxss(1:m,1:n) => mxGetPr(prhs(13))
|
||||
ghuss(1:m,1:q) => mxGetPr(prhs(14))
|
||||
if (nrhs == 17) then
|
||||
yhatlat(1:2*n,1:s) => mxGetPr(prhs(15))
|
||||
td3%yhat1 => yhatlat(1:n,1:s)
|
||||
td3%yhat2 => yhatlat(n+1:2*n,1:s)
|
||||
td3%ss => mxGetPr(prhs(16))
|
||||
end if
|
||||
|
||||
! Getting a transposed folded copy of the unfolded tensors
|
||||
! for future loops to be more efficient
|
||||
allocate(td3%ghx(n,m), td3%ghu(q,m),&
|
||||
&td3%ghuu(uu_size,m), td3%ghxu(n*q,m), &
|
||||
&td3%ghxx(xx_size,m), &
|
||||
&td3%ghxxx(xxx_size,m), td3%ghuuu(uuu_size,m), &
|
||||
&td3%ghxxu(xx_size*q,m), td3%ghxuu(n*uu_size,m), &
|
||||
&td3%ghxss(n,m), td3%ghuss(q,m))
|
||||
do i=1,m
|
||||
do j=1,n
|
||||
td3%ghx(j,i) = ghx(i,j)
|
||||
td3%ghxss(j,i) = ghxss(i,j)
|
||||
td3%ghxx(j,i) = xx_nbeq(j)*ghxx(i,xx_off(j))
|
||||
td3%ghxxx(j,i) = xxx_nbeq(j)*ghxxx(i,xxx_off(j))
|
||||
do k=1,q
|
||||
td3%ghxu(q*(j-1)+k,i) = ghxu(i,q*(j-1)+k)
|
||||
td3%ghxxu(q*(j-1)+k,i) = xx_nbeq(j)*ghxxu(i,q*(xx_off(j)-1)+k)
|
||||
end do
|
||||
end do
|
||||
do j=n+1,xx_size
|
||||
td3%ghxx(j,i) = xx_nbeq(j)*ghxx(i,xx_off(j))
|
||||
td3%ghxxx(j,i) = xxx_nbeq(j)*ghxxx(i,xxx_off(j))
|
||||
do k=1,q
|
||||
td3%ghxxu(q*(j-1)+k,i) = xx_nbeq(j)*ghxxu(i,q*(xx_off(j)-1)+k)
|
||||
end do
|
||||
end do
|
||||
do j=xx_size+1,xxx_size
|
||||
td3%ghxxx(j,i) = xxx_nbeq(j)*ghxxx(i,xxx_off(j))
|
||||
end do
|
||||
do j=1,q
|
||||
td3%ghu(j,i) = ghu(i,j)
|
||||
td3%ghuss(j,i) = ghuss(i,j)
|
||||
td3%ghuu(j,i) = uu_nbeq(j)*ghuu(i,uu_off(j))
|
||||
td3%ghuuu(j,i) = uuu_nbeq(j)*ghuuu(i,uuu_off(j))
|
||||
do k=1,n
|
||||
td3%ghxuu(uu_size*(k-1)+j,i) = uu_nbeq(j)*ghxuu(i,q*q*(k-1)+uu_off(j))
|
||||
end do
|
||||
end do
|
||||
do j=q+1,uu_size
|
||||
td3%ghuu(j,i) = uu_nbeq(j)*ghuu(i,uu_off(j))
|
||||
td3%ghuuu(j,i) = uuu_nbeq(j)*ghuuu(i,uuu_off(j))
|
||||
do k=1,n
|
||||
td3%ghxuu(uu_size*(k-1)+j,i) = uu_nbeq(j)*ghxuu(i,q*q*(k-1)+uu_off(j))
|
||||
end do
|
||||
end do
|
||||
do j=uu_size+1,uuu_size
|
||||
td3%ghuuu(j,i) = uuu_nbeq(j)*ghuuu(i,uuu_off(j))
|
||||
end do
|
||||
end do
|
||||
|
||||
! 3. Implementing the calculations:
|
||||
|
||||
plhs(1) = mxCreateDoubleMatrix(int(m, mwSize), int(s, mwSize), mxREAL)
|
||||
td3%y3(1:m,1:s) => mxGetPr(plhs(1))
|
||||
if (nrhs == 17) then
|
||||
plhs(2) = mxCreateDoubleMatrix(int(2*m, mwSize), int(s, mwSize), mxREAL)
|
||||
ylat(1:2*m,1:s) => mxGetPr(plhs(2))
|
||||
td3%y1 => ylat(1:m,1:s)
|
||||
td3%y2 => ylat(m+1:2*m,1:s)
|
||||
end if
|
||||
|
||||
allocate(threads(numthreads), routines(numthreads))
|
||||
routines = [ (i, i = 1, numthreads) ]
|
||||
|
||||
if (numthreads == 1) then
|
||||
if (nrhs == 17) then
|
||||
call thread_eval_3_pruning(c_loc(routines(1)))
|
||||
else
|
||||
call thread_eval_3(c_loc(routines(1)))
|
||||
end if
|
||||
else
|
||||
! Creating the threads
|
||||
if (nrhs == 17) then
|
||||
do i = 1, numthreads
|
||||
rc = c_pthread_create(threads(i), c_null_ptr, c_funloc(thread_eval_3_pruning), c_loc(routines(i)))
|
||||
end do
|
||||
else
|
||||
do i = 1, numthreads
|
||||
rc = c_pthread_create(threads(i), c_null_ptr, c_funloc(thread_eval_3), c_loc(routines(i)))
|
||||
end do
|
||||
end if
|
||||
|
||||
! Joining the threads
|
||||
do i = 1, numthreads
|
||||
rc = c_pthread_join(threads(i), c_loc(routines(i)))
|
||||
end do
|
||||
end if
|
||||
|
||||
end subroutine mexFunction
|
|
@ -15,6 +15,69 @@
|
|||
! You should have received a copy of the GNU General Public License
|
||||
! along with Dynare. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
! Routines and data structures for multithreading over particles in local_state_space_iteration_k
|
||||
module pparticle
|
||||
use iso_c_binding
|
||||
use simulation
|
||||
use matlab_mex
|
||||
|
||||
implicit none
|
||||
|
||||
type tdata
|
||||
integer :: nm, nys, endo_nbr, nvar, order, nrestricted, nparticles
|
||||
real(real64), allocatable :: yhat(:,:), e(:,:), ynext(:,:), ys_reordered(:), restrict_var_list(:)
|
||||
type(pol), dimension(:), allocatable :: udr
|
||||
end type tdata
|
||||
|
||||
type(tdata) :: thread_data
|
||||
|
||||
contains
|
||||
|
||||
subroutine thread_eval(arg) bind(c)
|
||||
type(c_ptr), intent(in), value :: arg
|
||||
integer, pointer :: im
|
||||
integer :: i, j, start, end, q, r, ind
|
||||
type(horner), dimension(:), allocatable :: h
|
||||
real(real64), dimension(:), allocatable :: dyu
|
||||
|
||||
! Checking that the thread number got passed as argument
|
||||
if (.not. c_associated(arg)) then
|
||||
call mexErrMsgTxt("No argument was passed to thread_eval")
|
||||
end if
|
||||
call c_f_pointer(arg, im)
|
||||
|
||||
! Allocating local arrays
|
||||
allocate(h(0:thread_data%order), dyu(thread_data%nvar))
|
||||
do i=0, thread_data%order
|
||||
allocate(h(i)%c(thread_data%endo_nbr, thread_data%nvar**i))
|
||||
end do
|
||||
|
||||
! Specifying bounds for the curent thread
|
||||
q = thread_data%nparticles / thread_data%nm
|
||||
r = mod(thread_data%nparticles, thread_data%nm)
|
||||
start = (im-1)*q+1
|
||||
if (im < thread_data%nm) then
|
||||
end = start+q-1
|
||||
else
|
||||
end = thread_data%nparticles
|
||||
end if
|
||||
|
||||
! Using the Horner algorithm to evaluate the decision rule at the chosen yhat and epsilon
|
||||
do j=start,end
|
||||
dyu(1:thread_data%nys) = thread_data%yhat(:,j)
|
||||
dyu(thread_data%nys+1:) = thread_data%e(:,j)
|
||||
call eval(h, dyu, thread_data%udr, thread_data%endo_nbr, thread_data%nvar, thread_data%order)
|
||||
do i=1,thread_data%nrestricted
|
||||
ind = int(thread_data%restrict_var_list(i))
|
||||
thread_data%ynext(i,j) = h(0)%c(ind,1) + thread_data%ys_reordered(ind)
|
||||
end do
|
||||
end do
|
||||
|
||||
end subroutine thread_eval
|
||||
|
||||
end module pparticle
|
||||
|
||||
! The code of the local_state_space_iteration_k routine
|
||||
! input:
|
||||
! yhat values of endogenous variables
|
||||
! epsilon values of the exogenous shock
|
||||
|
@ -24,7 +87,6 @@
|
|||
! udr struct containing the model unfolded tensors
|
||||
! output:
|
||||
! ynext simulated next-period results
|
||||
|
||||
subroutine mexFunction(nlhs, plhs, nrhs, prhs) bind(c, name='mexFunction')
|
||||
use iso_fortran_env
|
||||
use iso_c_binding
|
||||
|
|
|
@ -652,7 +652,8 @@ PARTICLEFILES = \
|
|||
particle/dsge_base2.mod \
|
||||
particle/first_spec.mod \
|
||||
particle/first_spec_MCMC.mod \
|
||||
particle/local_state_space_iteration_k_test.mod
|
||||
particle/local_state_space_iteration_k_test.mod \
|
||||
particle/local_state_space_iteration_3_test.mod
|
||||
|
||||
|
||||
XFAIL_MODFILES = ramst_xfail.mod \
|
||||
|
@ -959,6 +960,9 @@ discretionary_policy/dennis_1_estim.o.trs: discretionary_policy/dennis_1.o.trs
|
|||
discretionary_policy/Gali_2015_chapter_3_nonlinear.m.trs: discretionary_policy/Gali_2015_chapter_3.m.trs
|
||||
discretionary_policy/Gali_2015_chapter_3_nonlinear.o.trs: discretionary_policy/Gali_2015_chapter_3.o.trs
|
||||
|
||||
particle/local_state_space_iteration_3_test.m.trs: particle/first_spec.m.trs
|
||||
particle/local_state_space_iteration_3_test.o.trs: particle/first_spec.o.trs
|
||||
|
||||
particle/local_state_space_iteration_k_test.m.trs: particle/first_spec.m.trs
|
||||
particle/local_state_space_iteration_k_test.o.trs: particle/first_spec.o.trs
|
||||
|
||||
|
|
|
@ -0,0 +1,92 @@
|
|||
/*
|
||||
Tests that local_state_space_iteration_3 and local_state_space_iteration_k (for k=3) return the same results
|
||||
|
||||
This file must be run after first_spec.mod (both are based on the same model).
|
||||
*/
|
||||
|
||||
@#include "first_spec_common.inc"
|
||||
|
||||
varobs q ca;
|
||||
|
||||
shocks;
|
||||
var eeps = 0.04^2;
|
||||
var nnu = 0.03^2;
|
||||
var q = 0.01^2;
|
||||
var ca = 0.01^2;
|
||||
end;
|
||||
|
||||
// Initialize various structures
|
||||
estimation(datafile='my_data.mat',order=3,mode_compute=0,mh_replic=0,filter_algorithm=sis,nonlinear_filter_initialization=2
|
||||
,cova_compute=0 %tell program that no covariance matrix was computed
|
||||
);
|
||||
|
||||
stoch_simul(order=3, periods=200, irf=0, k_order_solver);
|
||||
|
||||
// Really perform the test
|
||||
|
||||
nparticles = options_.particle.number_of_particles;
|
||||
nsims = 1e6/nparticles;
|
||||
|
||||
/* We generate particles using realistic distributions (though this is not
|
||||
strictly needed) */
|
||||
nstates = M_.npred + M_.nboth;
|
||||
state_idx = oo_.dr.order_var((M_.nstatic+1):(M_.nstatic+nstates));
|
||||
yhat = chol(oo_.var(state_idx,state_idx))*randn(nstates, nparticles);
|
||||
epsilon = chol(M_.Sigma_e)*randn(M_.exo_nbr, nparticles);
|
||||
|
||||
dr = oo_.dr;
|
||||
|
||||
|
||||
// “rf” stands for “Reduced Form”
|
||||
rf_ghx = dr.ghx(dr.restrict_var_list, :);
|
||||
rf_ghu = dr.ghu(dr.restrict_var_list, :);
|
||||
rf_constant = dr.ys(dr.order_var)+0.5*dr.ghs2;
|
||||
rf_constant = rf_constant(dr.restrict_var_list, :);
|
||||
rf_ghxx = dr.ghxx(dr.restrict_var_list, :);
|
||||
rf_ghuu = dr.ghuu(dr.restrict_var_list, :);
|
||||
rf_ghxu = dr.ghxu(dr.restrict_var_list, :);
|
||||
rf_ghxxx = dr.ghxxx(dr.restrict_var_list, :);
|
||||
rf_ghuuu = dr.ghuuu(dr.restrict_var_list, :);
|
||||
rf_ghxxu = dr.ghxxu(dr.restrict_var_list, :);
|
||||
rf_ghxuu = dr.ghxuu(dr.restrict_var_list, :);
|
||||
rf_ghxss = dr.ghxss(dr.restrict_var_list, :);
|
||||
rf_ghuss = dr.ghuss(dr.restrict_var_list, :);
|
||||
|
||||
options_.threads.local_state_space_iteration_3 = 12;
|
||||
options_.threads.local_state_space_iteration_k = 12;
|
||||
|
||||
% Without pruning
|
||||
tStart1 = tic; for i=1:nsims, ynext1 = local_state_space_iteration_3(yhat, epsilon, rf_ghx, rf_ghu, rf_constant, rf_ghxx, rf_ghuu, rf_ghxu, rf_ghxxx, rf_ghuuu, rf_ghxxu, rf_ghxuu, rf_ghxss, rf_ghuss, options_.threads.local_state_space_iteration_3); end, tElapsed1 = toc(tStart1);
|
||||
tStart2 = tic; [udr] = folded_to_unfolded_dr(dr, M_, options_); for i=1:nsims, ynext2 = local_state_space_iteration_k(yhat, epsilon, dr, M_, options_, udr); end, tElapsed2 = toc(tStart2);
|
||||
|
||||
if max(max(abs(ynext1-ynext2))) > 1e-10
|
||||
error('Without pruning: Inconsistency between local_state_space_iteration_3 and local_state_space_iteration_k')
|
||||
end
|
||||
|
||||
if tElapsed1<tElapsed2
|
||||
skipline()
|
||||
dprintf('Without pruning: local_state_space_iteration_3 is %5.2f times faster than local_state_space_iteration_k', tElapsed2/tElapsed1)
|
||||
skipline()
|
||||
else
|
||||
skipline()
|
||||
dprintf('Without pruning: local_state_space_iteration_3 is %5.2f times slower than local_state_space_iteration_k', tElapsed1/tElapsed2)
|
||||
skipline()
|
||||
end
|
||||
|
||||
% With pruning
|
||||
rf_ss = dr.ys(dr.order_var);
|
||||
rf_ss = rf_ss(dr.restrict_var_list, :);
|
||||
yhat_ = chol(oo_.var(state_idx,state_idx))*randn(nstates, nparticles);
|
||||
yhat__ = zeros(2*nstates,nparticles);
|
||||
yhat__(1:nstates,:) = yhat_;
|
||||
yhat__(nstates+1:2*nstates,:) = chol(oo_.var(state_idx,state_idx))*randn(nstates, nparticles);
|
||||
nstatesandobs = size(rf_ghx,1);
|
||||
|
||||
[ynext1,ynext1_lat] = local_state_space_iteration_3(yhat, epsilon, rf_ghx, rf_ghu, rf_constant, rf_ghxx, rf_ghuu, rf_ghxu, rf_ghxxx, rf_ghuuu, rf_ghxxu, rf_ghxuu, rf_ghxss, rf_ghuss, yhat__, rf_ss, options_.threads.local_state_space_iteration_3);
|
||||
[ynext2,ynext2_lat] = local_state_space_iteration_2(yhat__(nstates+1:2*nstates,:), epsilon, rf_ghx, rf_ghu, rf_constant, rf_ghxx, rf_ghuu, rf_ghxu, yhat_, rf_ss, options_.threads.local_state_space_iteration_2);
|
||||
if max(max(abs(ynext1_lat(nstatesandobs+1:2*nstatesandobs,:)-ynext2))) > 1e-14
|
||||
error('With pruning: inconsistency between local_state_space_iteration_2 and local_state_space_iteration_3 on the 2nd-order pruned variable')
|
||||
end
|
||||
if max(max(abs(ynext1_lat(1:nstatesandobs,:)-ynext2_lat))) > 1e-14
|
||||
error('With pruning: inconsistency between local_state_space_iteration_2 and local_state_space_iteration_3 on the 1st-order pruned variable')
|
||||
end
|
Loading…
Reference in New Issue