! Copyright © 2022 Dynare Team ! ! This file is part of Dynare. ! ! Dynare is free software: you can redistribute it and/or modify ! it under the terms of the GNU General Public License as published by ! the Free Software Foundation, either version 3 of the License, or ! (at your option) any later version. ! ! Dynare is distributed in the hope that it will be useful, ! but WITHOUT ANY WARRANTY; without even the implied warranty of ! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ! GNU General Public License for more details. ! ! You should have received a copy of the GNU General Public License ! along with Dynare. If not, see . ! Routines and data structures for multithreading over particles in local_state_space_iteration_3 module pparticle_3 use matlab_mex use partitions implicit none type tdata_3 integer :: n, m, s, q, numthreads, xx_size, uu_size, xxx_size, uuu_size real(real64), pointer, contiguous :: yhat3(:,:), & &e(:,:), ghx(:,:), ghu(:,:), constant(:), ghxu(:,:), ghxx(:,:), & &ghuu(:,:), ghxxx(:,:), ghuuu(:,:), ghxxu(:,:), ghxuu(:,:), ghxss(:,:), & &ghuss(:,:), ss(:), y3(:,:) real(real64), pointer :: yhat2(:,:), yhat1(:,:), y2(:,:), y1(:,:) type(index), pointer, contiguous :: xx_idcs(:), uu_idcs(:), & &xxx_idcs(:), uuu_idcs(:) end type tdata_3 type(tdata_3) :: td3 contains ! Fills y3 as y3 = ybar + ½ghss + ghx·ŷ+ghu·ε + ½ghxx·ŷ⊗ŷ + ½ghuu·ε⊗ε + ! ghxu·ŷ⊗ε + (1/6)·ghxxx ŷ⊗ŷ⊗ŷ + (1/6)·ghuuu·ε⊗ε⊗ε + ! (3/6)·ghxxu·ŷ⊗ŷ⊗ε + (3/6)·ghxuu·ŷ⊗ε⊗ε + ! (3/6)·ghxss·ŷ + (3/6)·ghuss·ε ! in td3 subroutine thread_eval_3(arg) bind(c) type(c_ptr), intent(in), value :: arg integer, pointer :: ithread integer :: is, im, j, k, start, end, q, r ! Checking that the thread number got passed as argument if (.not. c_associated(arg)) then call mexErrMsgTxt("No argument was passed to thread_eval_3") end if call c_f_pointer(arg, ithread) ! Specifying bounds for the curent thread q = td3%s / td3%numthreads r = mod(td3%s, td3%numthreads) start = (ithread-1)*q+1 if (ithread < td3%numthreads) then end = start+q-1 else end = td3%s end if do is=start,end do im=1,td3%m ! y3 = ybar + ½ghss td3%y3(im,is) = td3%constant(im) ! y3 += ghx·ŷ+(3/6)·ghxss·ŷ + first n folded indices for ½ghxx·ŷ⊗ŷ ! + first n folded indices for (1/6)ghxxx·ŷ⊗ŷ⊗ŷ do j=1,td3%n td3%y3(im,is) = td3%y3(im,is)+& &(0.5*td3%ghxss(j,im)+td3%ghx(j,im))*td3%yhat3(j,is)+& &(0.5*td3%ghxx(j,im)+(1./6.)*td3%ghxxx(j,im)*td3%yhat3(1, is))*& &td3%yhat3(1, is)*td3%yhat3(j,is) ! y3 += ghxu·ŷ⊗ε ! + first n*q folded indices of (3/6)·ghxxu·ŷ⊗ŷ⊗ε do k=1,td3%q td3%y3(im,is) = td3%y3(im,is) + & &(td3%ghxu(td3%q*(j-1)+k,im)+& &0.5*td3%ghxxu(td3%q*(j-1)+k,im)*td3%yhat3(1, is))*& &td3%yhat3(j, is)*td3%e(k, is) end do end do ! y3 += ghu·ε+(3/6)·ghuss·ε + first q folded indices of ½ghuu·ε⊗ε ! + first q folded indices for (1/6)·ghuuu·ε⊗ε⊗ε ! + first n*q folded indices of (3/6)·ghxuu·ŷ⊗ε⊗ε do j=1,td3%q td3%y3(im,is) = td3%y3(im,is) + & &(0.5*td3%ghuss(j,im)+td3%ghu(j,im))*td3%e(j,is) + & &(0.5*td3%ghuu(j,im)+(1./6.)*td3%ghuuu(j,im)*& &td3%e(1, is))*td3%e(1, is)*td3%e(j, is) do k=1,td3%n td3%y3(im,is) = td3%y3(im,is) + & &0.5*td3%ghxuu(td3%uu_size*(k-1)+j,im)*& &td3%yhat3(k, is)*td3%e(1, is)*td3%e(j, is) end do end do ! y3 += remaining ½ghxx·ŷ⊗ŷ terms ! + the next terms starting from n+1 up to xx_size ! of (1/6)ghxxx·ŷ⊗ŷ⊗ŷ ! + remaining terms of (3/6)·ghxxu·ŷ⊗ŷ⊗ε do j=td3%n+1,td3%xx_size td3%y3(im,is) = td3%y3(im,is) + & &(0.5*td3%ghxx(j,im)+(1./6.)*td3%ghxxx(j,im)*td3%yhat3(1, is))*& &td3%yhat3(td3%xx_idcs(j)%coor(1), is)*& &td3%yhat3(td3%xx_idcs(j)%coor(2), is) do k=1,td3%q td3%y3(im,is) = td3%y3(im,is)+& &0.5*td3%ghxxu(td3%q*(j-1)+k,im)*& &td3%yhat3(td3%xx_idcs(j)%coor(1), is)*& &td3%yhat3(td3%xx_idcs(j)%coor(2), is)*& &td3%e(k, is) end do end do ! y3 += remaining ½ghuu·ε⊗ε terms ! + the next uu_size terms starting from q+1 ! of (1/6)·ghuuu·ε⊗ε⊗ε ! + remaining terms of (3/6)·ghxuu·ŷ⊗ε⊗ε do j=td3%q+1,td3%uu_size td3%y3(im,is) = td3%y3(im,is) + & &(0.5*td3%ghuu(j,im)+(1./6.)*td3%ghuuu(j,im)*td3%e(1, is))*& &td3%e(td3%uu_idcs(j)%coor(1), is)*& &td3%e(td3%uu_idcs(j)%coor(2), is) do k=1,td3%n td3%y3(im,is) = td3%y3(im,is) + & &0.5*td3%ghxuu(td3%uu_size*(k-1)+j,im)*& &td3%yhat3(k, is)*& &td3%e(td3%uu_idcs(j)%coor(1), is)*& &td3%e(td3%uu_idcs(j)%coor(2), is) end do end do ! y3 += remaining (1/6)·ghxxx·ŷ⊗ŷ⊗ŷ terms do j=td3%xx_size+1,td3%xxx_size td3%y3(im,is) = td3%y3(im,is)+& &(1./6.)*td3%ghxxx(j,im)*& &td3%yhat3(td3%xxx_idcs(j)%coor(1), is)*& &td3%yhat3(td3%xxx_idcs(j)%coor(2), is)*& &td3%yhat3(td3%xxx_idcs(j)%coor(3), is) end do ! y3 += remaining (1/6)ghuuu·ε⊗ε⊗ε terms do j=td3%uu_size+1,td3%uuu_size td3%y3(im,is) = td3%y3(im,is) + & &(1./6.)*td3%ghuuu(j,im)*& &td3%e(td3%uuu_idcs(j)%coor(1), is)*& &td3%e(td3%uuu_idcs(j)%coor(2), is)*& &td3%e(td3%uuu_idcs(j)%coor(3), is) end do end do end do end subroutine thread_eval_3 ! Fills y1 and y2 as ! y1 = ybar + ghx·ŷ1 + ghu·ε ! y2 = ybar + ½ghss + ghx·ŷ2 + ghu·ε + ½ghxx·ŷ1⊗ŷ1 + ½ghuu·ε⊗ε + ghxu·ŷ1⊗ε ! y3 = ybar + ghx·ŷ3 + ghu·ε + ghxx·ŷ1⊗ŷ2 + ghuu·ε⊗ε + ghxu·ŷ1⊗ε + ghxu·ŷ2⊗ε ! + (1/6)·ghxxx·ŷ1⊗ŷ1⊗ŷ1 + (1/6)·ghuuu·ε⊗ε⊗ε + (3/6)·ghxxu·ŷ1⊗ŷ1⊗ε ! + (3/6)·ghxuu·ŷ1⊗ε⊗ε + (3/6)·ghxss·ŷ1 + (3/6)·ghuss·ε ! in td3 subroutine thread_eval_3_pruning(arg) bind(c) type(c_ptr), intent(in), value :: arg integer, pointer :: ithread integer :: is, im, j, k, start, end, q, r real(real64) :: x, y ! Checking that the thread number got passed as argument if (.not. c_associated(arg)) then call mexErrMsgTxt("No argument was passed to thread_eval") end if call c_f_pointer(arg, ithread) ! Specifying bounds for the curent thread q = td3%s / td3%numthreads r = mod(td3%s, td3%numthreads) start = (ithread-1)*q+1 if (ithread < td3%numthreads) then end = start+q-1 else end = td3%s end if do is=start,end do im=1,td3%m ! y1 = ybar ! y2 = ybar + ½ghss ! y3 = ybar td3%y1(im,is) = td3%ss(im) td3%y2(im,is) = td3%constant(im) td3%y3(im,is) = td3%ss(im) ! y1 += ghx·ŷ1 ! y2 += ghx·ŷ2 + first n folded indices for ½ghxx·ŷ1⊗ŷ1 ! y3 += ghx·ŷ3 +(3/6)·ghxss·ŷ ! + first n folded indices for ghxx·ŷ1⊗ŷ2 ! + first n folded indices for (1/6)ghxxx·ŷ1⊗ŷ1⊗ŷ1 do j=1,td3%n td3%y1(im,is) = td3%y1(im,is) + td3%ghx(j,im)*td3%yhat1(j,is) td3%y2(im,is) = td3%y2(im,is) + td3%ghx(j,im)*td3%yhat2(j,is) +& &0.5*td3%ghxx(j,im)*td3%yhat1(1, is)*td3%yhat1(j, is) td3%y3(im,is) = td3%y3(im,is) + td3%ghx(j,im)*td3%yhat3(j,is) +& &0.5*td3%ghxss(j,im)*td3%yhat1(j,is) +& &td3%ghxx(j,im)*td3%yhat1(1, is)*td3%yhat2(j, is)+& &(1./6.)*td3%ghxxx(j,im)*td3%yhat1(1, is)*& &td3%yhat1(1, is)*td3%yhat1(j,is) ! y2 += + ghxu·ŷ1⊗ε ! y3 += + ghxu·ŷ1⊗ε + ghxu·ŷ2⊗ε ! + first n*q folded indices of (3/6)·ghxxu·ŷ1⊗ŷ1⊗ε do k=1,td3%q td3%y2(im,is) = td3%y2(im,is)+& &td3%ghxu(td3%q*(j-1)+k,im)*& &td3%yhat1(j, is)*td3%e(k, is) td3%y3(im,is) = td3%y3(im,is)+& &td3%ghxu(td3%q*(j-1)+k,im)*& &(td3%yhat1(j, is)+td3%yhat2(j, is))*td3%e(k, is)+& &0.5*td3%ghxxu(td3%q*(j-1)+k,im)*td3%yhat1(1, is)*& &td3%yhat1(j, is)*td3%e(k, is) end do end do ! y1 += +ghu·ε ! y2 += +ghu·ε + first q folded indices for ½ghuu·ε⊗ε ! y3 += +ghu·ε + first q folded indices for ghuu·ε⊗ε ! + first n*q folded indices of (3/6)·ghxuu·ŷ1⊗ε⊗ε ! + first n folded indices of (1/6)·ghuuu·ε⊗ε⊗ε do j=1,td3%q x = td3%ghu(j,im)*td3%e(j,is) y = td3%ghuu(j,im)*td3%e(1, is)*td3%e(j, is) td3%y1(im,is) = td3%y1(im,is) + x td3%y2(im,is) = td3%y2(im,is) + x + 0.5*y td3%y3(im,is) = td3%y3(im,is) + x + y +& &td3%ghuss(j,im)*td3%e(j,is)+& &(1./6.)*td3%ghuuu(j,im)*td3%e(1, is)*td3%e(1, is)*& &td3%e(j, is) do k=1,td3%n td3%y3(im,is) = td3%y3(im,is) + & &0.5*td3%ghxuu(td3%uu_size*(k-1)+j,im)*& &td3%yhat1(k, is)*td3%e(1, is)*td3%e(j, is) end do end do ! y2 += remaining ½ghxx·ŷ1⊗ŷ1 terms ! y3 += remaining ghxx·ŷ1⊗ŷ2 terms ! + the next terms starting from n+1 up to xx_size ! of (1/6)ghxxx·ŷ1⊗ŷ1⊗ŷ1 ! + remaining terms of (3/6)·ghxxu·ŷ1⊗ŷ1⊗ε do j=td3%n+1,td3%xx_size td3%y2(im,is) = td3%y2(im,is) + & &0.5*td3%ghxx(j,im)*& &td3%yhat1(td3%xx_idcs(j)%coor(1), is)*& &td3%yhat1(td3%xx_idcs(j)%coor(2), is) td3%y3(im,is) = td3%y3(im,is) + & &td3%ghxx(j,im)*& &td3%yhat1(td3%xx_idcs(j)%coor(1), is)*& &td3%yhat2(td3%xx_idcs(j)%coor(2), is)+& &(1./6.)*td3%ghxxx(j,im)*td3%yhat1(1, is)*& &td3%yhat1(td3%xx_idcs(j)%coor(1), is)*& &td3%yhat1(td3%xx_idcs(j)%coor(2), is) do k=1,td3%n td3%y3(im,is) = td3%y3(im,is) + & &0.5*td3%ghxxu(td3%q*(j-1)+k,im)*& &td3%yhat1(td3%xx_idcs(j)%coor(1), is)*& &td3%yhat1(td3%xx_idcs(j)%coor(2), is)*& &td3%e(k, is) end do end do ! y2 += remaining ½ghuu·ε⊗ε terms ! y3 += remaining ghuu·ε⊗ε terms ! + remaining terms of (3/6)·ghxuu·ŷ⊗ε⊗ε ! + the next uu_size terms starting from q+1 ! of (1/6)·ghuuu·ε⊗ε⊗ε do j=td3%q+1,td3%uu_size x = td3%ghuu(j,im)*& &td3%e(td3%uu_idcs(j)%coor(1), is)*& &td3%e(td3%uu_idcs(j)%coor(2), is) td3%y2(im,is) = td3%y2(im,is)+0.5*x td3%y3(im,is) = td3%y3(im,is)+x+& &(1./6.)*td3%ghuuu(j,im)*td3%e(1, is)*& &td3%e(td3%uu_idcs(j)%coor(1), is)*& &td3%e(td3%uu_idcs(j)%coor(2), is) do k=1,td3%n td3%y3(im,is) = td3%y3(im,is) + & &0.5*td3%ghxuu(td3%uu_size*(k-1)+j,im)*& &td3%yhat1(k, is)*& &td3%e(td3%uu_idcs(j)%coor(1), is)*& &td3%e(td3%uu_idcs(j)%coor(2), is) end do end do ! y3 += remaining (1/6)·ghxxx·ŷ⊗ŷ⊗ŷ terms do j=td3%xx_size+1,td3%xxx_size td3%y3(im,is) = td3%y3(im,is)+& &(1./6.)*td3%ghxxx(j,im)*& &td3%yhat1(td3%xxx_idcs(j)%coor(1), is)*& &td3%yhat1(td3%xxx_idcs(j)%coor(2), is)*& &td3%yhat1(td3%xxx_idcs(j)%coor(3), is) end do ! y3 += remaining (1/6)ghuuu·ε⊗ε⊗ε terms do j=td3%uu_size+1,td3%uuu_size td3%y3(im,is) = td3%y3(im,is) + & &(1./6.)*td3%ghuuu(j,im)*& &td3%e(td3%uuu_idcs(j)%coor(1), is)*& &td3%e(td3%uuu_idcs(j)%coor(2), is)*& &td3%e(td3%uuu_idcs(j)%coor(3), is) end do end do end do end subroutine thread_eval_3_pruning end module pparticle_3 ! The code of the local_state_space_iteration_3 routine ! Input: ! prhs[1] yhat3 [double] n×s array, time t particles. ! prhs[2] e [double] q×s array, time t innovations. ! prhs[3] ghx [double] m×n array, first order reduced form. ! prhs[4] ghu [double] m×q array, first order reduced form. ! prhs[5] constant [double] m×1 array, deterministic steady state + ! third order correction for the union of ! the states and observed variables. ! prhs[6] ghxx [double] m×n² array, second order reduced form. ! prhs[7] ghuu [double] m×q² array, second order reduced form. ! prhs[8] ghxu [double] m×nq array, second order reduced form. ! prhs[9] ghxxx [double] m×n array, third order reduced form. ! prhs[10] ghuuu [double] m×q array, third order reduced form. ! prhs[11] ghxxu [double] m×n²q array, third order reduced form. ! prhs[12] ghxuu [double] m×nq² array, third order reduced form. ! prhs[13] ghxss [double] m×n array, third order reduced form. ! prhs[14] ghuss [double] m×q array, third order reduced form. ! prhs[15] yhat2 [double] [OPTIONAL] 2n×s array, time t particles up to 2nd order pruning additional latent variables. The first n rows concern the pruning first-order latent variables, while the last n rows concern the pruning 2nd-order latent variables ! prhs[16] ss [double] [OPTIONAL] m×1 array, steady state for the union of the states and the observed variables (needed for the pruning mode). ! prhs[15 or 17] [double] num of threads ! ! Output: ! plhs[1] y3 [double] m×s array, time t+1 particles. ! plhs[2] y2 [double] 2m×s array, time t+1 particles for the pruning latent variables up to 2nd order. The first m rows concern the pruning first-order latent variables, while the last m rows concern the pruning 2nd-order latent variables subroutine mexFunction(nlhs, plhs, nrhs, prhs) bind(c, name='mexFunction') use iso_c_binding use matlab_mex use pascal use partitions use pthread use pparticle_3 implicit none type(c_ptr), dimension(*), intent(in), target :: prhs type(c_ptr), dimension(*), intent(out) :: plhs integer(c_int), intent(in), value :: nlhs, nrhs integer :: n, m, s, q, numthreads real(real64), pointer, contiguous :: ghx(:,:), ghu(:,:), ghxx(:,:), & &ghuu(:,:), ghxu(:,:), ghxxx(:,:), ghuuu(:,:), ghxxu(:,:), & &ghxuu(:,:), ghxss(:,:), ghuss(:,:), yhatlat(:,:), ylat(:,:) integer :: i, j, k, xx_size, uu_size, xxx_size, uuu_size, rc character(kind=c_char, len=10) :: arg_nber type(pascal_triangle) :: p integer, allocatable :: xx_nbeq(:), xxx_nbeq(:), & &uu_nbeq(:), uuu_nbeq(:), xx_off(:), uu_off(:), & &xxx_off(:), uuu_off(:) type(c_pthread_t), allocatable :: threads(:) integer, allocatable, target :: routines(:) ! 0. Checking the consistency and validity of input arguments if (nrhs /= 15 .and. nrhs /= 17) then call mexErrMsgTxt("Must have exactly 15 inputs or 18 inputs") end if if (nlhs > 2) then call mexErrMsgTxt("Too many output arguments.") end if do i=1, max(14, nrhs-1) if (.not. (c_associated(prhs(i)) .and. mxIsDouble(prhs(i)) .and. & (.not. mxIsComplex(prhs(i))) .and. (.not. mxIsSparse(prhs(i))))) then write (arg_nber,"(i2)") i call mexErrMsgTxt("Argument " // trim(arg_nber) // " should be a real dense matrix") end if end do i = max(15,nrhs) if (.not. (c_associated(prhs(i)) .and. mxIsScalar(prhs(i)) .and. & mxIsNumeric(prhs(i)))) then write (arg_nber,"(i2)") i call mexErrMsgTxt("Argument " // trim(arg_nber) // " should be a numeric scalar") end if numthreads = int(mxGetScalar(prhs(i))) if (numthreads <= 0) then write (arg_nber,"(i2)") i call mexErrMsgTxt("Argument " // trim(arg_nber) // " should be a positive integer") end if td3%numthreads = numthreads n = int(mxGetM(prhs(1))) ! Number of states. s = int(mxGetN(prhs(1))) ! Number of particles. q = int(mxGetM(prhs(2))) ! Number of innovations. m = int(mxGetM(prhs(3))) ! Number of elements in the union of states and observed variables. td3%n = n td3%s = s td3%q = q td3%m = m if ((s /= mxGetN(prhs(2))) & ! Number of columns for epsilon &.or. (n /= mxGetN(prhs(3))) & ! Number of columns for ghx &.or. (m /= mxGetM(prhs(4))) & ! Number of rows for ghu &.or. (q /= mxGetN(prhs(4))) & ! Number of columns for ghu &.or. (m /= mxGetM(prhs(5))) & ! Number of rows for 2nd order constant correction + deterministic steady state &.or. (m /= mxGetM(prhs(6))) & ! Number of rows for ghxx &.or. (n*n /= mxGetN(prhs(6))) & ! Number of columns for ghxx &.or. (m /= mxGetM(prhs(7))) & ! Number of rows for ghuu &.or. (q*q /= mxGetN(prhs(7))) & ! Number of columns for ghuu &.or. (m /= mxGetM(prhs(8))) & ! Number of rows for ghxu &.or. (n*q /= mxGetN(prhs(8))) & ! Number of columns for ghxu &.or. (m /= mxGetM(prhs(9))) & ! Number of rows for ghxxx &.or. (n*n*n /= mxGetN(prhs(9))) & ! Number of columns for ghxxx &.or. (m /= mxGetM(prhs(10))) & ! Number of rows for ghuuu &.or. (q*q*q /= mxGetN(prhs(10))) & ! Number of columns for ghuuu &.or. (m /= mxGetM(prhs(11))) & ! Number of rows for ghxxu &.or. (n*n*q /= mxGetN(prhs(11))) & ! Number of columns for ghxxu &.or. (m /= mxGetM(prhs(12))) & ! Number of rows for ghxuu &.or. (n*q*q /= mxGetN(prhs(12))) & ! Number of columns for ghxuu &.or. (m /= mxGetM(prhs(13))) & ! Number of rows for ghxss &.or. (n /= mxGetN(prhs(13))) & ! Number of columns for ghxss &.or. (m /= mxGetM(prhs(14))) & ! Number of rows for ghuss &.or. (q /= mxGetN(prhs(14))) & ! Number of columns for ghuss &.or. ((nrhs == 17) & ! With pruning optional inputs &.and. ((2*n /= mxGetM(prhs(15))) & ! Number of rows for yhat2 &.or. (s /= mxGetN(prhs(15))) & ! Number of columns for yhat2 &.or. (m /= mxGetM(prhs(16)))))) then ! Number of rows for ss ! &) then call mexErrMsgTxt("Input dimension mismatch") end if ! 1. Getting relevant information to take advantage of symmetries ! There are symmetries in the ghxx, ghuu, ghxxx, ghuuu, ghxxu and ghxuu terms ! that we may exploit to avoid unnecessarily repeating operations in matrix-vector ! multiplications, e.g in ghxx·ŷ⊗ŷ. ! In matrix-vector multiplications such as ghxx·ŷ⊗ŷ, we loop through all the folded offsets ! and thus need for each one of them : ! (i) the corresponding folded index, e.g (α₁,α₂), α₁≤α₂ for ghxx ! (i) the corresponding offset in the unfolded matrix ! (ii) the corresponding number of equivalent unfolded indices (1 if α₁=α₂, 2 otherwise) ! It is better to compute these beforehand as it avoids repeating the calculation for ! each particle. The `folded_offset_loop` routine carries out this operation. p = pascal_triangle(max(n,q)+3-1) xx_size = get(2,n+2-1,p) uu_size = get(2,q+2-1,p) xxx_size = get(3,n+3-1,p) uuu_size = get(3,q+3-1,p) td3%xx_size = xx_size td3%uu_size = uu_size td3%xxx_size = xxx_size td3%uuu_size = uuu_size allocate(td3%xx_idcs(xx_size), td3%uu_idcs(uu_size), & &td3%xxx_idcs(xxx_size), td3%uuu_idcs(uuu_size), & &xx_off(xx_size), uu_off(uu_size), & &xxx_off(xxx_size), uuu_off(uuu_size), & &xx_nbeq(xx_size), uu_nbeq(uu_size), & &xxx_nbeq(xxx_size), uuu_nbeq(uuu_size)) call folded_offset_loop(td3%xx_idcs, xx_nbeq, & &xx_off, n, 2, p) call folded_offset_loop(td3%uu_idcs, uu_nbeq, & &uu_off, q, 2, p) call folded_offset_loop(td3%xxx_idcs, xxx_nbeq, & &xxx_off, n, 3, p) call folded_offset_loop(td3%uuu_idcs, uuu_nbeq, & &uuu_off, q, 3, p) ! 1. Storing the relevant input variables in Fortran td3%yhat3(1:n,1:s) => mxGetPr(prhs(1)) td3%e(1:q,1:s) => mxGetPr(prhs(2)) ghx(1:m,1:n) => mxGetPr(prhs(3)) ghu(1:m,1:q) => mxGetPr(prhs(4)) td3%constant => mxGetPr(prhs(5)) ghxx(1:m,1:n*n) => mxGetPr(prhs(6)) ghuu(1:m,1:q*q) => mxGetPr(prhs(7)) ghxu(1:m,1:n*q) => mxGetPr(prhs(8)) ghxxx(1:m,1:n*n*n) => mxGetPr(prhs(9)) ghuuu(1:m,1:q*q*q) => mxGetPr(prhs(10)) ghxxu(1:m,1:n*n*q) => mxGetPr(prhs(11)) ghxuu(1:m,1:n*q*q) => mxGetPr(prhs(12)) ghxss(1:m,1:n) => mxGetPr(prhs(13)) ghuss(1:m,1:q) => mxGetPr(prhs(14)) if (nrhs == 17) then yhatlat(1:2*n,1:s) => mxGetPr(prhs(15)) td3%yhat1 => yhatlat(1:n,1:s) td3%yhat2 => yhatlat(n+1:2*n,1:s) td3%ss => mxGetPr(prhs(16)) end if ! Getting a transposed folded copy of the unfolded tensors ! for future loops to be more efficient allocate(td3%ghx(n,m), td3%ghu(q,m),& &td3%ghuu(uu_size,m), td3%ghxu(n*q,m), & &td3%ghxx(xx_size,m), & &td3%ghxxx(xxx_size,m), td3%ghuuu(uuu_size,m), & &td3%ghxxu(xx_size*q,m), td3%ghxuu(n*uu_size,m), & &td3%ghxss(n,m), td3%ghuss(q,m)) do i=1,m do j=1,n td3%ghx(j,i) = ghx(i,j) td3%ghxss(j,i) = ghxss(i,j) td3%ghxx(j,i) = xx_nbeq(j)*ghxx(i,xx_off(j)) td3%ghxxx(j,i) = xxx_nbeq(j)*ghxxx(i,xxx_off(j)) do k=1,q td3%ghxu(q*(j-1)+k,i) = ghxu(i,q*(j-1)+k) td3%ghxxu(q*(j-1)+k,i) = xx_nbeq(j)*ghxxu(i,q*(xx_off(j)-1)+k) end do end do do j=n+1,xx_size td3%ghxx(j,i) = xx_nbeq(j)*ghxx(i,xx_off(j)) td3%ghxxx(j,i) = xxx_nbeq(j)*ghxxx(i,xxx_off(j)) do k=1,q td3%ghxxu(q*(j-1)+k,i) = xx_nbeq(j)*ghxxu(i,q*(xx_off(j)-1)+k) end do end do do j=xx_size+1,xxx_size td3%ghxxx(j,i) = xxx_nbeq(j)*ghxxx(i,xxx_off(j)) end do do j=1,q td3%ghu(j,i) = ghu(i,j) td3%ghuss(j,i) = ghuss(i,j) td3%ghuu(j,i) = uu_nbeq(j)*ghuu(i,uu_off(j)) td3%ghuuu(j,i) = uuu_nbeq(j)*ghuuu(i,uuu_off(j)) do k=1,n td3%ghxuu(uu_size*(k-1)+j,i) = uu_nbeq(j)*ghxuu(i,q*q*(k-1)+uu_off(j)) end do end do do j=q+1,uu_size td3%ghuu(j,i) = uu_nbeq(j)*ghuu(i,uu_off(j)) td3%ghuuu(j,i) = uuu_nbeq(j)*ghuuu(i,uuu_off(j)) do k=1,n td3%ghxuu(uu_size*(k-1)+j,i) = uu_nbeq(j)*ghxuu(i,q*q*(k-1)+uu_off(j)) end do end do do j=uu_size+1,uuu_size td3%ghuuu(j,i) = uuu_nbeq(j)*ghuuu(i,uuu_off(j)) end do end do ! 3. Implementing the calculations: plhs(1) = mxCreateDoubleMatrix(int(m, mwSize), int(s, mwSize), mxREAL) td3%y3(1:m,1:s) => mxGetPr(plhs(1)) if (nrhs == 17) then plhs(2) = mxCreateDoubleMatrix(int(2*m, mwSize), int(s, mwSize), mxREAL) ylat(1:2*m,1:s) => mxGetPr(plhs(2)) td3%y1 => ylat(1:m,1:s) td3%y2 => ylat(m+1:2*m,1:s) end if allocate(threads(numthreads), routines(numthreads)) routines = [ (i, i = 1, numthreads) ] if (numthreads == 1) then if (nrhs == 17) then call thread_eval_3_pruning(c_loc(routines(1))) else call thread_eval_3(c_loc(routines(1))) end if else ! Creating the threads if (nrhs == 17) then do i = 1, numthreads rc = c_pthread_create(threads(i), c_null_ptr, c_funloc(thread_eval_3_pruning), c_loc(routines(i))) end do else do i = 1, numthreads rc = c_pthread_create(threads(i), c_null_ptr, c_funloc(thread_eval_3), c_loc(routines(i))) end do end if ! Joining the threads do i = 1, numthreads rc = c_pthread_join(threads(i), c_loc(routines(i))) end do end if end subroutine mexFunction