Dynare++: by default, use as many threads as there are logical CPUs
The former default was 2 threads, since Dynare++ was written at a time when hyper-threading was being generalized (but multiple cores were not).
parent
edda6e3038
commit
733308807d
|
@ -1033,12 +1033,8 @@ default.
|
||||||
threads. Complex evaluations of Faa Di Bruno formulas, simulations and
|
threads. Complex evaluations of Faa Di Bruno formulas, simulations and
|
||||||
numerical integration can be parallelized, Dynare++ exploits this
|
numerical integration can be parallelized, Dynare++ exploits this
|
||||||
advantage. You have to have a hardware support for this, otherwise
|
advantage. You have to have a hardware support for this, otherwise
|
||||||
there is no gain from the parallelization. As a rule of thumb, set the
|
there is no gain from the parallelization. The default value is the number of
|
||||||
number of threads to the number of processors. An exception is a
|
logical processors present on the machine.
|
||||||
machine with Pentium 4 with Hyper Threading (abbreviated by HT). This
|
|
||||||
processor can run two threads concurrently. The same applies to
|
|
||||||
Dual-Core processors. Since these processors are present in most new
|
|
||||||
PC desktops/laptops, the default is 2.
|
|
||||||
|
|
||||||
\item[\desc{\tt --ss-tol \it float}] This sets the tolerance of the
|
\item[\desc{\tt --ss-tol \it float}] This sets the tolerance of the
|
||||||
non-linear solver of deterministic steady state to {\it float}. It is
|
non-linear solver of deterministic steady state to {\it float}. It is
|
||||||
|
|
|
@ -23,8 +23,6 @@
|
||||||
#include <memory>
|
#include <memory>
|
||||||
#include <cstdlib>
|
#include <cstdlib>
|
||||||
|
|
||||||
const int num_threads = 2; // does nothing if DEBUG defined
|
|
||||||
|
|
||||||
// evaluates unfolded (Dx)^k power, where x is a vector, D is a
|
// evaluates unfolded (Dx)^k power, where x is a vector, D is a
|
||||||
// Cholesky factor (lower triangular)
|
// Cholesky factor (lower triangular)
|
||||||
class MomentFunction : public VectorFunction
|
class MomentFunction : public VectorFunction
|
||||||
|
@ -252,7 +250,7 @@ TestRunnable::smolyak_normal_moments(const GeneralMatrix &m, int imom, int level
|
||||||
WallTimer tim("\tSmolyak quadrature time: ");
|
WallTimer tim("\tSmolyak quadrature time: ");
|
||||||
GaussHermite gs;
|
GaussHermite gs;
|
||||||
SmolyakQuadrature quad(dim, level, gs);
|
SmolyakQuadrature quad(dim, level, gs);
|
||||||
quad.integrate(func, level, num_threads, smol_out);
|
quad.integrate(func, level, sthread::detach_thread_group::max_parallel_threads, smol_out);
|
||||||
std::cout << "\tNumber of Smolyak evaluations: " << quad.numEvals(level) << std::endl;
|
std::cout << "\tNumber of Smolyak evaluations: " << quad.numEvals(level) << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -281,7 +279,7 @@ TestRunnable::product_normal_moments(const GeneralMatrix &m, int imom, int level
|
||||||
WallTimer tim("\tProduct quadrature time: ");
|
WallTimer tim("\tProduct quadrature time: ");
|
||||||
GaussHermite gs;
|
GaussHermite gs;
|
||||||
ProductQuadrature quad(dim, gs);
|
ProductQuadrature quad(dim, gs);
|
||||||
quad.integrate(func, level, num_threads, prod_out);
|
quad.integrate(func, level, sthread::detach_thread_group::max_parallel_threads, prod_out);
|
||||||
std::cout << "\tNumber of product evaluations: " << quad.numEvals(level) << std::endl;
|
std::cout << "\tNumber of product evaluations: " << quad.numEvals(level) << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -309,7 +307,7 @@ TestRunnable::smolyak_product_cube(const VectorFunction &func, const Vector &res
|
||||||
{
|
{
|
||||||
WallTimer tim("\tSmolyak quadrature time: ");
|
WallTimer tim("\tSmolyak quadrature time: ");
|
||||||
SmolyakQuadrature quad(func.indim(), level, glq);
|
SmolyakQuadrature quad(func.indim(), level, glq);
|
||||||
quad.integrate(func, level, num_threads, out);
|
quad.integrate(func, level, sthread::detach_thread_group::max_parallel_threads, out);
|
||||||
out.add(-1.0, res);
|
out.add(-1.0, res);
|
||||||
smol_error = out.getMax();
|
smol_error = out.getMax();
|
||||||
std::cout << "\tNumber of Smolyak evaluations: " << quad.numEvals(level) << std::endl;
|
std::cout << "\tNumber of Smolyak evaluations: " << quad.numEvals(level) << std::endl;
|
||||||
|
@ -318,7 +316,7 @@ TestRunnable::smolyak_product_cube(const VectorFunction &func, const Vector &res
|
||||||
{
|
{
|
||||||
WallTimer tim("\tProduct quadrature time: ");
|
WallTimer tim("\tProduct quadrature time: ");
|
||||||
ProductQuadrature quad(func.indim(), glq);
|
ProductQuadrature quad(func.indim(), glq);
|
||||||
quad.integrate(func, level, num_threads, out);
|
quad.integrate(func, level, sthread::detach_thread_group::max_parallel_threads, out);
|
||||||
out.add(-1.0, res);
|
out.add(-1.0, res);
|
||||||
prod_error = out.getMax();
|
prod_error = out.getMax();
|
||||||
std::cout << "\tNumber of product evaluations: " << quad.numEvals(level) << std::endl;
|
std::cout << "\tNumber of product evaluations: " << quad.numEvals(level) << std::endl;
|
||||||
|
@ -338,7 +336,7 @@ TestRunnable::qmc_cube(const VectorFunction &func, double res, double tol, int l
|
||||||
WarnockPerScheme wps;
|
WarnockPerScheme wps;
|
||||||
QMCarloCubeQuadrature qmc(func.indim(), level, wps);
|
QMCarloCubeQuadrature qmc(func.indim(), level, wps);
|
||||||
// qmc.savePoints("warnock.txt", level);
|
// qmc.savePoints("warnock.txt", level);
|
||||||
qmc.integrate(func, level, num_threads, r);
|
qmc.integrate(func, level, sthread::detach_thread_group::max_parallel_threads, r);
|
||||||
error1 = std::max(res - r[0], r[0] - res);
|
error1 = std::max(res - r[0], r[0] - res);
|
||||||
std::cout << "\tQuasi-Monte Carlo (Warnock scrambling) error: " << std::setw(16) << std::setprecision(12) << error1 << std::endl;
|
std::cout << "\tQuasi-Monte Carlo (Warnock scrambling) error: " << std::setw(16) << std::setprecision(12) << error1 << std::endl;
|
||||||
}
|
}
|
||||||
|
@ -348,7 +346,7 @@ TestRunnable::qmc_cube(const VectorFunction &func, double res, double tol, int l
|
||||||
ReversePerScheme rps;
|
ReversePerScheme rps;
|
||||||
QMCarloCubeQuadrature qmc(func.indim(), level, rps);
|
QMCarloCubeQuadrature qmc(func.indim(), level, rps);
|
||||||
// qmc.savePoints("reverse.txt", level);
|
// qmc.savePoints("reverse.txt", level);
|
||||||
qmc.integrate(func, level, num_threads, r);
|
qmc.integrate(func, level, sthread::detach_thread_group::max_parallel_threads, r);
|
||||||
error2 = std::max(res - r[0], r[0] - res);
|
error2 = std::max(res - r[0], r[0] - res);
|
||||||
std::cout << "\tQuasi-Monte Carlo (reverse scrambling) error: " << std::setw(16) << std::setprecision(12) << error2 << std::endl;
|
std::cout << "\tQuasi-Monte Carlo (reverse scrambling) error: " << std::setw(16) << std::setprecision(12) << error2 << std::endl;
|
||||||
}
|
}
|
||||||
|
@ -358,7 +356,7 @@ TestRunnable::qmc_cube(const VectorFunction &func, double res, double tol, int l
|
||||||
IdentityPerScheme ips;
|
IdentityPerScheme ips;
|
||||||
QMCarloCubeQuadrature qmc(func.indim(), level, ips);
|
QMCarloCubeQuadrature qmc(func.indim(), level, ips);
|
||||||
// qmc.savePoints("identity.txt", level);
|
// qmc.savePoints("identity.txt", level);
|
||||||
qmc.integrate(func, level, num_threads, r);
|
qmc.integrate(func, level, sthread::detach_thread_group::max_parallel_threads, r);
|
||||||
error3 = std::max(res - r[0], r[0] - res);
|
error3 = std::max(res - r[0], r[0] - res);
|
||||||
std::cout << "\tQuasi-Monte Carlo (no scrambling) error: " << std::setw(16) << std::setprecision(12) << error3 << std::endl;
|
std::cout << "\tQuasi-Monte Carlo (no scrambling) error: " << std::setw(16) << std::setprecision(12) << error3 << std::endl;
|
||||||
}
|
}
|
||||||
|
@ -498,7 +496,6 @@ main()
|
||||||
nvmax = test->nvar;
|
nvmax = test->nvar;
|
||||||
}
|
}
|
||||||
tls.init(dmax, nvmax); // initialize library
|
tls.init(dmax, nvmax); // initialize library
|
||||||
sthread::detach_thread_group::max_parallel_threads = num_threads;
|
|
||||||
|
|
||||||
// launch the tests
|
// launch the tests
|
||||||
int success = 0;
|
int success = 0;
|
||||||
|
|
|
@ -26,7 +26,7 @@ const char *help_str
|
||||||
" --prefix <string> prefix of variables in Mat-4 file [\"dyn\"]\n"
|
" --prefix <string> prefix of variables in Mat-4 file [\"dyn\"]\n"
|
||||||
" --seed <num> random number generator seed [934098]\n"
|
" --seed <num> random number generator seed [934098]\n"
|
||||||
" --order <num> order of approximation [no default]\n"
|
" --order <num> order of approximation [no default]\n"
|
||||||
" --threads <num> number of max parallel threads [2]\n"
|
" --threads <num> number of max parallel threads [nb. of logical CPUs]\n"
|
||||||
" --ss-tol <num> steady state calcs tolerance [1.e-13]\n"
|
" --ss-tol <num> steady state calcs tolerance [1.e-13]\n"
|
||||||
" --check pesPES check model residuals [no checks]\n"
|
" --check pesPES check model residuals [no checks]\n"
|
||||||
" lower/upper case switches off/on\n"
|
" lower/upper case switches off/on\n"
|
||||||
|
|
|
@ -5,9 +5,9 @@
|
||||||
|
|
||||||
namespace sthread
|
namespace sthread
|
||||||
{
|
{
|
||||||
/* We set the default value for |max_parallel_threads| to 2, i.e.
|
/* We set the default value for |max_parallel_threads| to the number of
|
||||||
uniprocessor machine with hyper-threading */
|
logical CPUs */
|
||||||
int detach_thread_group::max_parallel_threads = 2;
|
int detach_thread_group::max_parallel_threads = std::thread::hardware_concurrency();
|
||||||
|
|
||||||
/* We cycle through all threads in the group, and in each cycle we wait
|
/* We cycle through all threads in the group, and in each cycle we wait
|
||||||
for the change in the |counter|. If the counter indicates less than
|
for the change in the |counter|. If the counter indicates less than
|
||||||
|
|
|
@ -220,8 +220,6 @@ extern "C" {
|
||||||
const int nSteps = 0; // Dynare++ solving steps, for time being default to 0 = deterministic steady state
|
const int nSteps = 0; // Dynare++ solving steps, for time being default to 0 = deterministic steady state
|
||||||
const double sstol = 1.e-13; //NL solver tolerance from
|
const double sstol = 1.e-13; //NL solver tolerance from
|
||||||
|
|
||||||
sthread::detach_thread_group::max_parallel_threads = 2; //params.num_threads;
|
|
||||||
|
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
// make journal name and journal
|
// make journal name and journal
|
||||||
|
|
Loading…
Reference in New Issue