diff --git a/dynare++/doc/dynare++-tutorial.tex b/dynare++/doc/dynare++-tutorial.tex index 0661f38be..38c9b1363 100644 --- a/dynare++/doc/dynare++-tutorial.tex +++ b/dynare++/doc/dynare++-tutorial.tex @@ -1033,12 +1033,8 @@ default. threads. Complex evaluations of Faa Di Bruno formulas, simulations and numerical integration can be parallelized, Dynare++ exploits this advantage. You have to have a hardware support for this, otherwise -there is no gain from the parallelization. As a rule of thumb, set the -number of threads to the number of processors. An exception is a -machine with Pentium 4 with Hyper Threading (abbreviated by HT). This -processor can run two threads concurrently. The same applies to -Dual-Core processors. Since these processors are present in most new -PC desktops/laptops, the default is 2. +there is no gain from the parallelization. The default value is the number of +logical processors present on the machine. \item[\desc{\tt --ss-tol \it float}] This sets the tolerance of the non-linear solver of deterministic steady state to {\it float}. 
It is diff --git a/dynare++/integ/testing/tests.cc b/dynare++/integ/testing/tests.cc index eb52034cd..b8ae2f8e4 100644 --- a/dynare++/integ/testing/tests.cc +++ b/dynare++/integ/testing/tests.cc @@ -23,8 +23,6 @@ #include #include -const int num_threads = 2; // does nothing if DEBUG defined - // evaluates unfolded (Dx)^k power, where x is a vector, D is a // Cholesky factor (lower triangular) class MomentFunction : public VectorFunction @@ -252,7 +250,7 @@ TestRunnable::smolyak_normal_moments(const GeneralMatrix &m, int imom, int level WallTimer tim("\tSmolyak quadrature time: "); GaussHermite gs; SmolyakQuadrature quad(dim, level, gs); - quad.integrate(func, level, num_threads, smol_out); + quad.integrate(func, level, sthread::detach_thread_group::max_parallel_threads, smol_out); std::cout << "\tNumber of Smolyak evaluations: " << quad.numEvals(level) << std::endl; } @@ -281,7 +279,7 @@ TestRunnable::product_normal_moments(const GeneralMatrix &m, int imom, int level WallTimer tim("\tProduct quadrature time: "); GaussHermite gs; ProductQuadrature quad(dim, gs); - quad.integrate(func, level, num_threads, prod_out); + quad.integrate(func, level, sthread::detach_thread_group::max_parallel_threads, prod_out); std::cout << "\tNumber of product evaluations: " << quad.numEvals(level) << std::endl; } @@ -309,7 +307,7 @@ TestRunnable::smolyak_product_cube(const VectorFunction &func, const Vector &res { WallTimer tim("\tSmolyak quadrature time: "); SmolyakQuadrature quad(func.indim(), level, glq); - quad.integrate(func, level, num_threads, out); + quad.integrate(func, level, sthread::detach_thread_group::max_parallel_threads, out); out.add(-1.0, res); smol_error = out.getMax(); std::cout << "\tNumber of Smolyak evaluations: " << quad.numEvals(level) << std::endl; @@ -318,7 +316,7 @@ TestRunnable::smolyak_product_cube(const VectorFunction &func, const Vector &res { WallTimer tim("\tProduct quadrature time: "); ProductQuadrature quad(func.indim(), glq); - quad.integrate(func, 
level, num_threads, out); + quad.integrate(func, level, sthread::detach_thread_group::max_parallel_threads, out); out.add(-1.0, res); prod_error = out.getMax(); std::cout << "\tNumber of product evaluations: " << quad.numEvals(level) << std::endl; @@ -338,7 +336,7 @@ TestRunnable::qmc_cube(const VectorFunction &func, double res, double tol, int l WarnockPerScheme wps; QMCarloCubeQuadrature qmc(func.indim(), level, wps); // qmc.savePoints("warnock.txt", level); - qmc.integrate(func, level, num_threads, r); + qmc.integrate(func, level, sthread::detach_thread_group::max_parallel_threads, r); error1 = std::max(res - r[0], r[0] - res); std::cout << "\tQuasi-Monte Carlo (Warnock scrambling) error: " << std::setw(16) << std::setprecision(12) << error1 << std::endl; } @@ -348,7 +346,7 @@ TestRunnable::qmc_cube(const VectorFunction &func, double res, double tol, int l ReversePerScheme rps; QMCarloCubeQuadrature qmc(func.indim(), level, rps); // qmc.savePoints("reverse.txt", level); - qmc.integrate(func, level, num_threads, r); + qmc.integrate(func, level, sthread::detach_thread_group::max_parallel_threads, r); error2 = std::max(res - r[0], r[0] - res); std::cout << "\tQuasi-Monte Carlo (reverse scrambling) error: " << std::setw(16) << std::setprecision(12) << error2 << std::endl; } @@ -358,7 +356,7 @@ TestRunnable::qmc_cube(const VectorFunction &func, double res, double tol, int l IdentityPerScheme ips; QMCarloCubeQuadrature qmc(func.indim(), level, ips); // qmc.savePoints("identity.txt", level); - qmc.integrate(func, level, num_threads, r); + qmc.integrate(func, level, sthread::detach_thread_group::max_parallel_threads, r); error3 = std::max(res - r[0], r[0] - res); std::cout << "\tQuasi-Monte Carlo (no scrambling) error: " << std::setw(16) << std::setprecision(12) << error3 << std::endl; } @@ -498,7 +496,6 @@ main() nvmax = test->nvar; } tls.init(dmax, nvmax); // initialize library - sthread::detach_thread_group::max_parallel_threads = num_threads; // launch the tests int 
success = 0; diff --git a/dynare++/src/dynare_params.cc b/dynare++/src/dynare_params.cc index 63f9e94c8..c479a3ee7 100644 --- a/dynare++/src/dynare_params.cc +++ b/dynare++/src/dynare_params.cc @@ -26,7 +26,7 @@ const char *help_str " --prefix prefix of variables in Mat-4 file [\"dyn\"]\n" " --seed random number generator seed [934098]\n" " --order order of approximation [no default]\n" - " --threads number of max parallel threads [2]\n" + " --threads number of max parallel threads [nb. of logical CPUs]\n" " --ss-tol steady state calcs tolerance [1.e-13]\n" " --check pesPES check model residuals [no checks]\n" " lower/upper case switches off/on\n" diff --git a/dynare++/tl/cc/sthread.cc b/dynare++/tl/cc/sthread.cc index 5c5c7963d..36e21b9f3 100644 --- a/dynare++/tl/cc/sthread.cc +++ b/dynare++/tl/cc/sthread.cc @@ -5,9 +5,9 @@ namespace sthread { - /* We set the default value for |max_parallel_threads| to 2, i.e. - uniprocessor machine with hyper-threading */ - int detach_thread_group::max_parallel_threads = 2; + /* We set the default value for |max_parallel_threads| to the number of + logical CPUs; hardware_concurrency() may return 0 if unknown, then we use 1 */ + int detach_thread_group::max_parallel_threads = std::thread::hardware_concurrency() > 0 ? static_cast<int>(std::thread::hardware_concurrency()) : 1; /* We cycle through all threads in the group, and in each cycle we wait for the change in the |counter|. If the counter indicates less than diff --git a/mex/sources/k_order_perturbation/k_order_perturbation.cc b/mex/sources/k_order_perturbation/k_order_perturbation.cc index 98b3058aa..7b119a00d 100644 --- a/mex/sources/k_order_perturbation/k_order_perturbation.cc +++ b/mex/sources/k_order_perturbation/k_order_perturbation.cc @@ -220,8 +220,6 @@ extern "C" { const int nSteps = 0; // Dynare++ solving steps, for time being default to 0 = deterministic steady state const double sstol = 1.e-13; //NL solver tolerance from - sthread::detach_thread_group::max_parallel_threads = 2; //params.num_threads; - try { // make journal name and journal