Dynare++: by default, use as many threads as there are logical CPUs

The former default was 2 threads, since Dynare++ was written at a time when hyper-threading was becoming widespread (but multi-core processors were not yet common).
2019-01-29 16:34:25 +01:00 · 2019-01-29 16:34:25 +01:00 · 733308807d
parent edda6e3038
commit 733308807d
5 changed files with 13 additions and 22 deletions
--- a/dynare++/doc/dynare++-tutorial.tex
+++ b/dynare++/doc/dynare++-tutorial.tex
@ -1033,12 +1033,8 @@ default.
 threads. Complex evaluations of Faa Di Bruno formulas, simulations and
 numerical integration can be parallelized, Dynare++ exploits this
 advantage. You have to have a hardware support for this, otherwise
-there is no gain from the parallelization. As a rule of thumb, set the
-number of threads to the number of processors. An exception is a
-machine with Pentium 4 with Hyper Threading (abbreviated by HT). This
-processor can run two threads concurrently. The same applies to
-Dual-Core processors. Since these processors are present in most new
-PC desktops/laptops, the default is 2.
+there is no gain from the parallelization. The default value is the number of
+logical processors present on the machine.

 \item[\desc{\tt --ss-tol \it float}] This sets the tolerance of the
 non-linear solver of deterministic steady state to {\it float}. It is
--- a/dynare++/integ/testing/tests.cc
+++ b/dynare++/integ/testing/tests.cc
@ -23,8 +23,6 @@
 #include <memory>
 #include <cstdlib>

-const int num_threads = 2; // does nothing if DEBUG defined
-
 // evaluates unfolded (Dx)^k power, where x is a vector, D is a
 // Cholesky factor (lower triangular)
 class MomentFunction : public VectorFunction
@ -252,7 +250,7 @@ TestRunnable::smolyak_normal_moments(const GeneralMatrix &m, int imom, int level
    WallTimer tim("\tSmolyak quadrature time:         ");
    GaussHermite gs;
    SmolyakQuadrature quad(dim, level, gs);
-    quad.integrate(func, level, num_threads, smol_out);
+    quad.integrate(func, level, sthread::detach_thread_group::max_parallel_threads, smol_out);
    std::cout << "\tNumber of Smolyak evaluations:    " << quad.numEvals(level) << std::endl;
  }

@ -281,7 +279,7 @@ TestRunnable::product_normal_moments(const GeneralMatrix &m, int imom, int level
    WallTimer tim("\tProduct quadrature time:         ");
    GaussHermite gs;
    ProductQuadrature quad(dim, gs);
-    quad.integrate(func, level, num_threads, prod_out);
+    quad.integrate(func, level, sthread::detach_thread_group::max_parallel_threads, prod_out);
    std::cout << "\tNumber of product evaluations:    " << quad.numEvals(level) << std::endl;
  }

@ -309,7 +307,7 @@ TestRunnable::smolyak_product_cube(const VectorFunction &func, const Vector &res
  {
    WallTimer tim("\tSmolyak quadrature time:         ");
    SmolyakQuadrature quad(func.indim(), level, glq);
-    quad.integrate(func, level, num_threads, out);
+    quad.integrate(func, level, sthread::detach_thread_group::max_parallel_threads, out);
    out.add(-1.0, res);
    smol_error = out.getMax();
    std::cout << "\tNumber of Smolyak evaluations:    " << quad.numEvals(level) << std::endl;
@ -318,7 +316,7 @@ TestRunnable::smolyak_product_cube(const VectorFunction &func, const Vector &res
  {
    WallTimer tim("\tProduct quadrature time:         ");
    ProductQuadrature quad(func.indim(), glq);
-    quad.integrate(func, level, num_threads, out);
+    quad.integrate(func, level, sthread::detach_thread_group::max_parallel_threads, out);
    out.add(-1.0, res);
    prod_error = out.getMax();
    std::cout << "\tNumber of product evaluations:    " << quad.numEvals(level) << std::endl;
@ -338,7 +336,7 @@ TestRunnable::qmc_cube(const VectorFunction &func, double res, double tol, int l
    WarnockPerScheme wps;
    QMCarloCubeQuadrature qmc(func.indim(), level, wps);
    //		qmc.savePoints("warnock.txt", level);
-    qmc.integrate(func, level, num_threads, r);
+    qmc.integrate(func, level, sthread::detach_thread_group::max_parallel_threads, r);
    error1 = std::max(res - r[0], r[0] - res);
    std::cout << "\tQuasi-Monte Carlo (Warnock scrambling) error: " << std::setw(16) << std::setprecision(12) << error1 << std::endl;
  }
@ -348,7 +346,7 @@ TestRunnable::qmc_cube(const VectorFunction &func, double res, double tol, int l
    ReversePerScheme rps;
    QMCarloCubeQuadrature qmc(func.indim(), level, rps);
    //		qmc.savePoints("reverse.txt", level);
-    qmc.integrate(func, level, num_threads, r);
+    qmc.integrate(func, level, sthread::detach_thread_group::max_parallel_threads, r);
    error2 = std::max(res - r[0], r[0] - res);
    std::cout << "\tQuasi-Monte Carlo (reverse scrambling) error: " << std::setw(16) << std::setprecision(12) << error2 << std::endl;
  }
@ -358,7 +356,7 @@ TestRunnable::qmc_cube(const VectorFunction &func, double res, double tol, int l
    IdentityPerScheme ips;
    QMCarloCubeQuadrature qmc(func.indim(), level, ips);
    //		qmc.savePoints("identity.txt", level);
-    qmc.integrate(func, level, num_threads, r);
+    qmc.integrate(func, level, sthread::detach_thread_group::max_parallel_threads, r);
    error3 = std::max(res - r[0], r[0] - res);
    std::cout << "\tQuasi-Monte Carlo (no scrambling) error:      " << std::setw(16) << std::setprecision(12) << error3 << std::endl;
  }
@ -498,7 +496,6 @@ main()
        nvmax = test->nvar;
    }
  tls.init(dmax, nvmax); // initialize library
-  sthread::detach_thread_group::max_parallel_threads = num_threads;

  // launch the tests
  int success = 0;
--- a/dynare++/src/dynare_params.cc
+++ b/dynare++/src/dynare_params.cc
@ -26,7 +26,7 @@ const char *help_str
  "    --prefix <string>    prefix of variables in Mat-4 file [\"dyn\"]\n"
  "    --seed <num>         random number generator seed [934098]\n"
  "    --order <num>        order of approximation [no default]\n"
-  "    --threads <num>      number of max parallel threads [2]\n"
+  "    --threads <num>      number of max parallel threads [nb. of logical CPUs]\n"
  "    --ss-tol <num>       steady state calcs tolerance [1.e-13]\n"
  "    --check pesPES       check model residuals [no checks]\n"
  "                         lower/upper case switches off/on\n"
--- a/dynare++/tl/cc/sthread.cc
+++ b/dynare++/tl/cc/sthread.cc
@ -5,9 +5,9 @@

 namespace sthread
 {
-  /* We set the default value for |max_parallel_threads| to 2, i.e.
-     uniprocessor machine with hyper-threading */
-  int detach_thread_group::max_parallel_threads = 2;
+  /* Default |max_parallel_threads| to the number of logical CPUs; fall back to
+     1 because hardware_concurrency() returns 0 when the count is unknown */
+  int detach_thread_group::max_parallel_threads = std::thread::hardware_concurrency() > 0 ? static_cast<int>(std::thread::hardware_concurrency()) : 1;

  /* We cycle through all threads in the group, and in each cycle we wait
     for the change in the |counter|. If the counter indicates less than
--- a/mex/sources/k_order_perturbation/k_order_perturbation.cc
+++ b/mex/sources/k_order_perturbation/k_order_perturbation.cc
@ -220,8 +220,6 @@ extern "C" {
    const int nSteps = 0; // Dynare++ solving steps, for time being default to 0 = deterministic steady state
    const double sstol = 1.e-13; //NL solver tolerance from

-    sthread::detach_thread_group::max_parallel_threads = 2; //params.num_threads;
-
    try
      {
        // make journal name and journal