Dynare++: use C++11 threads (instead of POSIX threads API)

On Windows, this means that a POSIX threads implementation is no longer needed, since C++11 threads are implemented using native Windows threads. On GNU/Linux and macOS, POSIX threads are still used under the hood. A new m4 macro (AX_CXX11_THREAD) is used to add the proper compilation flags (instead of AX_PTHREAD).
2019-01-28 18:39:42 +01:00 · 2019-01-28 18:39:42 +01:00 · 752a02a36c
parent 5ea315db20
commit 752a02a36c
28 changed files with 201 additions and 1112 deletions
--- a/README.md
+++ b/README.md
@ -70,7 +70,6 @@ A number of tools and libraries are needed in order to recompile everything. You
 - [Autoconf](http://www.gnu.org/software/autoconf/), version 2.62 or later (only if you get the source through Git) (see [Installing an updated version of Autoconf in your own directory, in GNU/Linux](http://www.dynare.org/DynareWiki/AutoMake))
 - [Automake](http://www.gnu.org/software/automake/), version 1.11.2 or later (only if you get the source through Git) (see [Installing an updated version of AutoMake in your own directory, in GNU/Linux](http://www.dynare.org/DynareWiki/AutoMake))
 - An implementation of BLAS and LAPACK: either [ATLAS](http://math-atlas.sourceforge.net/), [OpenBLAS](http://xianyi.github.com/OpenBLAS/), Netlib ([BLAS](http://www.netlib.org/blas/), [LAPACK](http://www.netlib.org/lapack/)) or [MKL](http://software.intel.com/en-us/intel-mkl/) (only if you want to build Dynare++)
- An implementation of [POSIX Threads](http://en.wikipedia.org/wiki/POSIX_Threads) (optional, for taking advantage of multi-core)
 - [MAT File I/O library](http://sourceforge.net/projects/matio/) (if you want to compile Markov-Switching code, the estimation DLL, k-order DLL and Dynare++)
 - [SLICOT](http://www.slicot.org) (if you want to compile the Kalman steady state DLL)
 - [GSL library](http://www.gnu.org/software/gsl/) (if you want to compile Markov-Switching code)
--- a/configure.ac
+++ b/configure.ac
@ -79,6 +79,8 @@ CPPFLAGS="$CPPFLAGS_SAVED"
 # Don't use deprecated hash structures
 AC_DEFINE([BOOST_NO_HASH], [], [Don't use deprecated STL hash structures])

+AX_CXX11_THREAD
+
 # Check for libmatio, needed by Dynare++
 AX_MATIO
 AM_CONDITIONAL([HAVE_MATIO], [test "x$has_matio" = "xyes"])
@ -119,8 +121,6 @@ fi
 AM_CONDITIONAL([HAVE_BLAS], [test x"$ax_blas_ok" = "xyes"])
 AM_CONDITIONAL([HAVE_LAPACK], [test x"$ax_lapack_ok" = "xyes"])

-AX_PTHREAD
-
 AC_CONFIG_FILES([Makefile
                 VERSION
                 doc/Makefile
@ -181,11 +181,7 @@ AM_CONDITIONAL([ENABLE_ORG_EXPORT], [test "x$enable_org_export" != "x"])
 # Construct final output message

 if test "x$ax_blas_ok" = "xyes" -a "x$ax_lapack_ok" = "xyes" -a "x$has_matio" = "xyes"; then
-  if test x"$ax_pthread_ok" = "xyes"; then
-     BUILD_DYNAREPLUSPLUS="yes"
-  else
-     BUILD_DYNAREPLUSPLUS="yes (without POSIX threads)"
-  fi
+  BUILD_DYNAREPLUSPLUS="yes"
 else
  BUILD_DYNAREPLUSPLUS="no (missing one of: BLAS, LAPACK, MatIO)"
 fi
--- a/dynare++/integ/cc/Makefile.am
+++ b/dynare++/integ/cc/Makefile.am
@ -13,4 +13,4 @@ libinteg_a_SOURCES = \
 	vector_function.hh \
 	precalc_quadrature.hh
 libinteg_a_CPPFLAGS = -I../../sylv/cc -I../../tl/cc -I$(top_srcdir)/mex/sources
-libinteg_a_CXXFLAGS = $(AM_CXXFLAGS) $(PTHREAD_CFLAGS)
+libinteg_a_CXXFLAGS = $(AM_CXXFLAGS) $(THREAD_CXXFLAGS)
--- a/dynare++/integ/cc/quadrature.hh
+++ b/dynare++/integ/cc/quadrature.hh
@ -96,7 +96,7 @@ template <typename _Tpit>
 class QuadratureImpl;

 template <typename _Tpit>
-class IntegrationWorker : public THREAD
+class IntegrationWorker : public sthread::detach_thread
 {
  const QuadratureImpl<_Tpit> &quad;
  VectorFunction &func;
@ -138,7 +138,7 @@ public:
      }

    {
-      SYNCHRO syn(&outvec, "IntegrationWorker");
+      sthread::synchro syn(&outvec, "IntegrationWorker");
      outvec.add(1.0, tmpall);
    }
  }
@ -168,7 +168,7 @@ public:
    // todo: out.length()==func.outdim()
    // todo: dim == func.indim()
    out.zeros();
-    THREAD_GROUP gr;
+    sthread::detach_thread_group gr;
    for (int ti = 0; ti < fs.getNum(); ti++)
      gr.insert(std::make_unique<IntegrationWorker<_Tpit>>(*this, fs.getFunc(ti),
                                                           level, ti, fs.getNum(), out));
--- a/dynare++/integ/src/Makefile.am
+++ b/dynare++/integ/src/Makefile.am
@ -2,5 +2,5 @@ noinst_PROGRAMS = quadrature-points

 quadrature_points_SOURCES = quadrature-points.cc
 quadrature_points_CPPFLAGS = -I../.. -I../../sylv/cc -I../../integ/cc -I../../tl/cc
-quadrature_points_CXXFLAGS = $(AM_CXXFLAGS) $(PTHREAD_CFLAGS)
-quadrature_points_LDADD = ../cc/libinteg.a ../../tl/cc/libtl.a ../../parser/cc/libparser.a ../../sylv/cc/libsylv.a ../../utils/cc/libutils.a $(LAPACK_LIBS) $(BLAS_LIBS) $(LIBS) $(FLIBS) $(PTHREAD_LIBS)
+quadrature_points_CXXFLAGS = $(AM_CXXFLAGS) $(THREAD_CXXFLAGS)
+quadrature_points_LDADD = ../cc/libinteg.a ../../tl/cc/libtl.a ../../parser/cc/libparser.a ../../sylv/cc/libsylv.a ../../utils/cc/libutils.a $(LAPACK_LIBS) $(BLAS_LIBS) $(LIBS) $(FLIBS)
--- a/dynare++/integ/testing/Makefile.am
+++ b/dynare++/integ/testing/Makefile.am
@ -2,9 +2,9 @@ check_PROGRAMS = tests

 tests_SOURCES = tests.cc
 tests_CPPFLAGS = -I../cc -I../../tl/cc -I../../sylv/cc -I$(top_srcdir)/mex/sources
-tests_CXXFLAGS = $(AM_CXXFLAGS) $(PTHREAD_CFLAGS)
+tests_CXXFLAGS = $(AM_CXXFLAGS) $(THREAD_CXXFLAGS)
 tests_LDFLAGS = $(AM_LDFLAGS) $(LDFLAGS_MATIO)
-tests_LDADD = ../../tl/cc/libtl.a ../../sylv/cc/libsylv.a ../cc/libinteg.a $(LAPACK_LIBS) $(BLAS_LIBS) $(LIBS) $(FLIBS) $(PTHREAD_LIBS) $(LIBADD_MATIO)
+tests_LDADD = ../../tl/cc/libtl.a ../../sylv/cc/libsylv.a ../cc/libinteg.a $(LAPACK_LIBS) $(BLAS_LIBS) $(LIBS) $(FLIBS) $(LIBADD_MATIO)

 check-local:
 	./tests
--- a/dynare++/integ/testing/tests.cc
+++ b/dynare++/integ/testing/tests.cc
@ -498,7 +498,7 @@ main()
        nvmax = test->nvar;
    }
  tls.init(dmax, nvmax); // initialize library
-  THREAD_GROUP::max_parallel_threads = num_threads;
+  sthread::detach_thread_group::max_parallel_threads = num_threads;

  // launch the tests
  int success = 0;
--- a/dynare++/kord/Makefile.am
+++ b/dynare++/kord/Makefile.am
@ -27,15 +27,15 @@ libkord_a_SOURCES = \
 	random.hh

 libkord_a_CPPFLAGS = -I../sylv/cc -I../tl/cc -I../integ/cc -I$(top_srcdir)/mex/sources $(CPPFLAGS_MATIO)
-libkord_a_CXXFLAGS = $(AM_CXXFLAGS) $(PTHREAD_CFLAGS)
+libkord_a_CXXFLAGS = $(AM_CXXFLAGS) $(THREAD_CXXFLAGS)

 check_PROGRAMS = tests

 tests_SOURCES = tests.cc
 tests_CPPFLAGS = -I../sylv/cc -I../tl/cc -I../integ/cc -I$(top_srcdir)/mex/sources
-tests_CXXFLAGS = $(AM_CXXFLAGS) $(PTHREAD_CFLAGS)
+tests_CXXFLAGS = $(AM_CXXFLAGS) $(THREAD_CXXFLAGS)
 tests_LDFLAGS = $(AM_LDFLAGS) $(LDFLAGS_MATIO)
-tests_LDADD = libkord.a ../tl/cc/libtl.a ../sylv/cc/libsylv.a $(LAPACK_LIBS) $(BLAS_LIBS) $(LIBS) $(FLIBS) $(PTHREAD_LIBS) $(LIBADD_MATIO)
+tests_LDADD = libkord.a ../tl/cc/libtl.a ../sylv/cc/libsylv.a $(LAPACK_LIBS) $(BLAS_LIBS) $(LIBS) $(FLIBS) $(LIBADD_MATIO)

 check-local:
 	./tests
--- a/dynare++/kord/decision_rule.cc
+++ b/dynare++/kord/decision_rule.cc
@ -90,7 +90,7 @@ SimResults::simulate(int num_sim, const DecisionRule &dr, const Vector &start,
  std::vector<RandomShockRealization> rsrs;
  rsrs.reserve(num_sim);

-  THREAD_GROUP gr;
+  sthread::detach_thread_group gr;
  for (int i = 0; i < num_sim; i++)
    {
      RandomShockRealization sr(vcov, system_random_generator.int_uniform());
@ -331,7 +331,7 @@ SimResultsIRF::simulate(const DecisionRule &dr, Journal &journal)
 void
 SimResultsIRF::simulate(const DecisionRule &dr)
 {
-  THREAD_GROUP gr;
+  sthread::detach_thread_group gr;
  for (int idata = 0; idata < control.getNumSets(); idata++)
    gr.insert(std::make_unique<SimulationIRFWorker>(*this, dr, DecisionRule::horner,
                                                    num_per, idata, ishock, imp));
@ -410,7 +410,7 @@ RTSimResultsStats::simulate(int num_sim, const DecisionRule &dr, const Vector &s
  std::vector<RandomShockRealization> rsrs;
  rsrs.reserve(num_sim);

-  THREAD_GROUP gr;
+  sthread::detach_thread_group gr;
  for (int i = 0; i < num_sim; i++)
    {
      RandomShockRealization sr(vcov, system_random_generator.int_uniform());
@ -485,7 +485,7 @@ SimulationWorker::operator()()
  auto *esr = new ExplicitShockRealization(sr, np);
  TwoDMatrix *m = dr.simulate(em, np, st, *esr);
  {
-    SYNCHRO syn(&res, "simulation");
+    sthread::synchro syn(&res, "simulation");
    res.addDataSet(m, esr);
  }
 }
@ -504,7 +504,7 @@ SimulationIRFWorker::operator()()
  TwoDMatrix *m = dr.simulate(em, np, st, *esr);
  m->add(-1.0, res.control.getData(idata));
  {
-    SYNCHRO syn(&res, "simulation");
+    sthread::synchro syn(&res, "simulation");
    res.addDataSet(m, esr);
  }
 }
@ -546,7 +546,7 @@ RTSimulationWorker::operator()()
        nc.update(y);
    }
  {
-    SYNCHRO syn(&res, "rtsimulation");
+    sthread::synchro syn(&res, "rtsimulation");
    res.nc.update(nc);
    if (res.num_per-ip > 0)
      {
--- a/dynare++/kord/decision_rule.hh
+++ b/dynare++/kord/decision_rule.hh
@ -908,7 +908,7 @@ public:
 /* This worker simulates the given decision rule and inserts the result
   to |SimResults|. */

-class SimulationWorker : public THREAD
+class SimulationWorker : public sthread::detach_thread
 {
 protected:
  SimResults &res;
@ -933,7 +933,7 @@ public:
   control simulations are contained in |SimResultsIRF| which is passed
   to the constructor. */

-class SimulationIRFWorker : public THREAD
+class SimulationIRFWorker : public sthread::detach_thread
 {
  SimResultsIRF &res;
  const DecisionRule &dr;
@ -960,7 +960,7 @@ public:
   Inf is observed, it ends the simulation and adds to the
   |thrown_periods| of |RTSimResultsStats|. */

-class RTSimulationWorker : public THREAD
+class RTSimulationWorker : public sthread::detach_thread
 {
 protected:
  RTSimResultsStats &res;
--- a/dynare++/kord/faa_di_bruno.cc
+++ b/dynare++/kord/faa_di_bruno.cc
@ -120,7 +120,7 @@ int
 FaaDiBruno::estimRefinment(const TensorDimens &tdims, int nr, int l,
                           int &avmem_mb, int &tmpmem_mb)
 {
-  int nthreads = THREAD_GROUP::max_parallel_threads;
+  int nthreads = sthread::detach_thread_group::max_parallel_threads;
  long int per_size1 = tdims.calcUnfoldMaxOffset();
  auto per_size2 = (long int) pow((double) tdims.getNVS().getMax(), l);
  double lambda = 0.0;
--- a/dynare++/src/Makefile.am
+++ b/dynare++/src/Makefile.am
@ -23,8 +23,8 @@ dynare___SOURCES = \

 dynare___CPPFLAGS = -I../sylv/cc -I../tl/cc -I../kord -I../integ/cc -I.. -I$(top_srcdir)/mex/sources -DDYNVERSION=\"$(PACKAGE_VERSION)\" $(BOOST_CPPFLAGS) $(CPPFLAGS_MATIO)
 dynare___LDFLAGS = $(AM_LDFLAGS) $(LDFLAGS_MATIO) $(BOOST_LDFLAGS)
-dynare___LDADD = ../kord/libkord.a ../integ/cc/libinteg.a ../tl/cc/libtl.a ../parser/cc/libparser.a ../utils/cc/libutils.a ../sylv/cc/libsylv.a $(LIBADD_MATIO) $(noinst_LIBRARIES) $(LAPACK_LIBS) $(BLAS_LIBS) $(LIBS) $(FLIBS) $(PTHREAD_LIBS)
-dynare___CXXFLAGS = $(AM_CXXFLAGS) $(PTHREAD_CFLAGS)
+dynare___LDADD = ../kord/libkord.a ../integ/cc/libinteg.a ../tl/cc/libtl.a ../parser/cc/libparser.a ../utils/cc/libutils.a ../sylv/cc/libsylv.a $(LIBADD_MATIO) $(noinst_LIBRARIES) $(LAPACK_LIBS) $(BLAS_LIBS) $(LIBS) $(FLIBS)
+dynare___CXXFLAGS = $(AM_CXXFLAGS) $(THREAD_CXXFLAGS)

 BUILT_SOURCES = $(GENERATED_FILES)
 EXTRA_DIST = dynglob.ll dynglob.yy
--- a/dynare++/src/main.cc
+++ b/dynare++/src/main.cc
@ -31,7 +31,7 @@ main(int argc, char **argv)
      printf(" for LGPL see http://www.gnu.org/licenses/lgpl.html\n");
      return 0;
    }
-  THREAD_GROUP::max_parallel_threads = params.num_threads;
+  sthread::detach_thread_group::max_parallel_threads = params.num_threads;

  try
    {
@ -118,7 +118,7 @@ main(int argc, char **argv)
      if (params.check_along_path || params.check_along_shocks
          || params.check_on_ellipse)
        {
-          GlobalChecker gcheck(app, THREAD_GROUP::max_parallel_threads, journal);
+          GlobalChecker gcheck(app, sthread::detach_thread_group::max_parallel_threads, journal);
          if (params.check_along_shocks)
            gcheck.checkAlongShocksAndSave(matfd, params.prefix,
                                           params.getCheckShockPoints(),
--- a/dynare++/tl/cc/Makefile.am
+++ b/dynare++/tl/cc/Makefile.am
@ -45,4 +45,4 @@ libtl_a_SOURCES = \
 	twod_matrix.cc \
 	twod_matrix.hh
 libtl_a_CPPFLAGS = -I../../sylv/cc $(CPPFLAGS_MATIO)
-libtl_a_CXXFLAGS = $(AM_CXXFLAGS) $(PTHREAD_CFLAGS)
+libtl_a_CXXFLAGS = $(AM_CXXFLAGS) $(THREAD_CXXFLAGS)
--- a/dynare++/tl/cc/stack_container.cc
+++ b/dynare++/tl/cc/stack_container.cc
@ -41,7 +41,7 @@ FoldedStackContainer::multAndAdd(int dim, const FGSContainer &c, FGSTensor &out)
  TL_RAISE_IF(c.num() != numStacks(),
              "Wrong symmetry length of container for FoldedStackContainer::multAndAdd");

-  THREAD_GROUP gr;
+  sthread::detach_thread_group gr;
  SymmetrySet ss(dim, c.num());
  for (symiterator si(ss); !si.isEnd(); ++si)
    if (c.check(*si))
@ -76,7 +76,7 @@ void
 FoldedStackContainer::multAndAddSparse1(const FSSparseTensor &t,
                                        FGSTensor &out) const
 {
-  THREAD_GROUP gr;
+  sthread::detach_thread_group gr;
  UFSTensor dummy(0, numStacks(), t.dimen());
  for (Tensor::index ui = dummy.begin(); ui != dummy.end(); ++ui)
    gr.insert(std::make_unique<WorkerFoldMAASparse1>(*this, t, out, ui.getCoor()));
@ -121,7 +121,7 @@ WorkerFoldMAASparse1::operator()()
                    {
                      FPSTensor fps(out.getDims(), it, slice, kp);
                      {
-                        SYNCHRO syn(&out, "WorkerUnfoldMAASparse1");
+                        sthread::synchro syn(&out, "WorkerUnfoldMAASparse1");
                        fps.addTo(out);
                      }
                    }
@ -148,7 +148,7 @@ void
 FoldedStackContainer::multAndAddSparse2(const FSSparseTensor &t,
                                        FGSTensor &out) const
 {
-  THREAD_GROUP gr;
+  sthread::detach_thread_group gr;
  FFSTensor dummy_f(0, numStacks(), t.dimen());
  for (Tensor::index fi = dummy_f.begin(); fi != dummy_f.end(); ++fi)
      gr.insert(std::make_unique<WorkerFoldMAASparse2>(*this, t, out, fi.getCoor()));
@ -244,7 +244,7 @@ FoldedStackContainer::multAndAddSparse3(const FSSparseTensor &t,
 void
 FoldedStackContainer::multAndAddSparse4(const FSSparseTensor &t, FGSTensor &out) const
 {
-  THREAD_GROUP gr;
+  sthread::detach_thread_group gr;
  FFSTensor dummy_f(0, numStacks(), t.dimen());
  for (Tensor::index fi = dummy_f.begin(); fi != dummy_f.end(); ++fi)
    gr.insert(std::make_unique<WorkerFoldMAASparse4>(*this, t, out, fi.getCoor()));
@ -310,7 +310,7 @@ FoldedStackContainer::multAndAddStacks(const IntSequence &coor,
                        kp.optimizeOrder();
                      FPSTensor fps(out.getDims(), it, sort_per, ug, kp);
                      {
-                        SYNCHRO syn(ad, "multAndAddStacks");
+                        sthread::synchro syn(ad, "multAndAddStacks");
                        fps.addTo(out);
                      }
                    }
@ -351,7 +351,7 @@ FoldedStackContainer::multAndAddStacks(const IntSequence &coor,
                      KronProdStack<FGSTensor> kp(sp, coor);
                      FPSTensor fps(out.getDims(), it, sort_per, g, kp);
                      {
-                        SYNCHRO syn(ad, "multAndAddStacks");
+                        sthread::synchro syn(ad, "multAndAddStacks");
                        fps.addTo(out);
                      }
                    }
@ -394,7 +394,7 @@ UnfoldedStackContainer::multAndAdd(int dim, const UGSContainer &c,
  TL_RAISE_IF(c.num() != numStacks(),
              "Wrong symmetry length of container for UnfoldedStackContainer::multAndAdd");

-  THREAD_GROUP gr;
+  sthread::detach_thread_group gr;
  SymmetrySet ss(dim, c.num());
  for (symiterator si(ss); !si.isEnd(); ++si)
    if (c.check(*si))
@ -441,7 +441,7 @@ void
 UnfoldedStackContainer::multAndAddSparse1(const FSSparseTensor &t,
                                          UGSTensor &out) const
 {
-  THREAD_GROUP gr;
+  sthread::detach_thread_group gr;
  UFSTensor dummy(0, numStacks(), t.dimen());
  for (Tensor::index ui = dummy.begin(); ui != dummy.end(); ++ui)
    gr.insert(std::make_unique<WorkerUnfoldMAASparse1>(*this, t, out, ui.getCoor()));
@ -503,7 +503,7 @@ WorkerUnfoldMAASparse1::operator()()
                    {
                      UPSTensor ups(out.getDims(), it, slice, kp);
                      {
-                        SYNCHRO syn(&out, "WorkerUnfoldMAASparse1");
+                        sthread::synchro syn(&out, "WorkerUnfoldMAASparse1");
                        ups.addTo(out);
                      }
                    }
@ -545,7 +545,7 @@ void
 UnfoldedStackContainer::multAndAddSparse2(const FSSparseTensor &t,
                                          UGSTensor &out) const
 {
-  THREAD_GROUP gr;
+  sthread::detach_thread_group gr;
  FFSTensor dummy_f(0, numStacks(), t.dimen());
  for (Tensor::index fi = dummy_f.begin(); fi != dummy_f.end(); ++fi)
    gr.insert(std::make_unique<WorkerUnfoldMAASparse2>(*this, t, out, fi.getCoor()));
@ -629,7 +629,7 @@ UnfoldedStackContainer::multAndAddStacks(const IntSequence &fi,
                        kp.optimizeOrder();
                      UPSTensor ups(out.getDims(), it, sort_per, g, kp);
                      {
-                        SYNCHRO syn(ad, "multAndAddStacks");
+                        sthread::synchro syn(ad, "multAndAddStacks");
                        ups.addTo(out);
                      }
                    }
--- a/dynare++/tl/cc/stack_container.hh
+++ b/dynare++/tl/cc/stack_container.hh
@ -645,7 +645,7 @@ public:
  }
 };

-class WorkerFoldMAADense : public THREAD
+class WorkerFoldMAADense : public sthread::detach_thread
 {
  const FoldedStackContainer &cont;
  Symmetry sym;
@ -659,7 +659,7 @@ public:
  void operator()() override;
 };

-class WorkerFoldMAASparse1 : public THREAD
+class WorkerFoldMAASparse1 : public sthread::detach_thread
 {
  const FoldedStackContainer &cont;
  const FSSparseTensor &t;
@ -673,7 +673,7 @@ public:
  void operator()() override;
 };

-class WorkerFoldMAASparse2 : public THREAD
+class WorkerFoldMAASparse2 : public sthread::detach_thread
 {
  const FoldedStackContainer &cont;
  const FSSparseTensor &t;
@ -686,7 +686,7 @@ public:
  void operator()() override;
 };

-class WorkerFoldMAASparse4 : public THREAD
+class WorkerFoldMAASparse4 : public sthread::detach_thread
 {
  const FoldedStackContainer &cont;
  const FSSparseTensor &t;
@ -699,7 +699,7 @@ public:
  void operator()() override;
 };

-class WorkerUnfoldMAADense : public THREAD
+class WorkerUnfoldMAADense : public sthread::detach_thread
 {
  const UnfoldedStackContainer &cont;
  Symmetry sym;
@ -713,7 +713,7 @@ public:
  void operator()() override;
 };

-class WorkerUnfoldMAASparse1 : public THREAD
+class WorkerUnfoldMAASparse1 : public sthread::detach_thread
 {
  const UnfoldedStackContainer &cont;
  const FSSparseTensor &t;
@ -727,7 +727,7 @@ public:
  void operator()() override;
 };

-class WorkerUnfoldMAASparse2 : public THREAD
+class WorkerUnfoldMAASparse2 : public sthread::detach_thread
 {
  const UnfoldedStackContainer &cont;
  const FSSparseTensor &t;
--- a/dynare++/tl/cc/sthread.cc
+++ b/dynare++/tl/cc/sthread.cc
@ -1,232 +1,70 @@
 // Copyright 2004, Ondra Kamenik

-/* We set the default values for
-   |max_parallel_threads| for both |posix| and |empty| implementation and
-   both joinable and detach group. For |posix| this defaults to
-   uniprocessor machine with hyper-threading, this is 2. */

-#include <cstring>
 #include "sthread.hh"

-#ifdef HAVE_PTHREAD
 namespace sthread
 {
-  template<>
-  int thread_group<posix>::max_parallel_threads = 2;
-  template<>
-  int detach_thread_group<posix>::max_parallel_threads = 2;
+  /* We set the default value for |max_parallel_threads| to 2, i.e. the
+     case of a uniprocessor machine with hyper-threading. */
+  int detach_thread_group::max_parallel_threads = 2;

-  // POSIX specializations methods
-  void *posix_thread_function(void *c);
-  template <>
-  void
-  thread_traits<posix>::run(_Ctype *c)
+  /* The constructor acquires the mutex in the map. First it gets
+     exclusive access to the map. Then it increases the number of
+     references to the mutex (if it does not exist, it inserts it). Then
+     it unlocks the map, and finally locks the mutex stored in the map. */
+  synchro::synchro(const void *c, std::string id)
+    : caller{c}, iden{std::move(id)}
  {
-    pthread_create(&(c->getThreadIden()), nullptr, posix_thread_function, (void *) c);
+    mutmap.lock_map();
+    if (!mutmap.check(caller, iden))
+      mutmap.insert(caller, iden);
+    mutmap.get(caller, iden).second++;
+    mutmap.unlock_map();
+    mutmap.get(caller, iden).first.lock();
  }

-  void *posix_detach_thread_function(void *c);
-
-  template <>
-  void
-  thread_traits<posix>::detach_run(_Dtype *c)
+  /* The destructor first locks the map. Then it unlocks the mutex
+     and decreases its number of references. If that number reaches zero,
+     it removes the mutex from the map. Finally it unlocks the map. */
+  synchro::~synchro()
  {
-    pthread_attr_t attr;
-    pthread_attr_init(&attr);
-    pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
-    pthread_create(&(c->getThreadIden()), &attr, posix_detach_thread_function, (void *) c);
-    pthread_attr_destroy(&attr);
-  }
-
-  template <>
-  void
-  thread_traits<posix>::exit()
-  {
-    pthread_exit(nullptr);
-  }
-
-  template <>
-  void
-  thread_traits<posix>::join(_Ctype *c)
-  {
-    pthread_join(c->getThreadIden(), nullptr);
-  }
-
-  template <>
-  void
-  mutex_traits<posix>::init(pthread_mutex_t &m)
-  {
-    pthread_mutex_init(&m, nullptr);
-  }
-
-  template <>
-  void
-  mutex_traits<posix>::lock(pthread_mutex_t &m)
-  {
-    pthread_mutex_lock(&m);
-  }
-
-  template <>
-  void
-  mutex_traits<posix>::unlock(pthread_mutex_t &m)
-  {
-    pthread_mutex_unlock(&m);
-  }
-
-  template <>
-  void
-  cond_traits<posix>::init(_Tcond &cond)
-  {
-    pthread_cond_init(&cond, nullptr);
-  }
-
-  template <>
-  void
-  cond_traits<posix>::broadcast(_Tcond &cond)
-  {
-    pthread_cond_broadcast(&cond);
-  }
-
-  template <>
-  void
-  cond_traits<posix>::wait(_Tcond &cond, _Tmutex &mutex)
-  {
-    pthread_cond_wait(&cond, &mutex);
-  }
-
-  template <>
-  void
-  cond_traits<posix>::destroy(_Tcond &cond)
-  {
-    pthread_cond_destroy(&cond);
-  }
-
-  /* Here we instantiate the static map, and construct |PosixSynchro|
-     using that map. */
-
-  static posix_synchro::mutex_map_t posix_mm;
-
-  PosixSynchro::PosixSynchro(const void *c, const char *id)
-    : posix_synchro(c, id, posix_mm)
-  {
-  }
-
-  /* This function is of the type |void* function(void*)| as required by
-     POSIX, but it typecasts its argument and runs |operator()()|. */
-
-  void *
-  posix_thread_function(void *c)
-  {
-    auto *ct
-      = (thread_traits<posix>::_Ctype *)c;
-    try
+    mutmap.lock_map();
+    if (mutmap.check(caller, iden))
      {
-        ct->operator()();
+        mutmap.get(caller, iden).first.unlock();
+        mutmap.get(caller, iden).second--;
+        if (mutmap.get(caller, iden).second == 0)
+          mutmap.remove(caller, iden);
      }
-    catch (...)
-      {
-        ct->exit();
-      }
-    return nullptr;
+    mutmap.unlock_map();
  }

-  void *
-  posix_detach_thread_function(void *c)
+  /* We cycle through all threads in the group, and in each cycle we wait
+     for the change in the |counter|. If the counter indicates less than
+     maximum parallel threads running, then a new thread is run, and the
+     iterator in the list is moved.
+
+     At the end we have to wait for all threads to finish. */
+  void
+  detach_thread_group::run()
  {
-    auto *ct
-      = (thread_traits<posix>::_Dtype *)c;
-    condition_counter<posix> *counter = ct->counter;
-    try
+    std::unique_lock<std::mutex> lk{m};
+    auto it = tlist.begin();
+    while (it != tlist.end())
      {
-        ct->operator()();
+        counter++;
+        std::thread th{[&, it] {
+            // The "it" variable is captured by value, because otherwise the iterator may move
+            (*it)->operator()();
+            std::unique_lock<std::mutex> lk2{m};
+            counter--;
+            std::notify_all_at_thread_exit(cv, std::move(lk2));
+          }};
+        th.detach();
+        ++it;
+        cv.wait(lk, [&] { return counter < max_parallel_threads; });
      }
-    catch (...)
-      {
-        ct->exit();
-      }
-    if (counter)
-      counter->decrease();
-    return nullptr;
+    cv.wait(lk, [&] { return counter == 0; });
  }
 }
-#else
-namespace sthread
-{
-  template<>
-  int thread_group<empty>::max_parallel_threads = 1;
-  template<>
-  int detach_thread_group<empty>::max_parallel_threads = 1;
-
-  // non-threading specialization methods
-  /* The only trait methods we need to work are |thread_traits::run| and
-     |thread_traits::detach_run|, which directly call
-     |operator()()|. Anything other is empty. */
-
-  template <>
-  void
-  thread_traits<empty>::run(_Ctype *c)
-  {
-    c->operator()();
-  }
-  template <>
-  void
-  thread_traits<empty>::detach_run(_Dtype *c)
-  {
-    c->operator()();
-  }
-
-  template <>
-  void
-  thread_traits<empty>::exit()
-  {
-  }
-
-  template <>
-  void
-  thread_traits<empty>::join(_Ctype *c)
-  {
-  }
-
-  template <>
-  void
-  mutex_traits<empty>::init(Empty &m)
-  {
-  }
-
-  template <>
-  void
-  mutex_traits<empty>::lock(Empty &m)
-  {
-  }
-
-  template <>
-  void
-  mutex_traits<empty>::unlock(Empty &m)
-  {
-  }
-
-  template <>
-  void
-  cond_traits<empty>::init(_Tcond &cond)
-  {
-  }
-
-  template <>
-  void
-  cond_traits<empty>::broadcast(_Tcond &cond)
-  {
-  }
-
-  template <>
-  void
-  cond_traits<empty>::wait(_Tcond &cond, _Tmutex &mutex)
-  {
-  }
-
-  template <>
-  void
-  cond_traits<empty>::destroy(_Tcond &cond)
-  {
-  }
-}
-#endif
--- a/dynare++/tl/cc/sthread.hh
+++ b/dynare++/tl/cc/sthread.hh
@ -3,24 +3,18 @@
 // Simple threads.

 /* This file defines types making a simple interface to
-   multi-threading. It follows the classical C++ idioms for traits. We
-   have three sorts of traits. The first is a |thread_traits|, which make
-   interface to thread functions (run, exit, create and join), the second
-   is |mutex_traits|, which make interface to mutexes (create, lock,
-   unlock), and third is |cond_traits|, which make interface to
-   conditions (create, wait, broadcast, and destroy). At present, there
-   are two implementations. The first are POSIX threads, mutexes, and
-   conditions, the second is serial (no parallelization).
+   multi-threading.

-   The file provides the following interfaces templated by the types
-   implementing the threading (like types |pthread_t|, and |pthread_mutex_t|
-   for POSIX thread and mutex):
+   The file provides the following interfaces:
   \unorderedlist
-   \li |thread| is a pure virtual class, which must be inherited and a
+   \li |detach_thread| is a pure virtual class, which must be inherited and a
   method |operator()()| be implemented as the running code of the
-   thread. This code is run as a new thread by calling |run| method.
-   \li |thread_group| allows insertion of |thread|s and running all of
-   them simultaneously joining them. The number of maximum parallel
+   thread.
+   \li |detach_thread_group| allows insertion of |detach_thread|s and running
+   all of them simultaneously. The threads
+   are not joined, they are synchronized by means of a counter counting
+   running threads. A change of the counter is checked by waiting on an
+   associated condition. The maximum number of parallel
   threads can be controlled. See below.
   \li |synchro| object locks a piece of code to be executed only serially
   for a given data and specified entry-point. It locks the code until it
@ -28,212 +22,27 @@
   on the stack of a function which is to be synchronized. The
   synchronization can be subjected to specific data (then a pointer can
   be passed to |synchro|'s constructor), and can be subjected to
-   specific entry-point (then |const char*| is passed to the
+   specific entry-point (then |std::string| is passed to the
   constructor).
-   \li |detach_thread| inherits from |thread| and models a detached
-   thread in contrast to |thread| which models the joinable thread.
-   \li |detach_thread_group| groups the detached threads and runs them. They
-   are not joined, they are synchronized by means of a counter counting
-   running threads. A change of the counter is checked by waiting on an
-   associated condition.
   \endunorderedlist

-   What implementation is selected is governed (at present) by
-   |HAVE_PTHREAD|. If it is defined, then POSIX threads are linked. If
-   it is not defined, then serial implementation is taken. In accordance
-   with this, the header file defines macros |THREAD|, |THREAD_GROUP|,
-   and |SYNCHRO| as the picked specialization of |thread| (or |detach_thread|),
-   |thread_group| (or |detach_thread_group|), and |synchro|.
-
-   The type of implementation is controlled by |thread_impl| integer
-   template parameter, this can be |posix| or |empty|.
-
   The number of maximum parallel threads is controlled via a static
-   member of |thread_group| and |detach_thread_group| classes. */
+   member of the |detach_thread_group| class. */

 #ifndef STHREAD_H
 #define STHREAD_H

-#ifdef HAVE_PTHREAD
-# include <pthread.h>
-#else
-/* Give valid types for POSIX thread types, otherwise the templates fail in empty mode.
-   Don't use typedefs because on some systems |pthread_t| and friends are typedefs even
-   without the include. */
-# define pthread_t void *
-# define pthread_mutex_t void *
-# define pthread_cond_t void *
-#endif
-
-#include <cstdio>
-#include <list>
+#include <vector>
 #include <map>
-#include <type_traits>
 #include <memory>
 #include <utility>
+#include <thread>
+#include <mutex>
+#include <condition_variable>

 namespace sthread
 {
-  using namespace std;
-
-  class Empty
-  {
-  };
-
-  enum { posix, empty};
-
-  template <int thread_impl>
-  class thread;
-  template <int>
-  class detach_thread;
-
-  /* Clear. We have only |run|, |detach_run|, |exit| and |join|, since
-     this is only a simple interface. */
-
-  template <int thread_impl>
-  struct thread_traits
-  {
-    using _Tthread = std::conditional_t<thread_impl == posix, pthread_t, Empty>;
-    using _Ctype = thread<0>;
-    using _Dtype = detach_thread<0>;
-    static void run(_Ctype *c);
-    static void detach_run(_Dtype *c);
-    static void exit();
-    static void join(_Ctype *c);
-  };
-
-  /* The class of |thread| is clear. The user implements |operator()()|,
-     the method |run| runs the user's code as joinable thread, |exit| kills the
-     execution. */
-  template <int thread_impl>
-  class thread
-  {
-    using _Ttraits = thread_traits<0>;
-    using _Tthread = typename _Ttraits::_Tthread;
-    _Tthread th;
-  public:
-    virtual ~thread()
-    = default;
-    _Tthread &
-    getThreadIden()
-    {
-      return th;
-    }
-    const _Tthread &
-    getThreadIden() const
-    {
-      return th;
-    }
-    virtual void operator()() = 0;
-    void
-    run()
-    {
-      _Ttraits::run(this);
-    }
-    void
-    detach_run()
-    {
-      _Ttraits::detach_run(this);
-    }
-    void
-    exit()
-    {
-      _Ttraits::exit();
-    }
-  };
-
-  /* The |thread_group| is also clear. We allow a user to insert the
-     |thread|s, and then launch |run|, which will run all the threads not
-     allowing more than |max_parallel_threads| joining them at the
-     end. This static member can be set from outside. */
-
-  template <int thread_impl>
-  class thread_group
-  {
-    using _Ttraits = thread_traits<thread_impl>;
-    using _Ctype = thread<thread_impl>;
-    list<_Ctype *> tlist;
-    using iterator = typename list<_Ctype *>::iterator;
-  public:
-    static int max_parallel_threads;
-    void
-    insert(_Ctype *c)
-    {
-      tlist.push_back(c);
-    }
-    /* The thread group class maintains list of pointers to threads. It
-       takes responsibility of deallocating the threads. So we implement the
-       destructor. */
-    ~thread_group()
-    {
-      while (!tlist.empty())
-        {
-          delete tlist.front();
-          tlist.pop_front();
-        }
-    }
-    /* Here we run the threads ensuring that not more than
-       |max_parallel_threads| are run in parallel. More over, we do not want
-       to run a too low number of threads, since it is wasting with resource
-       (if there are). Therefore, we run in parallel |max_parallel_threads|
-       batches as long as the remaining threads are greater than the double
-       number. And then the remaining batch (less than |2*max_parallel_threads|)
-       is run half by half. */
-
-    void
-    run()
-    {
-      int rem = tlist.size();
-      iterator pfirst = tlist.begin();
-      while (rem > 2*max_parallel_threads)
-        {
-          pfirst = run_portion(pfirst, max_parallel_threads);
-          rem -= max_parallel_threads;
-        }
-      if (rem > max_parallel_threads)
-        {
-          pfirst = run_portion(pfirst, rem/2);
-          rem -= rem/2;
-        }
-      run_portion(pfirst, rem);
-    }
-
-  private:
-    /* This runs a given number of threads in parallel starting from the
-       given iterator. It returns the first iterator not run. */
-
-    iterator
-    run_portion(iterator start, int n)
-    {
-      int c = 0;
-      for (iterator i = start; c < n; ++i, c++)
-        {
-          (*i)->run();
-        }
-      iterator ret;
-      c = 0;
-      for (ret = start; c < n; ++ret, c++)
-        {
-          _Ttraits::join(*ret);
-        }
-      return ret;
-    }
-  };
-
-
-  /* Clear. We have only |init|, |lock|, and |unlock|. */
-  struct ltmmkey;
-  using mmkey = pair<const void *, const char *>;
-
-  template <int thread_impl>
-  struct mutex_traits
-  {
-    using _Tmutex = std::conditional_t<thread_impl == posix, pthread_mutex_t, Empty>;
-    using mutex_int_map = map<mmkey, pair<_Tmutex, int>, ltmmkey>;
-    static void init(_Tmutex &m);
-    static void lock(_Tmutex &m);
-    static void unlock(_Tmutex &m);
-  };
+  using mmkey = std::pair<const void *, std::string>;

  /* Here we define a map of mutexes keyed by a pair of address, and a
     string. A purpose of the map of mutexes is that, if synchronizing, we
@ -258,230 +67,77 @@ namespace sthread
    operator()(const mmkey &k1, const mmkey &k2) const
    {
      return k1.first < k2.first
-                        || (k1.first == k2.first && strcmp(k1.second, k2.second) < 0);
+                        || (k1.first == k2.first && k1.second < k2.second);
    }
  };

-  template <int thread_impl>
-  class mutex_map :
-    public mutex_traits<thread_impl>::mutex_int_map
+  using mutex_int_map = std::map<mmkey, std::pair<std::mutex, int>, ltmmkey>;
+
+  class mutex_map : public mutex_int_map
  {
-    using _Tmutex = typename mutex_traits<thread_impl>::_Tmutex;
-    using _Mtraits = mutex_traits<thread_impl>;
-    using mmval = pair<_Tmutex, int>;
-    using _Tparent = map<mmkey, mmval, ltmmkey>;
-    using iterator = typename _Tparent::iterator;
-    using _mvtype = typename _Tparent::value_type;
-    _Tmutex m;
+    using mmval = std::pair<std::mutex, int>;
+    std::mutex m;
  public:
-    mutex_map()
-    {
-      _Mtraits::init(m);
-    }
+    mutex_map() = default;
    void
-    insert(const void *c, const char *id, const _Tmutex &m)
+    insert(const void *c, std::string id)
    {
-      _Tparent::insert(_mvtype(mmkey(c, id), mmval(m, 0)));
+      // We cannot use emplace(), because std::mutex is neither copyable nor moveable
+      operator[](mmkey{c, std::move(id)}).second = 0;
    }
    bool
-    check(const void *c, const char *id) const
+    check(const void *c, std::string id) const
    {
-      return _Tparent::find(mmkey(c, id)) != _Tparent::end();
+      return find(mmkey{c, std::move(id)}) != end();
    }
-    /* This returns a pointer to the pair of mutex and count reference number. */
-    mmval *
-    get(const void *c, const char *id)
+    /* Returns the (mutex, reference count) pair for the key; a missing key is default-inserted. */
+    mmval &
+    get(const void *c, std::string id)
    {
-      auto it = _Tparent::find(mmkey(c, id));
-      if (it == _Tparent::end())
-        return nullptr;
-      return &((*it).second);
+      return operator[](mmkey{c, std::move(id)});
    }

    /* This removes unconditionally the mutex from the map regardless its
       number of references. The only user of this class should be |synchro|
       class, it implementation must not remove referenced mutex. */
-
    void
-    remove(const void *c, const char *id)
+    remove(const void *c, std::string id)
    {
-      auto it = _Tparent::find(mmkey(c, id));
-      if (it != _Tparent::end())
-        this->erase(it);
+      auto it = find(mmkey{c, std::move(id)}); // id is a by-value sink: move it like insert/check/get do
+      if (it != end())
+        erase(it);
    }
    void
    lock_map()
    {
-      _Mtraits::lock(m);
+      m.lock();
    }
    void
    unlock_map()
    {
-      _Mtraits::unlock(m);
+      m.unlock();
    }

  };

+
+  // The global map used by the synchro class
+  static mutex_map mutmap;
+
  /* This is the |synchro| class. The constructor of this class tries to
     lock a mutex for a particular address (identification of data) and
     string (identification of entry-point). If the mutex is already
     locked, it waits until it is unlocked and then returns. The destructor
     releases the lock. The typical use is to construct the object on the
     stacked of the code being synchronized. */
-
-  template <int thread_impl>
  class synchro
  {
-    using _Tmutex = typename mutex_traits<thread_impl>::_Tmutex;
-    using _Mtraits = mutex_traits<0>;
-  public:
-    using mutex_map_t = mutex_map<0>;
  private:
    const void *caller;
-    const char *iden;
-    mutex_map_t &mutmap;
+    const std::string iden;
  public:
-    synchro(const void *c, const char *id, mutex_map_t &mmap)
-      : caller(c), iden(id), mutmap(mmap)
-    {
-      lock();
-    }
-    ~synchro()
-    {
-      unlock();
-    }
-  private:
-    /* The |lock| function acquires the mutex in the map. First it tries to
-       get an exclusive access to the map. Then it increases a number of
-       references of the mutex (if it does not exists, it inserts it). Then
-       unlocks the map, and finally tries to lock the mutex of the map. */
-
-    void
-    lock()
-    {
-      mutmap.lock_map();
-      if (!mutmap.check(caller, iden))
-        {
-          _Tmutex mut;
-          _Mtraits::init(mut);
-          mutmap.insert(caller, iden, mut);
-        }
-      mutmap.get(caller, iden)->second++;
-      mutmap.unlock_map();
-      _Mtraits::lock(mutmap.get(caller, iden)->first);
-    }
-
-    /* The |unlock| function first locks the map. Then releases the lock,
-       and decreases a number of references. If it is zero, it removes the
-       mutex. */
-
-    void
-    unlock()
-    {
-      mutmap.lock_map();
-      if (mutmap.check(caller, iden))
-        {
-          _Mtraits::unlock(mutmap.get(caller, iden)->first);
-          mutmap.get(caller, iden)->second--;
-          if (mutmap.get(caller, iden)->second == 0)
-            mutmap.remove(caller, iden);
-        }
-      mutmap.unlock_map();
-    }
-  };
-
-  /* These are traits for conditions. We need |init|, |broadcast|, |wait|
-     and |destroy|. */
-
-  template <int thread_impl>
-  struct cond_traits
-  {
-    using _Tcond = std::conditional_t<thread_impl == posix, pthread_cond_t, Empty>;
-    using _Tmutex = typename mutex_traits<thread_impl>::_Tmutex;
-    static void init(_Tcond &cond);
-    static void broadcast(_Tcond &cond);
-    static void wait(_Tcond &cond, _Tmutex &mutex);
-    static void destroy(_Tcond &cond);
-  };
-
-  /* Here is the condition counter. It is a counter which starts at 0,
-     and can be increased and decreased. A thread can wait until the
-     counter is changed, this is implemented by condition. After the wait
-     is done, another (or the same) thread, by calling |waitForChange|
-     waits for another change. This can be dangerous, since it is possible
-     to wait for a change which will not happen, because all the threads
-     which can cause the change (by increase of decrease) might had
-     finished. */
-
-  template <int thread_impl>
-  class condition_counter
-  {
-    using _Tmutex = typename mutex_traits<thread_impl>::_Tmutex;
-    using _Tcond = typename cond_traits<thread_impl>::_Tcond;
-    int counter{0};
-    _Tmutex mut;
-    _Tcond cond;
-    bool changed{true};
-  public:
-    /* We initialize the counter to 0, and |changed| flag to |true|, since
-       the counter was change from undefined value to 0. */
-
-    condition_counter()
-       
-    {
-      mutex_traits<thread_impl>::init(mut);
-      cond_traits<thread_impl>::init(cond);
-    }
-
-    /* In destructor, we only release the resources associated with the
-       condition. */
-
-    ~condition_counter()
-    {
-      cond_traits<thread_impl>::destroy(cond);
-    }
-
-    /* When increasing, we lock the mutex, advance the counter, remember it
-       is changed, broadcast, and release the mutex. */
-
-    void
-    increase()
-    {
-      mutex_traits<thread_impl>::lock(mut);
-      counter++;
-      changed = true;
-      cond_traits<thread_impl>::broadcast(cond);
-      mutex_traits<thread_impl>::unlock(mut);
-    }
-
-    /* Same as increase. */
-    void
-    decrease()
-    {
-      mutex_traits<thread_impl>::lock(mut);
-      counter--;
-      changed = true;
-      cond_traits<thread_impl>::broadcast(cond);
-      mutex_traits<thread_impl>::unlock(mut);
-    }
-
-    /* We lock the mutex, and if there was a change since the last call of
-       |waitForChange|, we return immediately, otherwise we wait for the
-       change. The mutex is released. */
-
-    int
-    waitForChange()
-    {
-      mutex_traits<thread_impl>::lock(mut);
-      if (!changed)
-        {
-          cond_traits<thread_impl>::wait(cond, mut);
-        }
-      changed = false;
-      int res = counter;
-      mutex_traits<thread_impl>::unlock(mut);
-      return res;
-    }
+    synchro(const void *c, std::string id);
+    ~synchro();
  };

  /* The detached thread is the same as joinable |thread|. We only
@ -489,122 +145,36 @@ namespace sthread
     a method which installs a counter. The counter is increased and
     decreased on the body of the new thread. */

-  template <int thread_impl>
-  class detach_thread : public thread<thread_impl>
+  class detach_thread
  {
  public:
-    condition_counter<thread_impl> *counter;
-    detach_thread() : counter(nullptr)
-    {
-    }
-    void
-    installCounter(condition_counter<thread_impl> *c)
-    {
-      counter = c;
-    }
-    void
-    run()
-    {
-      thread_traits<thread_impl>::detach_run(this);
-    }
+    virtual ~detach_thread() = default;
+    virtual void operator()() = 0;
  };

  /* The detach thread group is (by interface) the same as
     |thread_group|. The extra thing we have here is the |counter|. The
     implementation of |insert| and |run| is different. */

-  template<int thread_impl>
  class detach_thread_group
  {
-    using _Ttraits = thread_traits<thread_impl>;
-    using _Ctraits = cond_traits<thread_impl>;
-    using _Ctype = detach_thread<thread_impl>;
-    list<unique_ptr<_Ctype>> tlist;
-    using iterator = typename list<unique_ptr<_Ctype>>::iterator;
-    condition_counter<thread_impl> counter;
+    std::vector<std::unique_ptr<detach_thread>> tlist;
+    std::mutex m; // For the condition variable and the counter
+    std::condition_variable cv;
+    int counter{0};
  public:
    static int max_parallel_threads;

-    /* When inserting, the counter is installed to the thread. */
    void
-    insert(unique_ptr<_Ctype> c)
+    insert(std::unique_ptr<detach_thread> c)
    {
-      c->installCounter(&counter);
-      tlist.push_back(move(c));
+      tlist.push_back(std::move(c));
    }

    ~detach_thread_group() = default;

-    /* We cycle through all threads in the group, and in each cycle we wait
-       for the change in the |counter|. If the counter indicates less than
-       maximum parallel threads running, then a new thread is run, and the
-       iterator in the list is moved.
-
-       At the end we have to wait for all thread to finish. */
-
-    void
-    run()
-    {
-      int mpt = max_parallel_threads;
-      auto it = tlist.begin();
-      while (it != tlist.end())
-        {
-          if (counter.waitForChange() < mpt)
-            {
-              counter.increase();
-              (*it)->run();
-              ++it;
-            }
-        }
-      while (counter.waitForChange() > 0)
-        {
-        }
-    }
+    void run();
  };
-
-#ifdef HAVE_PTHREAD
-  // POSIX thread specializations
-  /* Here we only define the specializations for POSIX threads. Then we
-     define the macros. Note that the |PosixSynchro| class construct itself
-     from the static map defined in {\tt sthreads.cpp}. */
-  using PosixThread = detach_thread<posix>;
-  using PosixThreadGroup = detach_thread_group<posix>;
-  using posix_synchro = synchro<posix>;
-  class PosixSynchro : public posix_synchro
-  {
-  public:
-    PosixSynchro(const void *c, const char *id);
-  };
-
-# define THREAD sthread::PosixThread
-# define THREAD_GROUP sthread::PosixThreadGroup
-# define SYNCHRO sthread::PosixSynchro
-
-#else
-  // No threading specializations@>=
-  /* Here we define an empty class and use it as thread and
-     mutex. |NoSynchro| class is also empty, but an empty constructor is
-     declared. The empty destructor is declared only to avoid ``unused
-     variable warning''. */
-  using NoThread = thread<empty>;
-  using NoThreadGroup = thread_group<empty>;
-  using no_synchro = synchro<empty>;
-  class NoSynchro
-  {
-  public:
-    NoSynchro(const void *c, const char *id)
-    {
-    }
-    ~NoSynchro()
-    {
-    }
-  };
-
-# define THREAD sthread::NoThread
-# define THREAD_GROUP sthread::NoThreadGroup
-# define SYNCHRO sthread::NoSynchro
-
-#endif
 };

 #endif
--- a/dynare++/tl/testing/Makefile.am
+++ b/dynare++/tl/testing/Makefile.am
@ -2,9 +2,9 @@ check_PROGRAMS = tests

 tests_SOURCES = factory.cc factory.hh monoms.cc monoms.hh tests.cc
 tests_CPPFLAGS = -I../cc -I../../sylv/cc
-tests_CXXFLAGS = $(AM_CXXFLAGS) $(PTHREAD_CFLAGS)
+tests_CXXFLAGS = $(AM_CXXFLAGS) $(THREAD_CXXFLAGS)
 tests_LDFLAGS = $(AM_LDFLAGS) $(LDFLAGS_MATIO)
-tests_LDADD = ../cc/libtl.a ../../sylv/cc/libsylv.a $(LAPACK_LIBS) $(BLAS_LIBS) $(LIBS) $(FLIBS) $(PTHREAD_LIBS) $(LIBADD_MATIO)
+tests_LDADD = ../cc/libtl.a ../../sylv/cc/libsylv.a $(LAPACK_LIBS) $(BLAS_LIBS) $(LIBS) $(FLIBS) $(LIBADD_MATIO)

 check-local:
 	./tests
--- a/license.txt
+++ b/license.txt
@ -226,11 +226,6 @@ Files: m4/ax_blas.m4 m4/ax_lapack.m4
 Copyright: 2008 Steven G. Johnson <stevenj@alum.mit.edu>
 License: GPL-3+ with Autoconf exception

-Files: m4/ax_pthread.m4
-Copyright: 2008 Steven G. Johnson <stevenj@alum.mit.edu>
-           2011 Daniel Richard G. <skunk@iSKUNK.ORG>
-License: GPL-3+ with Autoconf exception
-
 Files: m4/ax_boost_base.m4
 Copyright: 2008 Thomas Porschberg <thomas@randspringer.de>
           2009 Peter Adolphs
--- a/m4/ax_cxx11_thread.m4
+++ b/m4/ax_cxx11_thread.m4
@ -0,0 +1,33 @@
+dnl Adds flags needed to compile programs using C++11 threads
+
+dnl Copyright (C) 2019 Dynare Team
+dnl
+dnl This file is part of Dynare.
+dnl
+dnl Dynare is free software: you can redistribute it and/or modify
+dnl it under the terms of the GNU General Public License as published by
+dnl the Free Software Foundation, either version 3 of the License, or
+dnl (at your option) any later version.
+dnl
+dnl Dynare is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+dnl GNU General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU General Public License
+dnl along with Dynare.  If not, see <http://www.gnu.org/licenses/>.
+
+AC_DEFUN([AX_CXX11_THREAD],
+[
+    AC_REQUIRE([AC_CANONICAL_HOST]) dnl defines host_os, which the case statement below reads
+    case ${host_os} in
+      *mingw32*)
+        THREAD_CXXFLAGS="-mthreads $THREAD_CXXFLAGS"
+        ;;
+      *)
+        THREAD_CXXFLAGS="-pthread $THREAD_CXXFLAGS"
+        ;;
+    esac
+
+    AC_SUBST([THREAD_CXXFLAGS])
+])
--- a/m4/ax_pthread.m4
+++ b/m4/ax_pthread.m4
@ -1,332 +0,0 @@
-# ===========================================================================
-#        http://www.gnu.org/software/autoconf-archive/ax_pthread.html
-# ===========================================================================
-#
-# SYNOPSIS
-#
-#   AX_PTHREAD([ACTION-IF-FOUND[, ACTION-IF-NOT-FOUND]])
-#
-# DESCRIPTION
-#
-#   This macro figures out how to build C programs using POSIX threads. It
-#   sets the PTHREAD_LIBS output variable to the threads library and linker
-#   flags, and the PTHREAD_CFLAGS output variable to any special C compiler
-#   flags that are needed. (The user can also force certain compiler
-#   flags/libs to be tested by setting these environment variables.)
-#
-#   Also sets PTHREAD_CC to any special C compiler that is needed for
-#   multi-threaded programs (defaults to the value of CC otherwise). (This
-#   is necessary on AIX to use the special cc_r compiler alias.)
-#
-#   NOTE: You are assumed to not only compile your program with these flags,
-#   but also link it with them as well. e.g. you should link with
-#   $PTHREAD_CC $CFLAGS $PTHREAD_CFLAGS $LDFLAGS ... $PTHREAD_LIBS $LIBS
-#
-#   If you are only building threads programs, you may wish to use these
-#   variables in your default LIBS, CFLAGS, and CC:
-#
-#     LIBS="$PTHREAD_LIBS $LIBS"
-#     CFLAGS="$CFLAGS $PTHREAD_CFLAGS"
-#     CC="$PTHREAD_CC"
-#
-#   In addition, if the PTHREAD_CREATE_JOINABLE thread-attribute constant
-#   has a nonstandard name, defines PTHREAD_CREATE_JOINABLE to that name
-#   (e.g. PTHREAD_CREATE_UNDETACHED on AIX).
-#
-#   Also HAVE_PTHREAD_PRIO_INHERIT is defined if pthread is found and the
-#   PTHREAD_PRIO_INHERIT symbol is defined when compiling with
-#   PTHREAD_CFLAGS.
-#
-#   ACTION-IF-FOUND is a list of shell commands to run if a threads library
-#   is found, and ACTION-IF-NOT-FOUND is a list of commands to run it if it
-#   is not found. If ACTION-IF-FOUND is not specified, the default action
-#   will define HAVE_PTHREAD.
-#
-#   Please let the authors know if this macro fails on any platform, or if
-#   you have any other suggestions or comments. This macro was based on work
-#   by SGJ on autoconf scripts for FFTW (http://www.fftw.org/) (with help
-#   from M. Frigo), as well as ac_pthread and hb_pthread macros posted by
-#   Alejandro Forero Cuervo to the autoconf macro repository. We are also
-#   grateful for the helpful feedback of numerous users.
-#
-#   Updated for Autoconf 2.68 by Daniel Richard G.
-#
-# LICENSE
-#
-#   Copyright (c) 2008 Steven G. Johnson <stevenj@alum.mit.edu>
-#   Copyright (c) 2011 Daniel Richard G. <skunk@iSKUNK.ORG>
-#
-#   This program is free software: you can redistribute it and/or modify it
-#   under the terms of the GNU General Public License as published by the
-#   Free Software Foundation, either version 3 of the License, or (at your
-#   option) any later version.
-#
-#   This program is distributed in the hope that it will be useful, but
-#   WITHOUT ANY WARRANTY; without even the implied warranty of
-#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
-#   Public License for more details.
-#
-#   You should have received a copy of the GNU General Public License along
-#   with this program. If not, see <http://www.gnu.org/licenses/>.
-#
-#   As a special exception, the respective Autoconf Macro's copyright owner
-#   gives unlimited permission to copy, distribute and modify the configure
-#   scripts that are the output of Autoconf when processing the Macro. You
-#   need not follow the terms of the GNU General Public License when using
-#   or distributing such scripts, even though portions of the text of the
-#   Macro appear in them. The GNU General Public License (GPL) does govern
-#   all other use of the material that constitutes the Autoconf Macro.
-#
-#   This special exception to the GPL applies to versions of the Autoconf
-#   Macro released by the Autoconf Archive. When you make and distribute a
-#   modified version of the Autoconf Macro, you may extend this special
-#   exception to the GPL to apply to your modified version as well.
-
-#serial 21
-
-AU_ALIAS([ACX_PTHREAD], [AX_PTHREAD])
-AC_DEFUN([AX_PTHREAD], [
-AC_REQUIRE([AC_CANONICAL_HOST])
-AC_LANG_PUSH([C])
-ax_pthread_ok=no
-
-# We used to check for pthread.h first, but this fails if pthread.h
-# requires special compiler flags (e.g. on True64 or Sequent).
-# It gets checked for in the link test anyway.
-
-# First of all, check if the user has set any of the PTHREAD_LIBS,
-# etcetera environment variables, and if threads linking works using
-# them:
-if test x"$PTHREAD_LIBS$PTHREAD_CFLAGS" != x; then
-        save_CFLAGS="$CFLAGS"
-        CFLAGS="$CFLAGS $PTHREAD_CFLAGS"
-        save_LIBS="$LIBS"
-        LIBS="$PTHREAD_LIBS $LIBS"
-        AC_MSG_CHECKING([for pthread_join in LIBS=$PTHREAD_LIBS with CFLAGS=$PTHREAD_CFLAGS])
-        AC_TRY_LINK_FUNC([pthread_join], [ax_pthread_ok=yes])
-        AC_MSG_RESULT([$ax_pthread_ok])
-        if test x"$ax_pthread_ok" = xno; then
-                PTHREAD_LIBS=""
-                PTHREAD_CFLAGS=""
-        fi
-        LIBS="$save_LIBS"
-        CFLAGS="$save_CFLAGS"
-fi
-
-# We must check for the threads library under a number of different
-# names; the ordering is very important because some systems
-# (e.g. DEC) have both -lpthread and -lpthreads, where one of the
-# libraries is broken (non-POSIX).
-
-# Create a list of thread flags to try.  Items starting with a "-" are
-# C compiler flags, and other items are library names, except for "none"
-# which indicates that we try without any flags at all, and "pthread-config"
-# which is a program returning the flags for the Pth emulation library.
-
-ax_pthread_flags="pthreads none -Kthread -kthread lthread -pthread -pthreads -mthreads pthread --thread-safe -mt pthread-config"
-
-# The ordering *is* (sometimes) important.  Some notes on the
-# individual items follow:
-
-# pthreads: AIX (must check this before -lpthread)
-# none: in case threads are in libc; should be tried before -Kthread and
-#       other compiler flags to prevent continual compiler warnings
-# -Kthread: Sequent (threads in libc, but -Kthread needed for pthread.h)
-# -kthread: FreeBSD kernel threads (preferred to -pthread since SMP-able)
-# lthread: LinuxThreads port on FreeBSD (also preferred to -pthread)
-# -pthread: Linux/gcc (kernel threads), BSD/gcc (userland threads)
-# -pthreads: Solaris/gcc
-# -mthreads: Mingw32/gcc, Lynx/gcc
-# -mt: Sun Workshop C (may only link SunOS threads [-lthread], but it
-#      doesn't hurt to check since this sometimes defines pthreads too;
-#      also defines -D_REENTRANT)
-#      ... -mt is also the pthreads flag for HP/aCC
-# pthread: Linux, etcetera
-# --thread-safe: KAI C++
-# pthread-config: use pthread-config program (for GNU Pth library)
-
-case ${host_os} in
-        solaris*)
-
-        # On Solaris (at least, for some versions), libc contains stubbed
-        # (non-functional) versions of the pthreads routines, so link-based
-        # tests will erroneously succeed.  (We need to link with -pthreads/-mt/
-        # -lpthread.)  (The stubs are missing pthread_cleanup_push, or rather
-        # a function called by this macro, so we could check for that, but
-        # who knows whether they'll stub that too in a future libc.)  So,
-        # we'll just look for -pthreads and -lpthread first:
-
-        ax_pthread_flags="-pthreads pthread -mt -pthread $ax_pthread_flags"
-        ;;
-
-        darwin*)
-        ax_pthread_flags="-pthread $ax_pthread_flags"
-        ;;
-esac
-
-# Clang doesn't consider unrecognized options an error unless we specify
-# -Werror. We throw in some extra Clang-specific options to ensure that
-# this doesn't happen for GCC, which also accepts -Werror.
-
-AC_MSG_CHECKING([if compiler needs -Werror to reject unknown flags])
-save_CFLAGS="$CFLAGS"
-ax_pthread_extra_flags="-Werror"
-CFLAGS="$CFLAGS $ax_pthread_extra_flags -Wunknown-warning-option -Wsizeof-array-argument"
-AC_COMPILE_IFELSE([AC_LANG_PROGRAM([int foo(void);],[foo()])],
-                  [AC_MSG_RESULT([yes])],
-                  [ax_pthread_extra_flags=
-                   AC_MSG_RESULT([no])])
-CFLAGS="$save_CFLAGS"
-
-if test x"$ax_pthread_ok" = xno; then
-for flag in $ax_pthread_flags; do
-
-        case $flag in
-                none)
-                AC_MSG_CHECKING([whether pthreads work without any flags])
-                ;;
-
-                -*)
-                AC_MSG_CHECKING([whether pthreads work with $flag])
-                PTHREAD_CFLAGS="$flag"
-                ;;
-
-                pthread-config)
-                AC_CHECK_PROG([ax_pthread_config], [pthread-config], [yes], [no])
-                if test x"$ax_pthread_config" = xno; then continue; fi
-                PTHREAD_CFLAGS="`pthread-config --cflags`"
-                PTHREAD_LIBS="`pthread-config --ldflags` `pthread-config --libs`"
-                ;;
-
-                *)
-                AC_MSG_CHECKING([for the pthreads library -l$flag])
-                PTHREAD_LIBS="-l$flag"
-                ;;
-        esac
-
-        save_LIBS="$LIBS"
-        save_CFLAGS="$CFLAGS"
-        LIBS="$PTHREAD_LIBS $LIBS"
-        CFLAGS="$CFLAGS $PTHREAD_CFLAGS $ax_pthread_extra_flags"
-
-        # Check for various functions.  We must include pthread.h,
-        # since some functions may be macros.  (On the Sequent, we
-        # need a special flag -Kthread to make this header compile.)
-        # We check for pthread_join because it is in -lpthread on IRIX
-        # while pthread_create is in libc.  We check for pthread_attr_init
-        # due to DEC craziness with -lpthreads.  We check for
-        # pthread_cleanup_push because it is one of the few pthread
-        # functions on Solaris that doesn't have a non-functional libc stub.
-        # We try pthread_create on general principles.
-        AC_LINK_IFELSE([AC_LANG_PROGRAM([#include <pthread.h>
-                        static void routine(void *a) { a = 0; }
-                        static void *start_routine(void *a) { return a; }],
-                       [pthread_t th; pthread_attr_t attr;
-                        pthread_create(&th, 0, start_routine, 0);
-                        pthread_join(th, 0);
-                        pthread_attr_init(&attr);
-                        pthread_cleanup_push(routine, 0);
-                        pthread_cleanup_pop(0) /* ; */])],
-                [ax_pthread_ok=yes],
-                [])
-
-        LIBS="$save_LIBS"
-        CFLAGS="$save_CFLAGS"
-
-        AC_MSG_RESULT([$ax_pthread_ok])
-        if test "x$ax_pthread_ok" = xyes; then
-                break;
-        fi
-
-        PTHREAD_LIBS=""
-        PTHREAD_CFLAGS=""
-done
-fi
-
-# Various other checks:
-if test "x$ax_pthread_ok" = xyes; then
-        save_LIBS="$LIBS"
-        LIBS="$PTHREAD_LIBS $LIBS"
-        save_CFLAGS="$CFLAGS"
-        CFLAGS="$CFLAGS $PTHREAD_CFLAGS"
-
-        # Detect AIX lossage: JOINABLE attribute is called UNDETACHED.
-        AC_MSG_CHECKING([for joinable pthread attribute])
-        attr_name=unknown
-        for attr in PTHREAD_CREATE_JOINABLE PTHREAD_CREATE_UNDETACHED; do
-            AC_LINK_IFELSE([AC_LANG_PROGRAM([#include <pthread.h>],
-                           [int attr = $attr; return attr /* ; */])],
-                [attr_name=$attr; break],
-                [])
-        done
-        AC_MSG_RESULT([$attr_name])
-        if test "$attr_name" != PTHREAD_CREATE_JOINABLE; then
-            AC_DEFINE_UNQUOTED([PTHREAD_CREATE_JOINABLE], [$attr_name],
-                               [Define to necessary symbol if this constant
-                                uses a non-standard name on your system.])
-        fi
-
-        AC_MSG_CHECKING([if more special flags are required for pthreads])
-        flag=no
-        case ${host_os} in
-            aix* | freebsd* | darwin*) flag="-D_THREAD_SAFE";;
-            osf* | hpux*) flag="-D_REENTRANT";;
-            solaris*)
-            if test "$GCC" = "yes"; then
-                flag="-D_REENTRANT"
-            else
-                # TODO: What about Clang on Solaris?
-                flag="-mt -D_REENTRANT"
-            fi
-            ;;
-        esac
-        AC_MSG_RESULT([$flag])
-        if test "x$flag" != xno; then
-            PTHREAD_CFLAGS="$flag $PTHREAD_CFLAGS"
-        fi
-
-        AC_CACHE_CHECK([for PTHREAD_PRIO_INHERIT],
-            [ax_cv_PTHREAD_PRIO_INHERIT], [
-                AC_LINK_IFELSE([AC_LANG_PROGRAM([[#include <pthread.h>]],
-                                                [[int i = PTHREAD_PRIO_INHERIT;]])],
-                    [ax_cv_PTHREAD_PRIO_INHERIT=yes],
-                    [ax_cv_PTHREAD_PRIO_INHERIT=no])
-            ])
-        AS_IF([test "x$ax_cv_PTHREAD_PRIO_INHERIT" = "xyes"],
-            [AC_DEFINE([HAVE_PTHREAD_PRIO_INHERIT], [1], [Have PTHREAD_PRIO_INHERIT.])])
-
-        LIBS="$save_LIBS"
-        CFLAGS="$save_CFLAGS"
-
-        # More AIX lossage: compile with *_r variant
-        if test "x$GCC" != xyes; then
-            case $host_os in
-                aix*)
-                AS_CASE(["x/$CC"],
-                  [x*/c89|x*/c89_128|x*/c99|x*/c99_128|x*/cc|x*/cc128|x*/xlc|x*/xlc_v6|x*/xlc128|x*/xlc128_v6],
-                  [#handle absolute path differently from PATH based program lookup
-                   AS_CASE(["x$CC"],
-                     [x/*],
-                     [AS_IF([AS_EXECUTABLE_P([${CC}_r])],[PTHREAD_CC="${CC}_r"])],
-                     [AC_CHECK_PROGS([PTHREAD_CC],[${CC}_r],[$CC])])])
-                ;;
-            esac
-        fi
-fi
-
-test -n "$PTHREAD_CC" || PTHREAD_CC="$CC"
-
-AC_SUBST([PTHREAD_LIBS])
-AC_SUBST([PTHREAD_CFLAGS])
-AC_SUBST([PTHREAD_CC])
-
-# Finally, execute ACTION-IF-FOUND/ACTION-IF-NOT-FOUND:
-if test x"$ax_pthread_ok" = xyes; then
-        ifelse([$1],,[AC_DEFINE([HAVE_PTHREAD],[1],[Define if you have POSIX threads libraries and header files.])],[$1])
-        :
-else
-        ax_pthread_ok=no
-        $2
-fi
-AC_LANG_POP
-])dnl AX_PTHREAD
--- a/mex/build/dynare_simul_.am
+++ b/mex/build/dynare_simul_.am
@ -2,10 +2,9 @@ mex_PROGRAMS = dynare_simul_

 dynare_simul__CPPFLAGS = $(AM_CPPFLAGS) -I$(top_srcdir)/../../../dynare++/sylv/cc -I$(top_srcdir)/../../../dynare++/tl/cc -I$(top_srcdir)/../../../dynare++/kord -I$(top_srcdir)/../../sources $(CPPFLAGS_MATIO)

-dynare_simul__CXXFLAGS = $(AM_CXXFLAGS) $(PTHREAD_CFLAGS)
+dynare_simul__CXXFLAGS = $(AM_CXXFLAGS) $(THREAD_CXXFLAGS)

-# libdynare++ must come before pthread
 dynare_simul__LDFLAGS = $(AM_LDFLAGS) $(LDFLAGS_MATIO)
-dynare_simul__LDADD = ../libdynare++/libdynare++.a $(PTHREAD_LIBS) $(LIBADD_MATIO)
+dynare_simul__LDADD = ../libdynare++/libdynare++.a $(LIBADD_MATIO)

 nodist_dynare_simul__SOURCES = $(top_srcdir)/../../../dynare++/extern/matlab/dynare_simul.cc
--- a/mex/build/k_order_perturbation.am
+++ b/mex/build/k_order_perturbation.am
@ -2,11 +2,10 @@ mex_PROGRAMS = k_order_perturbation

 k_order_perturbation_CPPFLAGS = $(AM_CPPFLAGS) -I$(top_srcdir)/../../../dynare++/src -I$(top_srcdir)/../../../dynare++/kord -I$(top_srcdir)/../../../dynare++/tl/cc -I$(top_srcdir)/../../../dynare++/utils/cc -I$(top_srcdir)/../../../dynare++/sylv/cc -I$(top_srcdir)/../../../dynare++/integ/cc -I$(top_srcdir)/../../sources $(CPPFLAGS_MATIO)

-k_order_perturbation_CXXFLAGS = $(AM_CXXFLAGS) $(PTHREAD_CFLAGS)
+k_order_perturbation_CXXFLAGS = $(AM_CXXFLAGS) $(THREAD_CXXFLAGS)

-# libdynare++ must come before pthread
 k_order_perturbation_LDFLAGS = $(AM_LDFLAGS) $(LDFLAGS_MATIO)
-k_order_perturbation_LDADD = ../libdynare++/libdynare++.a $(PTHREAD_LIBS) $(LIBADD_DLOPEN) $(LIBADD_MATIO)
+k_order_perturbation_LDADD = ../libdynare++/libdynare++.a $(LIBADD_DLOPEN) $(LIBADD_MATIO)

 TOPDIR = $(top_srcdir)/../../sources/k_order_perturbation

--- a/mex/build/libdynare++.am
+++ b/mex/build/libdynare++.am
@ -2,7 +2,7 @@ noinst_LIBRARIES = libdynare++.a

 libdynare___a_CPPFLAGS = $(AM_CPPFLAGS) -I$(top_srcdir)/../../../dynare++/src -I$(top_srcdir)/../../../dynare++/kord -I$(top_srcdir)/../../../dynare++/tl/cc -I$(top_srcdir)/../../../dynare++/utils/cc -I$(top_srcdir)/../../../dynare++/sylv/cc -I$(top_srcdir)/../../../dynare++/integ/cc -I$(top_srcdir)/../../sources $(CPPFLAGS_MATIO)

-libdynare___a_CXXFLAGS = $(AM_CXXFLAGS) $(PTHREAD_CFLAGS)
+libdynare___a_CXXFLAGS = $(AM_CXXFLAGS) $(THREAD_CXXFLAGS)

 TOPDIR = $(top_srcdir)/../../../dynare++

--- a/mex/build/matlab/configure.ac
+++ b/mex/build/matlab/configure.ac
@ -75,7 +75,8 @@ case ${host_os} in
    CXXFLAGS="$CXXFLAGS -stdlib=libc++"
    ;;
 esac
-AX_PTHREAD
+
+AX_CXX11_THREAD

 # Check for dlopen(), needed by k_order_perturbation DLL
 AC_CHECK_LIB([dl], [dlopen], [LIBADD_DLOPEN="-ldl"], [])
@ -95,9 +96,6 @@ AM_CONDITIONAL([HAVE_SLICOT], [test "x$has_slicot" = "xyes"])
 # On Windows, we want static linking of the external libraries
 case ${host_os} in
  *mingw32*)
-    # We hardcode -lpthread here, otherwise it is not detected by the
-    # AX_PTHREAD macro and therefore not statically linked
-    PTHREAD_LIBS="-Wl,-Bstatic -lpthread -Wl,-Bdynamic"
    GSL_LIBS="-Wl,-Bstatic $GSL_LIBS -Wl,-Bdynamic"
    LIBADD_MATIO="-Wl,-Bstatic $LIBADD_MATIO -Wl,-Bdynamic"
    LIBADD_SLICOT="-Wl,-Bstatic $LIBADD_SLICOT -Wl,-Bdynamic"
@ -107,11 +105,7 @@ esac
 AM_CONDITIONAL([DO_SOMETHING], [test "x$ax_enable_matlab" = "xyes" -a "x$ax_matlab_version_ok" = "xyes" -a "x$ax_mexopts_ok" = "xyes"])

 if test "x$ax_enable_matlab" = "xyes" -a "x$ax_matlab_version_ok" = "xyes" -a "x$ax_mexopts_ok" = "xyes"; then
-  if test x"$ax_pthread_ok" = "xyes"; then
-     BUILD_MEX_MATLAB="yes"
-  else
-     BUILD_MEX_MATLAB="yes (without POSIX threads)"
-  fi
+  BUILD_MEX_MATLAB="yes"
 else
  BUILD_MEX_MATLAB="no (missing MATLAB, or unknown version, or unknown architecture)"
 fi
--- a/mex/build/octave/configure.ac
+++ b/mex/build/octave/configure.ac
@ -48,7 +48,9 @@ AC_PROG_RANLIB
 AX_PROG_LN_S
 AC_PROG_MKDIR_P
 AM_PROG_AR
-AX_PTHREAD
+
+AX_CXX11_THREAD
+
 # Check for dlopen(), needed by k_order_perturbation DLL
 AC_CHECK_LIB([dl], [dlopen], [LIBADD_DLOPEN="-ldl"], [])
 AC_SUBST([LIBADD_DLOPEN])
@ -78,11 +80,7 @@ AC_SUBST([LIBADD_UMFPACK])
 AM_CONDITIONAL([DO_SOMETHING], [test "x$MKOCTFILE" != "x"])

 if test "x$MKOCTFILE" != "x"; then
-  if test x"$ax_pthread_ok" = "xyes"; then
-     BUILD_MEX_OCTAVE="yes"
-  else
-     BUILD_MEX_OCTAVE="yes (without POSIX threads)"
-  fi
+  BUILD_MEX_OCTAVE="yes"
 else
  BUILD_MEX_OCTAVE="no (missing mkoctfile)"
 fi
--- a/mex/sources/k_order_perturbation/k_order_perturbation.cc
+++ b/mex/sources/k_order_perturbation/k_order_perturbation.cc
@ -220,7 +220,7 @@ extern "C" {
    const int nSteps = 0; // Dynare++ solving steps, for time being default to 0 = deterministic steady state
    const double sstol = 1.e-13; //NL solver tolerance from

-    THREAD_GROUP::max_parallel_threads = 2; //params.num_threads;
+    sthread::detach_thread_group::max_parallel_threads = 2; //params.num_threads;

    try
      {