use_dll: rewrite the parallel compilation engine with a fixed number of threads
The previous system would spawn as many threads as there are object files to be compiled (which could lead to hundreds of threads for large block-decomposed models). This could pose a memory usage problem (even when just waiting, threads require memory for their own stack).
master
parent
e801f1a862
commit
fd9902e87b
|
@ -23,6 +23,8 @@
|
|||
#include <vector>
|
||||
#include <string>
|
||||
#include <regex>
|
||||
#include <thread>
|
||||
#include <algorithm>
|
||||
|
||||
#include <cstdlib>
|
||||
|
||||
|
@ -494,6 +496,9 @@ main(int argc, char **argv)
|
|||
if (use_dll)
|
||||
mod_file->use_dll = true;
|
||||
|
||||
if (mod_file->use_dll)
|
||||
ModelTree::initializeMEXCompilationWorkers(max(jthread::hardware_concurrency(), 1U));
|
||||
|
||||
if (json == JsonOutputPointType::parsing)
|
||||
mod_file->writeJsonOutput(basename, json, json_output_mode, onlyjson);
|
||||
|
||||
|
@ -528,7 +533,8 @@ main(int argc, char **argv)
|
|||
compilation (and is not printed in case of compilation failure); also
|
||||
avoids potential issues with destroying the thread synchronization
|
||||
mechanism too soon. */
|
||||
ModelTree::joinMEXCompilationThreads();
|
||||
if (mod_file->use_dll)
|
||||
ModelTree::terminateMEXCompilationWorkers();
|
||||
|
||||
cout << "Preprocessing completed." << endl;
|
||||
return EXIT_SUCCESS;
|
||||
|
|
110
src/ModelTree.cc
110
src/ModelTree.cc
|
@ -37,10 +37,10 @@
|
|||
#include <utility>
|
||||
#include <algorithm>
|
||||
|
||||
vector<jthread> ModelTree::mex_compilation_threads {};
|
||||
vector<jthread> ModelTree::mex_compilation_workers {};
|
||||
condition_variable ModelTree::mex_compilation_cv;
|
||||
mutex ModelTree::mex_compilation_mut;
|
||||
unsigned int ModelTree::mex_compilation_available_processors {max(jthread::hardware_concurrency(), 1U)};
|
||||
vector<tuple<filesystem::path, set<filesystem::path>, string>> ModelTree::mex_compilation_queue;
|
||||
set<filesystem::path> ModelTree::mex_compilation_done;
|
||||
|
||||
void
|
||||
|
@ -1626,6 +1626,8 @@ ModelTree::findGccOnMacos(const string &mexext)
|
|||
filesystem::path
|
||||
ModelTree::compileMEX(const filesystem::path &output_dir, const string &output_basename, const string &mexext, const vector<filesystem::path> &input_files, const filesystem::path &matlabroot, const filesystem::path &dynareroot, bool link) const
|
||||
{
|
||||
assert(!mex_compilation_workers.empty());
|
||||
|
||||
const string opt_flags = "-O3 -g0 --param ira-max-conflict-table-size=1 -fno-forward-propagate -fno-gcse -fno-dce -fno-dse -fno-tree-fre -fno-tree-pre -fno-tree-cselim -fno-tree-dse -fno-tree-dce -fno-tree-pta -fno-gcse-after-reload";
|
||||
|
||||
filesystem::path compiler;
|
||||
|
@ -1769,37 +1771,10 @@ ModelTree::compileMEX(const filesystem::path &output_dir, const string &output_b
|
|||
return p.extension() == ".o";
|
||||
});
|
||||
|
||||
// std::ostringstream is not copyable, so capture a std::string
|
||||
string cmd_str { cmd.str() };
|
||||
mex_compilation_threads.emplace_back([cmd_str, output_filename, prerequisites]
|
||||
{
|
||||
/* Wait until a logical processor becomes available and all prerequisites
|
||||
are done */
|
||||
unique_lock<mutex> lk {mex_compilation_mut};
|
||||
mex_compilation_cv.wait(lk, [prerequisites]
|
||||
{
|
||||
return mex_compilation_available_processors > 0 &&
|
||||
includes(mex_compilation_done.begin(), mex_compilation_done.end(),
|
||||
prerequisites.begin(), prerequisites.end());
|
||||
});
|
||||
// Signal to other threads that we have grabbed a logical processor
|
||||
mex_compilation_available_processors--;
|
||||
lk.unlock();
|
||||
|
||||
// Effectively compile
|
||||
if (system(cmd_str.c_str()))
|
||||
{
|
||||
cerr << "Compilation failed" << endl;
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
/* Signal to other threads that we have freed a logical processor and
|
||||
completed a possible prerequisite */
|
||||
lk.lock();
|
||||
mex_compilation_available_processors++;
|
||||
mex_compilation_done.insert(output_filename);
|
||||
mex_compilation_cv.notify_all();
|
||||
});
|
||||
unique_lock<mutex> lk {mex_compilation_mut};
|
||||
mex_compilation_queue.emplace_back(output_filename, prerequisites, cmd.str());
|
||||
lk.unlock();
|
||||
mex_compilation_cv.notify_one();
|
||||
|
||||
return output_filename;
|
||||
}
|
||||
|
@ -1906,9 +1881,74 @@ ModelTree::writeBlockBytecodeAdditionalDerivatives([[maybe_unused]] BytecodeWrit
|
|||
}
|
||||
|
||||
/* Spawns the pool of worker threads that run the MEX compilation commands.
   numworkers must be strictly positive and the pool must not already exist
   (both are asserted). Each worker repeatedly takes from
   mex_compilation_queue the first job whose prerequisites are all present in
   mex_compilation_done, runs its command with system(), then records the
   job's output in mex_compilation_done. A worker returns once a stop has been
   requested and it finds no runnable job in the queue.
   NOTE(review): the joinMEXCompilationThreads() line and the range-for over
   mex_compilation_threads below look like leftovers of the removed
   implementation in this diff rendering; confirm before relying on them. */
void
|
||||
ModelTree::joinMEXCompilationThreads()
|
||||
ModelTree::initializeMEXCompilationWorkers(int numworkers)
|
||||
{
|
||||
for (auto &it : mex_compilation_threads)
|
||||
assert(numworkers > 0);
|
||||
assert(mex_compilation_workers.empty());
|
||||
|
||||
cout << "Spawning " << numworkers << " threads for compiling MEX files." << endl;
|
||||
|
||||
for (int i {0}; i < numworkers; i++)
|
||||
mex_compilation_workers.emplace_back([](stop_token stoken)
|
||||
{
|
||||
// The lock is held by the worker at all times, except while the compilation command runs and while waiting on the condition variable
|
||||
unique_lock<mutex> lk {mex_compilation_mut};
|
||||
|
||||
look_for_job:
|
||||
// Scan the queue (under the lock) for the first job whose prerequisites have all been compiled
|
||||
for (auto it {mex_compilation_queue.begin()}; it != mex_compilation_queue.end(); ++it)
|
||||
{
|
||||
/* The following is a copy and not a reference, because we need it
|
||||
after erasing it, and also after releasing the lock (at which
|
||||
point the mex_compilation_queue may be modified by others). */
|
||||
const auto [output, prerequisites, cmd] {*it};
|
||||
// includes() requires sorted ranges; both arguments are std::set, hence sorted
|
||||
if (includes(mex_compilation_done.begin(), mex_compilation_done.end(),
|
||||
prerequisites.begin(), prerequisites.end()))
|
||||
{
|
||||
// Remove the job from the queue before compiling, so that no other worker picks it up
|
||||
mex_compilation_queue.erase(it);
|
||||
lk.unlock(); // After that point, the iterator may become invalid
|
||||
// A non-zero return value from system() is treated as fatal for the whole process.
|
||||
// NOTE(review): exit() is called from a worker thread here; it runs the destructors
|
||||
// of static objects, including the static vector of jthread workers. Verify that
|
||||
// this cannot end up joining the calling thread or blocking on idle workers.
|
||||
if (system(cmd.c_str()))
|
||||
{
|
||||
cerr << "Compilation failed" << endl;
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
lk.lock();
|
||||
mex_compilation_done.insert(output);
|
||||
/* The object just compiled may be a prerequisite for several
|
||||
other objects, so notify all waiting workers. Also needed to
|
||||
notify the main thread when in
|
||||
ModelTree::terminateMEXCompilationWorkers(). */
|
||||
mex_compilation_cv.notify_all();
|
||||
// The queue may have changed while the lock was released: restart the scan from the beginning
|
||||
goto look_for_job;
|
||||
}
|
||||
}
|
||||
|
||||
// No runnable job was found: return if a stop was requested, otherwise sleep until notified and scan again
|
||||
if (stoken.stop_requested())
|
||||
return;
|
||||
|
||||
mex_compilation_cv.wait(lk);
|
||||
|
||||
goto look_for_job;
|
||||
});
|
||||
}
|
||||
|
||||
/* Shuts down the MEX compilation workers: blocks until the waiting queue is
   empty, requests a stop on every worker, wakes them all up, then joins them.
   An empty queue only means that every job has been picked up by a worker
   (workers erase a job from the queue before running its command); jobs
   still being compiled are waited for by the join() calls at the end. */
void
|
||||
ModelTree::terminateMEXCompilationWorkers()
|
||||
{
|
||||
// Wait until the queue is empty
|
||||
unique_lock<mutex> lk {mex_compilation_mut};
|
||||
mex_compilation_cv.wait(lk, [] { return mex_compilation_queue.empty(); });
|
||||
|
||||
/* Request stop while still holding the lock, so we are sure that workers are
|
||||
either compiling or waiting right now. Otherwise there could theoretically
|
||||
be a race condition where the condition variable is notified just after
|
||||
the thread has checked for its stoken, and just before it begins waiting;
|
||||
this would be a deadlock. */
|
||||
for (auto &it : mex_compilation_workers)
|
||||
it.request_stop();
|
||||
|
||||
lk.unlock();
|
||||
|
||||
// Wake up the workers sleeping on the condition variable, so that they see the stop request
|
||||
mex_compilation_cv.notify_all();
|
||||
// Block until every worker has returned
|
||||
for (auto &it : mex_compilation_workers)
|
||||
it.join();
|
||||
}
|
||||
|
||||
|
|
|
@ -339,16 +339,19 @@ private:
|
|||
/*! Maps endogenous type specific IDs to equation numbers */
|
||||
vector<int> endo2eq;
|
||||
|
||||
// Stores threads for compiling MEX files in parallel
|
||||
static vector<jthread> mex_compilation_threads;
|
||||
// Stores workers used for compiling MEX files in parallel
|
||||
static vector<jthread> mex_compilation_workers;
|
||||
|
||||
/* The following variables implement the thread synchronization mechanism for
|
||||
limiting the number of concurrent GCC processes and tracking dependencies
|
||||
between object files. */
|
||||
static condition_variable mex_compilation_cv;
|
||||
static mutex mex_compilation_mut;
|
||||
static unsigned int mex_compilation_available_processors;
|
||||
static set<filesystem::path> mex_compilation_done; // Object/MEX files already compiled
|
||||
/* Object/MEX files waiting to be compiled (with their prerequisites as 2nd
|
||||
element and compilation command as the 3rd element) */
|
||||
static vector<tuple<filesystem::path, set<filesystem::path>, string>> mex_compilation_queue;
|
||||
// Object/MEX files already compiled
|
||||
static set<filesystem::path> mex_compilation_done;
|
||||
|
||||
/* Compute a pseudo-Jacobian whose all elements are either zero or one,
|
||||
depending on whether the variable symbolically appears in the equation */
|
||||
|
@ -499,12 +502,11 @@ private:
|
|||
static string findGccOnMacos(const string &mexext);
|
||||
#endif
|
||||
/* Compiles a MEX file (if link=true) or an object file to be linked later
|
||||
into a MEX file (if link=false). The compilation is done in a separate
|
||||
asynchronous thread, so the call to this function is not blocking. The
|
||||
number of concurrently running GCC processes is dynamically limited to the
|
||||
number of available logical processors. The dependency of a linked MEX
|
||||
file upon intermediary objects is nicely handled. Returns the name of the
|
||||
output file (to be reused later as input file if link=false). */
|
||||
into a MEX file (if link=false). The compilation is done in separate
|
||||
worker threads working in parallel, so the call to this function is not
|
||||
blocking. The dependency of a linked MEX file upon intermediary objects is
|
||||
nicely handled. Returns the name of the output file (to be reused later as
|
||||
input file if link=false). */
|
||||
filesystem::path compileMEX(const filesystem::path &output_dir, const string &output_basename, const string &mexext, const vector<filesystem::path> &input_files, const filesystem::path &matlabroot, const filesystem::path &dynareroot, bool link = true) const;
|
||||
|
||||
public:
|
||||
|
@ -552,8 +554,12 @@ public:
|
|||
If no such equation can be found, throws an ExprNode::MatchFailureExpression */
|
||||
expr_t getRHSFromLHS(expr_t lhs) const;
|
||||
|
||||
// Calls join() on all MEX compilation threads
|
||||
static void joinMEXCompilationThreads();
|
||||
// Initialize the MEX compilation workers
|
||||
static void initializeMEXCompilationWorkers(int numworkers);
|
||||
|
||||
/* Terminates all MEX compilation workers (after they have emptied the
|
||||
waiting queue) */
|
||||
static void terminateMEXCompilationWorkers();
|
||||
|
||||
//! Returns all the equation tags associated to an equation
|
||||
map<string, string>
|
||||
|
|
Loading…
Reference in New Issue