use_dll: parallelize compilation of multiple object files within single MEX file
In particular, it implements dependency tracking in the thread scheduler, so that multiple MEX files can share object files. Ref. #41
master
parent
5cf4729ab0
commit
dd66459e5f
|
@ -347,10 +347,13 @@ DynamicModel::writeBlockBytecodeAdditionalDerivatives(BytecodeWriter &code_file,
|
|||
}
|
||||
|
||||
vector<filesystem::path>
|
||||
DynamicModel::writeDynamicPerBlockCFiles(const string &basename) const
|
||||
DynamicModel::writeDynamicPerBlockCFiles(const string &basename, const string &mexext,
|
||||
const filesystem::path &matlabroot,
|
||||
const filesystem::path &dynareroot) const
|
||||
{
|
||||
temporary_terms_t temporary_terms; // Temp terms written so far
|
||||
vector<filesystem::path> written_src_files;
|
||||
const filesystem::path model_src_dir { filesystem::path{basename} / "model" / "src" };
|
||||
vector<filesystem::path> compiled_object_files;
|
||||
|
||||
for (int blk = 0; blk < static_cast<int>(blocks.size()); blk++)
|
||||
{
|
||||
|
@ -365,8 +368,7 @@ DynamicModel::writeDynamicPerBlockCFiles(const string &basename) const
|
|||
int nze_exo = blocks_derivatives_exo[blk].size();
|
||||
int nze_exo_det = blocks_derivatives_exo_det[blk].size();
|
||||
|
||||
string filename = basename + "/model/src/dynamic_" + to_string(blk+1) + ".c";
|
||||
written_src_files.emplace_back(filename);
|
||||
filesystem::path filename { model_src_dir / ("dynamic_" + to_string(blk+1) + ".c") };
|
||||
ofstream output{filename, ios::out | ios::binary};
|
||||
if (!output.is_open())
|
||||
{
|
||||
|
@ -532,7 +534,12 @@ DynamicModel::writeDynamicPerBlockCFiles(const string &basename) const
|
|||
<< "}" << endl;
|
||||
output.close();
|
||||
|
||||
filename = basename + "/model/src/dynamic_" + to_string(blk+1) + ".h";
|
||||
// Compile intermediary object under <MODFILE>/model/src/
|
||||
compiled_object_files.emplace_back(compileMEX(model_src_dir, "dynamic_" + to_string(blk+1),
|
||||
mexext, { filename }, matlabroot, dynareroot,
|
||||
false));
|
||||
|
||||
filename = model_src_dir / ("dynamic_" + to_string(blk+1) + ".h");
|
||||
ofstream header_output{filename, ios::out | ios::binary};
|
||||
if (!header_output.is_open())
|
||||
{
|
||||
|
@ -542,7 +549,7 @@ DynamicModel::writeDynamicPerBlockCFiles(const string &basename) const
|
|||
header_output << header.str() << ';' << endl;
|
||||
header_output.close();
|
||||
}
|
||||
return written_src_files;
|
||||
return compiled_object_files;
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -1145,7 +1152,7 @@ DynamicModel::writeDynamicBlockMFile(const string &basename) const
|
|||
}
|
||||
|
||||
void
|
||||
DynamicModel::writeDynamicBlockCFile(const string &basename, vector<filesystem::path> per_block_src_files, const string &mexext, const filesystem::path &matlabroot, const filesystem::path &dynareroot) const
|
||||
DynamicModel::writeDynamicBlockCFile(const string &basename, vector<filesystem::path> per_block_object_files, const string &mexext, const filesystem::path &matlabroot, const filesystem::path &dynareroot) const
|
||||
{
|
||||
string filename = basename + "/model/src/dynamic.c";
|
||||
|
||||
|
@ -1228,8 +1235,8 @@ DynamicModel::writeDynamicBlockCFile(const string &basename, vector<filesystem::
|
|||
|
||||
output.close();
|
||||
|
||||
per_block_src_files.push_back(filename);
|
||||
compileMEX("+" + basename, "dynamic", mexext, per_block_src_files, matlabroot, dynareroot);
|
||||
per_block_object_files.push_back(filename);
|
||||
compileMEX("+" + basename, "dynamic", mexext, per_block_object_files, matlabroot, dynareroot);
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -3607,8 +3614,8 @@ DynamicModel::writeDynamicFile(const string &basename, bool block, bool use_dll,
|
|||
|
||||
if (use_dll)
|
||||
{
|
||||
auto per_block_src_files { writeDynamicPerBlockCFiles(basename) };
|
||||
writeDynamicBlockCFile(basename, move(per_block_src_files), mexext, matlabroot, dynareroot);
|
||||
auto per_block_object_files { writeDynamicPerBlockCFiles(basename, mexext, matlabroot, dynareroot) };
|
||||
writeDynamicBlockCFile(basename, move(per_block_object_files), mexext, matlabroot, dynareroot);
|
||||
}
|
||||
else if (julia)
|
||||
{
|
||||
|
|
|
@ -127,7 +127,7 @@ private:
|
|||
void writeDynamicBlockMFile(const string &basename) const;
|
||||
/* Writes the main dynamic functions of block decomposed model (C version),
|
||||
then compiles it with the per-block functions into a single MEX */
|
||||
void writeDynamicBlockCFile(const string &basename, vector<filesystem::path> per_block_src_files, const string &mexext, const filesystem::path &matlabroot, const filesystem::path &dynareroot) const;
|
||||
void writeDynamicBlockCFile(const string &basename, vector<filesystem::path> per_block_object_files, const string &mexext, const filesystem::path &matlabroot, const filesystem::path &dynareroot) const;
|
||||
/* Computes the number of nonzero elements in deterministic Jacobian of
|
||||
block-decomposed model */
|
||||
int nzeDeterministicJacobianForBlock(int blk) const;
|
||||
|
@ -136,9 +136,9 @@ private:
|
|||
void writeDynamicPerBlockHelper(int blk, ostream &output, temporary_terms_t &temporary_terms, int nze_stochastic, int nze_deterministic, int nze_exo, int nze_exo_det, int nze_other_endo) const;
|
||||
//! Writes the per-block dynamic files of block decomposed model (MATLAB version)
|
||||
void writeDynamicPerBlockMFiles(const string &basename) const;
|
||||
/* Writes the per-block dynamic files of block decomposed model (C version).
|
||||
Returns the list of paths to the generated C source files (not the headers) */
|
||||
vector<filesystem::path> writeDynamicPerBlockCFiles(const string &basename) const;
|
||||
/* Writes and compiles the per-block dynamic files of block decomposed model
|
||||
(C version). Returns the list of paths to the compiled object files. */
|
||||
vector<filesystem::path> writeDynamicPerBlockCFiles(const string &basename, const string &mexext, const filesystem::path &matlabroot, const filesystem::path &dynareroot) const;
|
||||
//! Writes the code of the block-decomposed model in virtual machine bytecode
|
||||
void writeDynamicBlockBytecode(const string &basename) const;
|
||||
// Writes derivatives w.r.t. exo, exo det and other endogenous
|
||||
|
|
|
@ -41,6 +41,7 @@ vector<jthread> ModelTree::mex_compilation_threads {};
|
|||
condition_variable ModelTree::mex_compilation_cv;
|
||||
mutex ModelTree::mex_compilation_mut;
|
||||
unsigned int ModelTree::mex_compilation_available_processors {max(jthread::hardware_concurrency(), 1U)};
|
||||
set<filesystem::path> ModelTree::mex_compilation_done;
|
||||
|
||||
void
|
||||
ModelTree::copyHelper(const ModelTree &m)
|
||||
|
@ -1622,8 +1623,8 @@ ModelTree::findGccOnMacos(const string &mexext)
|
|||
}
|
||||
#endif
|
||||
|
||||
void
|
||||
ModelTree::compileMEX(const filesystem::path &output_dir, const string &funcname, const string &mexext, const vector<filesystem::path> &src_files, const filesystem::path &matlabroot, const filesystem::path &dynareroot) const
|
||||
filesystem::path
|
||||
ModelTree::compileMEX(const filesystem::path &output_dir, const string &output_basename, const string &mexext, const vector<filesystem::path> &input_files, const filesystem::path &matlabroot, const filesystem::path &dynareroot, bool link) const
|
||||
{
|
||||
const string opt_flags = "-O3 -g0 --param ira-max-conflict-table-size=1 -fno-forward-propagate -fno-gcse -fno-dce -fno-dse -fno-tree-fre -fno-tree-pre -fno-tree-cselim -fno-tree-dse -fno-tree-dce -fno-tree-pta -fno-gcse-after-reload";
|
||||
|
||||
|
@ -1708,7 +1709,7 @@ ModelTree::compileMEX(const filesystem::path &output_dir, const string &funcname
|
|||
}
|
||||
}
|
||||
|
||||
filesystem::path binary{output_dir / (funcname + "." + mexext)};
|
||||
filesystem::path output_filename {output_dir / (output_basename + "." + (link ? mexext : "o"))};
|
||||
|
||||
ostringstream cmd;
|
||||
|
||||
|
@ -1738,35 +1739,48 @@ ModelTree::compileMEX(const filesystem::path &output_dir, const string &funcname
|
|||
if (!user_set_add_flags.empty())
|
||||
cmd << user_set_add_flags << " ";
|
||||
|
||||
for (auto &src : src_files)
|
||||
cmd << src << " ";
|
||||
cmd << "-o " << binary << " ";
|
||||
for (auto &f : input_files)
|
||||
cmd << f << " ";
|
||||
cmd << "-o " << output_filename << " ";
|
||||
|
||||
if (user_set_subst_libs.empty())
|
||||
cmd << libs;
|
||||
if (link)
|
||||
{
|
||||
if (user_set_subst_libs.empty())
|
||||
cmd << libs;
|
||||
else
|
||||
cmd << user_set_subst_libs;
|
||||
if (!user_set_add_libs.empty())
|
||||
cmd << " " << user_set_add_libs;
|
||||
}
|
||||
else
|
||||
cmd << user_set_subst_libs;
|
||||
|
||||
if (!user_set_add_libs.empty())
|
||||
cmd << " " << user_set_add_libs;
|
||||
cmd << " -c";
|
||||
|
||||
#ifdef _WIN32
|
||||
cmd << '"';
|
||||
#endif
|
||||
|
||||
cout << "Compiling " << funcname << " MEX..." << endl << cmd.str() << endl;
|
||||
cout << "Compiling " << output_filename << endl;
|
||||
|
||||
/* The command line must be captured by value by the thread (a reference
|
||||
would quickly become dangling). And std::ostringstream is not copyable, so
|
||||
capture a std::string. */
|
||||
// The prerequisites are the object files among the input files
|
||||
set<filesystem::path> prerequisites;
|
||||
copy_if(input_files.begin(), input_files.end(),
|
||||
inserter(prerequisites, prerequisites.end()), [](const auto &p)
|
||||
{
|
||||
return p.extension() == ".o";
|
||||
});
|
||||
|
||||
// std::ostringstream is not copyable, so capture a std::string
|
||||
string cmd_str { cmd.str() };
|
||||
mex_compilation_threads.emplace_back([cmd_str]
|
||||
mex_compilation_threads.emplace_back([cmd_str, output_filename, prerequisites]
|
||||
{
|
||||
// Wait until a logical processor becomes available
|
||||
/* Wait until a logical processor becomes available and all prerequisites
|
||||
are done */
|
||||
unique_lock<mutex> lk {mex_compilation_mut};
|
||||
mex_compilation_cv.wait(lk, []
|
||||
mex_compilation_cv.wait(lk, [prerequisites]
|
||||
{
|
||||
return mex_compilation_available_processors > 0;
|
||||
return mex_compilation_available_processors > 0 &&
|
||||
includes(mex_compilation_done.begin(), mex_compilation_done.end(),
|
||||
prerequisites.begin(), prerequisites.end());
|
||||
});
|
||||
// Signal to other threads that we have grabbed a logical processor
|
||||
mex_compilation_available_processors--;
|
||||
|
@ -1779,11 +1793,15 @@ ModelTree::compileMEX(const filesystem::path &output_dir, const string &funcname
|
|||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
// Signal to other threads that we have freed a logical processor
|
||||
/* Signal to other threads that we have freed a logical processor and
|
||||
completed a possible prerequisite */
|
||||
lk.lock();
|
||||
mex_compilation_available_processors++;
|
||||
mex_compilation_cv.notify_one();
|
||||
mex_compilation_done.insert(output_filename);
|
||||
mex_compilation_cv.notify_all();
|
||||
});
|
||||
|
||||
return output_filename;
|
||||
}
|
||||
|
||||
void
|
||||
|
|
|
@ -338,14 +338,13 @@ private:
|
|||
// Stores threads for compiling MEX files in parallel
|
||||
static vector<jthread> mex_compilation_threads;
|
||||
|
||||
/* The following three variables implement the synchronization mechanism for
|
||||
limiting the number of concurrent GCC processes.
|
||||
TODO: Replace these three variables with std::counting_semaphore (from
|
||||
C++20) when upgrading to GCC 11 (and adjust included headers
|
||||
correspondingly). */
|
||||
/* The following variables implement the thread synchronization mechanism for
|
||||
limiting the number of concurrent GCC processes and tracking dependencies
|
||||
between object files. */
|
||||
static condition_variable mex_compilation_cv;
|
||||
static mutex mex_compilation_mut;
|
||||
static unsigned int mex_compilation_available_processors;
|
||||
static set<filesystem::path> mex_compilation_done; // Object/MEX files already compiled
|
||||
|
||||
/* Compute a pseudo-Jacobian whose elements are all either zero or one,
|
||||
depending on whether the variable symbolically appears in the equation */
|
||||
|
@ -495,11 +494,14 @@ private:
|
|||
//! Finds a suitable GCC compiler on macOS
|
||||
static string findGccOnMacos(const string &mexext);
|
||||
#endif
|
||||
/* Compiles a MEX file. The compilation is done in a separate asynchronous
|
||||
thread, so the call to this function is not blocking. The number of
|
||||
concurrently running GCC processes is dynamically limited to the number of
|
||||
available logical processors. */
|
||||
void compileMEX(const filesystem::path &output_dir, const string &funcname, const string &mexext, const vector<filesystem::path> &src_files, const filesystem::path &matlabroot, const filesystem::path &dynareroot) const;
|
||||
/* Compiles a MEX file (if link=true) or an object file to be linked later
|
||||
into a MEX file (if link=false). The compilation is done in a separate
|
||||
asynchronous thread, so the call to this function is not blocking. The
|
||||
number of concurrently running GCC processes is dynamically limited to the
|
||||
number of available logical processors. A compilation whose input files
|
||||
include object files waits until those objects have been compiled. Returns the name of the
|
||||
output file (to be reused later as input file if link=false). */
|
||||
filesystem::path compileMEX(const filesystem::path &output_dir, const string &output_basename, const string &mexext, const vector<filesystem::path> &input_files, const filesystem::path &matlabroot, const filesystem::path &dynareroot, bool link = true) const;
|
||||
|
||||
public:
|
||||
ModelTree(SymbolTable &symbol_table_arg,
|
||||
|
|
|
@ -151,17 +151,19 @@ StaticModel::writeStaticPerBlockMFiles(const string &basename) const
|
|||
}
|
||||
|
||||
vector<filesystem::path>
|
||||
StaticModel::writeStaticPerBlockCFiles(const string &basename) const
|
||||
StaticModel::writeStaticPerBlockCFiles(const string &basename, const string &mexext,
|
||||
const filesystem::path &matlabroot,
|
||||
const filesystem::path &dynareroot) const
|
||||
{
|
||||
temporary_terms_t temporary_terms; // Temp terms written so far
|
||||
vector<filesystem::path> written_src_files;
|
||||
const filesystem::path model_src_dir { filesystem::path{basename} / "model" / "src" };
|
||||
vector<filesystem::path> compiled_object_files;
|
||||
|
||||
for (int blk = 0; blk < static_cast<int>(blocks.size()); blk++)
|
||||
{
|
||||
BlockSimulationType simulation_type = blocks[blk].simulation_type;
|
||||
|
||||
string filename = basename + "/model/src/static_" + to_string(blk+1) + ".c";
|
||||
written_src_files.emplace_back(filename);
|
||||
filesystem::path filename { model_src_dir / ("static_" + to_string(blk+1) + ".c") };
|
||||
ofstream output{filename, ios::out | ios::binary};
|
||||
if (!output.is_open())
|
||||
{
|
||||
|
@ -229,7 +231,12 @@ StaticModel::writeStaticPerBlockCFiles(const string &basename) const
|
|||
|
||||
output.close();
|
||||
|
||||
filename = basename + "/model/src/static_" + to_string(blk+1) + ".h";
|
||||
// Compile intermediary object under <MODFILE>/model/src/
|
||||
compiled_object_files.emplace_back(compileMEX(model_src_dir, "static_" + to_string(blk+1),
|
||||
mexext, { filename }, matlabroot, dynareroot,
|
||||
false));
|
||||
|
||||
filename = model_src_dir / ("static_" + to_string(blk+1) + ".h");
|
||||
ofstream header_output{filename, ios::out | ios::binary};
|
||||
if (!header_output.is_open())
|
||||
{
|
||||
|
@ -239,7 +246,7 @@ StaticModel::writeStaticPerBlockCFiles(const string &basename) const
|
|||
header_output << header.str() << ';' << endl;
|
||||
header_output.close();
|
||||
}
|
||||
return written_src_files;
|
||||
return compiled_object_files;
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -959,8 +966,8 @@ StaticModel::writeStaticFile(const string &basename, bool block, bool use_dll, c
|
|||
|
||||
if (use_dll)
|
||||
{
|
||||
auto per_block_src_files { writeStaticPerBlockCFiles(basename) };
|
||||
writeStaticBlockCFile(basename, move(per_block_src_files), mexext, matlabroot, dynareroot);
|
||||
auto per_block_object_files { writeStaticPerBlockCFiles(basename, mexext, matlabroot, dynareroot) };
|
||||
writeStaticBlockCFile(basename, move(per_block_object_files), mexext, matlabroot, dynareroot);
|
||||
}
|
||||
else if (julia)
|
||||
{
|
||||
|
@ -1033,7 +1040,7 @@ StaticModel::writeStaticBlockMFile(const string &basename) const
|
|||
}
|
||||
|
||||
void
|
||||
StaticModel::writeStaticBlockCFile(const string &basename, vector<filesystem::path> per_block_src_files, const string &mexext, const filesystem::path &matlabroot, const filesystem::path &dynareroot) const
|
||||
StaticModel::writeStaticBlockCFile(const string &basename, vector<filesystem::path> per_block_object_files, const string &mexext, const filesystem::path &matlabroot, const filesystem::path &dynareroot) const
|
||||
{
|
||||
string filename = basename + "/model/src/static.c";
|
||||
|
||||
|
@ -1104,8 +1111,8 @@ StaticModel::writeStaticBlockCFile(const string &basename, vector<filesystem::pa
|
|||
<< "}" << endl;
|
||||
output.close();
|
||||
|
||||
per_block_src_files.push_back(filename);
|
||||
compileMEX("+" + basename, "static", mexext, per_block_src_files, matlabroot, dynareroot);
|
||||
per_block_object_files.push_back(filename);
|
||||
compileMEX("+" + basename, "static", mexext, per_block_object_files, matlabroot, dynareroot);
|
||||
}
|
||||
|
||||
void
|
||||
|
|
|
@ -48,7 +48,7 @@ private:
|
|||
|
||||
/* Writes the main static functions of block decomposed model (C version),
|
||||
then compiles it with the per-block functions into a single MEX */
|
||||
void writeStaticBlockCFile(const string &basename, vector<filesystem::path> per_block_src_files, const string &mexext, const filesystem::path &matlabroot, const filesystem::path &dynareroot) const;
|
||||
void writeStaticBlockCFile(const string &basename, vector<filesystem::path> per_block_object_files, const string &mexext, const filesystem::path &matlabroot, const filesystem::path &dynareroot) const;
|
||||
|
||||
//! Helper for writing a per-block static file of block decomposed model
|
||||
template<ExprNodeOutputType output_type>
|
||||
|
@ -57,9 +57,9 @@ private:
|
|||
//! Writes the per-block static files of block decomposed model (MATLAB version)
|
||||
void writeStaticPerBlockMFiles(const string &basename) const;
|
||||
|
||||
/* Writes the per-block static files of block decomposed model (C version).
|
||||
Returns the list of paths to the generated C source files (not the headers) */
|
||||
vector<filesystem::path> writeStaticPerBlockCFiles(const string &basename) const;
|
||||
/* Writes and compiles the per-block static files of block decomposed model
|
||||
(C version). Returns the list of paths to the compiled object files. */
|
||||
vector<filesystem::path> writeStaticPerBlockCFiles(const string &basename, const string &mexext, const filesystem::path &matlabroot, const filesystem::path &dynareroot) const;
|
||||
|
||||
//! Writes the code of the block-decomposed model in virtual machine bytecode
|
||||
void writeStaticBlockBytecode(const string &basename) const;
|
||||
|
|
Loading…
Reference in New Issue