use_dll: parallelize compilation of multiple object files within a single MEX file

In particular, it implements dependency tracking in the thread scheduler, so
that multiple MEX files can share object files.

Ref. #41
master
Sébastien Villemot 2022-10-05 18:34:21 +02:00
parent 5cf4729ab0
commit dd66459e5f
No known key found for this signature in database
GPG Key ID: 2CECE9350ECEBE4A
6 changed files with 96 additions and 62 deletions

View File

@ -347,10 +347,13 @@ DynamicModel::writeBlockBytecodeAdditionalDerivatives(BytecodeWriter &code_file,
}
vector<filesystem::path>
DynamicModel::writeDynamicPerBlockCFiles(const string &basename) const
DynamicModel::writeDynamicPerBlockCFiles(const string &basename, const string &mexext,
const filesystem::path &matlabroot,
const filesystem::path &dynareroot) const
{
temporary_terms_t temporary_terms; // Temp terms written so far
vector<filesystem::path> written_src_files;
const filesystem::path model_src_dir { filesystem::path{basename} / "model" / "src" };
vector<filesystem::path> compiled_object_files;
for (int blk = 0; blk < static_cast<int>(blocks.size()); blk++)
{
@ -365,8 +368,7 @@ DynamicModel::writeDynamicPerBlockCFiles(const string &basename) const
int nze_exo = blocks_derivatives_exo[blk].size();
int nze_exo_det = blocks_derivatives_exo_det[blk].size();
string filename = basename + "/model/src/dynamic_" + to_string(blk+1) + ".c";
written_src_files.emplace_back(filename);
filesystem::path filename { model_src_dir / ("dynamic_" + to_string(blk+1) + ".c") };
ofstream output{filename, ios::out | ios::binary};
if (!output.is_open())
{
@ -532,7 +534,12 @@ DynamicModel::writeDynamicPerBlockCFiles(const string &basename) const
<< "}" << endl;
output.close();
filename = basename + "/model/src/dynamic_" + to_string(blk+1) + ".h";
// Compile intermediary object under <MODFILE>/model/src/
compiled_object_files.emplace_back(compileMEX(model_src_dir, "dynamic_" + to_string(blk+1),
mexext, { filename }, matlabroot, dynareroot,
false));
filename = model_src_dir / ("dynamic_" + to_string(blk+1) + ".h");
ofstream header_output{filename, ios::out | ios::binary};
if (!header_output.is_open())
{
@ -542,7 +549,7 @@ DynamicModel::writeDynamicPerBlockCFiles(const string &basename) const
header_output << header.str() << ';' << endl;
header_output.close();
}
return written_src_files;
return compiled_object_files;
}
void
@ -1145,7 +1152,7 @@ DynamicModel::writeDynamicBlockMFile(const string &basename) const
}
void
DynamicModel::writeDynamicBlockCFile(const string &basename, vector<filesystem::path> per_block_src_files, const string &mexext, const filesystem::path &matlabroot, const filesystem::path &dynareroot) const
DynamicModel::writeDynamicBlockCFile(const string &basename, vector<filesystem::path> per_block_object_files, const string &mexext, const filesystem::path &matlabroot, const filesystem::path &dynareroot) const
{
string filename = basename + "/model/src/dynamic.c";
@ -1228,8 +1235,8 @@ DynamicModel::writeDynamicBlockCFile(const string &basename, vector<filesystem::
output.close();
per_block_src_files.push_back(filename);
compileMEX("+" + basename, "dynamic", mexext, per_block_src_files, matlabroot, dynareroot);
per_block_object_files.push_back(filename);
compileMEX("+" + basename, "dynamic", mexext, per_block_object_files, matlabroot, dynareroot);
}
void
@ -3607,8 +3614,8 @@ DynamicModel::writeDynamicFile(const string &basename, bool block, bool use_dll,
if (use_dll)
{
auto per_block_src_files { writeDynamicPerBlockCFiles(basename) };
writeDynamicBlockCFile(basename, move(per_block_src_files), mexext, matlabroot, dynareroot);
auto per_block_object_files { writeDynamicPerBlockCFiles(basename, mexext, matlabroot, dynareroot) };
writeDynamicBlockCFile(basename, move(per_block_object_files), mexext, matlabroot, dynareroot);
}
else if (julia)
{

View File

@ -127,7 +127,7 @@ private:
void writeDynamicBlockMFile(const string &basename) const;
/* Writes the main dynamic functions of block decomposed model (C version),
then compiles it with the per-block functions into a single MEX */
void writeDynamicBlockCFile(const string &basename, vector<filesystem::path> per_block_src_files, const string &mexext, const filesystem::path &matlabroot, const filesystem::path &dynareroot) const;
void writeDynamicBlockCFile(const string &basename, vector<filesystem::path> per_block_object_files, const string &mexext, const filesystem::path &matlabroot, const filesystem::path &dynareroot) const;
/* Computes the number of nonzero elements in deterministic Jacobian of
block-decomposed model */
int nzeDeterministicJacobianForBlock(int blk) const;
@ -136,9 +136,9 @@ private:
void writeDynamicPerBlockHelper(int blk, ostream &output, temporary_terms_t &temporary_terms, int nze_stochastic, int nze_deterministic, int nze_exo, int nze_exo_det, int nze_other_endo) const;
//! Writes the per-block dynamic files of block decomposed model (MATLAB version)
void writeDynamicPerBlockMFiles(const string &basename) const;
/* Writes the per-block dynamic files of block decomposed model (C version).
Returns the list of paths to the generated C source files (not the headers) */
vector<filesystem::path> writeDynamicPerBlockCFiles(const string &basename) const;
/* Writes and compiles the per-block dynamic files of block decomposed model
(C version). Returns the list of paths to the compiled object files. */
vector<filesystem::path> writeDynamicPerBlockCFiles(const string &basename, const string &mexext, const filesystem::path &matlabroot, const filesystem::path &dynareroot) const;
//! Writes the code of the block-decomposed model in virtual machine bytecode
void writeDynamicBlockBytecode(const string &basename) const;
// Writes derivatives w.r.t. exo, exo det and other endogenous

View File

@ -41,6 +41,7 @@ vector<jthread> ModelTree::mex_compilation_threads {};
condition_variable ModelTree::mex_compilation_cv;
mutex ModelTree::mex_compilation_mut;
unsigned int ModelTree::mex_compilation_available_processors {max(jthread::hardware_concurrency(), 1U)};
set<filesystem::path> ModelTree::mex_compilation_done;
void
ModelTree::copyHelper(const ModelTree &m)
@ -1622,8 +1623,8 @@ ModelTree::findGccOnMacos(const string &mexext)
}
#endif
void
ModelTree::compileMEX(const filesystem::path &output_dir, const string &funcname, const string &mexext, const vector<filesystem::path> &src_files, const filesystem::path &matlabroot, const filesystem::path &dynareroot) const
filesystem::path
ModelTree::compileMEX(const filesystem::path &output_dir, const string &output_basename, const string &mexext, const vector<filesystem::path> &input_files, const filesystem::path &matlabroot, const filesystem::path &dynareroot, bool link) const
{
const string opt_flags = "-O3 -g0 --param ira-max-conflict-table-size=1 -fno-forward-propagate -fno-gcse -fno-dce -fno-dse -fno-tree-fre -fno-tree-pre -fno-tree-cselim -fno-tree-dse -fno-tree-dce -fno-tree-pta -fno-gcse-after-reload";
@ -1708,7 +1709,7 @@ ModelTree::compileMEX(const filesystem::path &output_dir, const string &funcname
}
}
filesystem::path binary{output_dir / (funcname + "." + mexext)};
filesystem::path output_filename {output_dir / (output_basename + "." + (link ? mexext : "o"))};
ostringstream cmd;
@ -1738,35 +1739,48 @@ ModelTree::compileMEX(const filesystem::path &output_dir, const string &funcname
if (!user_set_add_flags.empty())
cmd << user_set_add_flags << " ";
for (auto &src : src_files)
cmd << src << " ";
cmd << "-o " << binary << " ";
for (auto &f : input_files)
cmd << f << " ";
cmd << "-o " << output_filename << " ";
if (user_set_subst_libs.empty())
cmd << libs;
if (link)
{
if (user_set_subst_libs.empty())
cmd << libs;
else
cmd << user_set_subst_libs;
if (!user_set_add_libs.empty())
cmd << " " << user_set_add_libs;
}
else
cmd << user_set_subst_libs;
if (!user_set_add_libs.empty())
cmd << " " << user_set_add_libs;
cmd << " -c";
#ifdef _WIN32
cmd << '"';
#endif
cout << "Compiling " << funcname << " MEX..." << endl << cmd.str() << endl;
cout << "Compiling " << output_filename << endl;
/* The command line must be captured by value by the thread (a reference
would quickly become dangling). And std::ostringstream is not copyable, so
capture a std::string. */
// The prerequisites are the object files among the input files
set<filesystem::path> prerequisites;
copy_if(input_files.begin(), input_files.end(),
inserter(prerequisites, prerequisites.end()), [](const auto &p)
{
return p.extension() == ".o";
});
// std::ostringstream is not copyable, so capture a std::string
string cmd_str { cmd.str() };
mex_compilation_threads.emplace_back([cmd_str]
mex_compilation_threads.emplace_back([cmd_str, output_filename, prerequisites]
{
// Wait until a logical processor becomes available
/* Wait until a logical processor becomes available and all prerequisites
are done */
unique_lock<mutex> lk {mex_compilation_mut};
mex_compilation_cv.wait(lk, []
mex_compilation_cv.wait(lk, [prerequisites]
{
return mex_compilation_available_processors > 0;
return mex_compilation_available_processors > 0 &&
includes(mex_compilation_done.begin(), mex_compilation_done.end(),
prerequisites.begin(), prerequisites.end());
});
// Signal to other threads that we have grabbed a logical processor
mex_compilation_available_processors--;
@ -1779,11 +1793,15 @@ ModelTree::compileMEX(const filesystem::path &output_dir, const string &funcname
exit(EXIT_FAILURE);
}
// Signal to other threads that we have freed a logical processor
/* Signal to other threads that we have freed a logical processor and
completed a possible prerequisite */
lk.lock();
mex_compilation_available_processors++;
mex_compilation_cv.notify_one();
mex_compilation_done.insert(output_filename);
mex_compilation_cv.notify_all();
});
return output_filename;
}
void

View File

@ -338,14 +338,13 @@ private:
// Stores threads for compiling MEX files in parallel
static vector<jthread> mex_compilation_threads;
/* The following three variables implement the synchronization mechanism for
limiting the number of concurrent GCC processes.
TODO: Replace these three variables with std::counting_semaphore (from
C++20) when upgrading to GCC 11 (and adjust included headers
correspondingly). */
/* The following variables implement the thread synchronization mechanism for
limiting the number of concurrent GCC processes and tracking dependencies
between object files. */
static condition_variable mex_compilation_cv;
static mutex mex_compilation_mut;
static unsigned int mex_compilation_available_processors;
static set<filesystem::path> mex_compilation_done; // Object/MEX files already compiled
/* Compute a pseudo-Jacobian whose elements are all either zero or one,
depending on whether the variable symbolically appears in the equation */
@ -495,11 +494,14 @@ private:
//! Finds a suitable GCC compiler on macOS
static string findGccOnMacos(const string &mexext);
#endif
/* Compiles a MEX file. The compilation is done in a separate asynchronous
thread, so the call to this function is not blocking. The number of
concurrently running GCC processes is dynamically limited to the number of
available logical processors. */
void compileMEX(const filesystem::path &output_dir, const string &funcname, const string &mexext, const vector<filesystem::path> &src_files, const filesystem::path &matlabroot, const filesystem::path &dynareroot) const;
/* Compiles a MEX file (if link=true) or an object file to be linked later
into a MEX file (if link=false). The compilation is done in a separate
asynchronous thread, so the call to this function is not blocking. The
number of concurrently running GCC processes is dynamically limited to the
number of available logical processors. Dependencies are tracked: a
compilation whose input files include object files (".o") is delayed until
those object files have themselves been compiled. Returns the path of the
output file (to be reused later as an input file if link=false). */
filesystem::path compileMEX(const filesystem::path &output_dir, const string &output_basename, const string &mexext, const vector<filesystem::path> &input_files, const filesystem::path &matlabroot, const filesystem::path &dynareroot, bool link = true) const;
public:
ModelTree(SymbolTable &symbol_table_arg,

View File

@ -151,17 +151,19 @@ StaticModel::writeStaticPerBlockMFiles(const string &basename) const
}
vector<filesystem::path>
StaticModel::writeStaticPerBlockCFiles(const string &basename) const
StaticModel::writeStaticPerBlockCFiles(const string &basename, const string &mexext,
const filesystem::path &matlabroot,
const filesystem::path &dynareroot) const
{
temporary_terms_t temporary_terms; // Temp terms written so far
vector<filesystem::path> written_src_files;
const filesystem::path model_src_dir { filesystem::path{basename} / "model" / "src" };
vector<filesystem::path> compiled_object_files;
for (int blk = 0; blk < static_cast<int>(blocks.size()); blk++)
{
BlockSimulationType simulation_type = blocks[blk].simulation_type;
string filename = basename + "/model/src/static_" + to_string(blk+1) + ".c";
written_src_files.emplace_back(filename);
filesystem::path filename { model_src_dir / ("static_" + to_string(blk+1) + ".c") };
ofstream output{filename, ios::out | ios::binary};
if (!output.is_open())
{
@ -229,7 +231,12 @@ StaticModel::writeStaticPerBlockCFiles(const string &basename) const
output.close();
filename = basename + "/model/src/static_" + to_string(blk+1) + ".h";
// Compile intermediary object under <MODFILE>/model/src/
compiled_object_files.emplace_back(compileMEX(model_src_dir, "static_" + to_string(blk+1),
mexext, { filename }, matlabroot, dynareroot,
false));
filename = model_src_dir / ("static_" + to_string(blk+1) + ".h");
ofstream header_output{filename, ios::out | ios::binary};
if (!header_output.is_open())
{
@ -239,7 +246,7 @@ StaticModel::writeStaticPerBlockCFiles(const string &basename) const
header_output << header.str() << ';' << endl;
header_output.close();
}
return written_src_files;
return compiled_object_files;
}
void
@ -959,8 +966,8 @@ StaticModel::writeStaticFile(const string &basename, bool block, bool use_dll, c
if (use_dll)
{
auto per_block_src_files { writeStaticPerBlockCFiles(basename) };
writeStaticBlockCFile(basename, move(per_block_src_files), mexext, matlabroot, dynareroot);
auto per_block_object_files { writeStaticPerBlockCFiles(basename, mexext, matlabroot, dynareroot) };
writeStaticBlockCFile(basename, move(per_block_object_files), mexext, matlabroot, dynareroot);
}
else if (julia)
{
@ -1033,7 +1040,7 @@ StaticModel::writeStaticBlockMFile(const string &basename) const
}
void
StaticModel::writeStaticBlockCFile(const string &basename, vector<filesystem::path> per_block_src_files, const string &mexext, const filesystem::path &matlabroot, const filesystem::path &dynareroot) const
StaticModel::writeStaticBlockCFile(const string &basename, vector<filesystem::path> per_block_object_files, const string &mexext, const filesystem::path &matlabroot, const filesystem::path &dynareroot) const
{
string filename = basename + "/model/src/static.c";
@ -1104,8 +1111,8 @@ StaticModel::writeStaticBlockCFile(const string &basename, vector<filesystem::pa
<< "}" << endl;
output.close();
per_block_src_files.push_back(filename);
compileMEX("+" + basename, "static", mexext, per_block_src_files, matlabroot, dynareroot);
per_block_object_files.push_back(filename);
compileMEX("+" + basename, "static", mexext, per_block_object_files, matlabroot, dynareroot);
}
void

View File

@ -48,7 +48,7 @@ private:
/* Writes the main static functions of block decomposed model (C version),
then compiles it with the per-block functions into a single MEX */
void writeStaticBlockCFile(const string &basename, vector<filesystem::path> per_block_src_files, const string &mexext, const filesystem::path &matlabroot, const filesystem::path &dynareroot) const;
void writeStaticBlockCFile(const string &basename, vector<filesystem::path> per_block_object_files, const string &mexext, const filesystem::path &matlabroot, const filesystem::path &dynareroot) const;
//! Helper for writing a per-block static file of block decomposed model
template<ExprNodeOutputType output_type>
@ -57,9 +57,9 @@ private:
//! Writes the per-block static files of block decomposed model (MATLAB version)
void writeStaticPerBlockMFiles(const string &basename) const;
/* Writes the per-block static files of block decomposed model (C version).
Returns the list of paths to the generated C source files (not the headers) */
vector<filesystem::path> writeStaticPerBlockCFiles(const string &basename) const;
/* Writes and compiles the per-block static files of block decomposed model
(C version). Returns the list of paths to the compiled object files. */
vector<filesystem::path> writeStaticPerBlockCFiles(const string &basename, const string &mexext, const filesystem::path &matlabroot, const filesystem::path &dynareroot) const;
//! Writes the code of the block-decomposed model in virtual machine bytecode
void writeStaticBlockBytecode(const string &basename) const;