use_dll: parallelize compilation of multiple object files within a single MEX file

In particular, this commit implements dependency tracking in the thread
scheduler: the link step of a MEX file waits until its object files have been
compiled, so that multiple MEX files can share object files.

Ref. #41
master
Sébastien Villemot 2022-10-05 18:34:21 +02:00
parent 5cf4729ab0
commit dd66459e5f
No known key found for this signature in database
GPG Key ID: 2CECE9350ECEBE4A
6 changed files with 96 additions and 62 deletions

View File

@ -347,10 +347,13 @@ DynamicModel::writeBlockBytecodeAdditionalDerivatives(BytecodeWriter &code_file,
} }
vector<filesystem::path> vector<filesystem::path>
DynamicModel::writeDynamicPerBlockCFiles(const string &basename) const DynamicModel::writeDynamicPerBlockCFiles(const string &basename, const string &mexext,
const filesystem::path &matlabroot,
const filesystem::path &dynareroot) const
{ {
temporary_terms_t temporary_terms; // Temp terms written so far temporary_terms_t temporary_terms; // Temp terms written so far
vector<filesystem::path> written_src_files; const filesystem::path model_src_dir { filesystem::path{basename} / "model" / "src" };
vector<filesystem::path> compiled_object_files;
for (int blk = 0; blk < static_cast<int>(blocks.size()); blk++) for (int blk = 0; blk < static_cast<int>(blocks.size()); blk++)
{ {
@ -365,8 +368,7 @@ DynamicModel::writeDynamicPerBlockCFiles(const string &basename) const
int nze_exo = blocks_derivatives_exo[blk].size(); int nze_exo = blocks_derivatives_exo[blk].size();
int nze_exo_det = blocks_derivatives_exo_det[blk].size(); int nze_exo_det = blocks_derivatives_exo_det[blk].size();
string filename = basename + "/model/src/dynamic_" + to_string(blk+1) + ".c"; filesystem::path filename { model_src_dir / ("dynamic_" + to_string(blk+1) + ".c") };
written_src_files.emplace_back(filename);
ofstream output{filename, ios::out | ios::binary}; ofstream output{filename, ios::out | ios::binary};
if (!output.is_open()) if (!output.is_open())
{ {
@ -532,7 +534,12 @@ DynamicModel::writeDynamicPerBlockCFiles(const string &basename) const
<< "}" << endl; << "}" << endl;
output.close(); output.close();
filename = basename + "/model/src/dynamic_" + to_string(blk+1) + ".h"; // Compile intermediary object under <MODFILE>/model/src/
compiled_object_files.emplace_back(compileMEX(model_src_dir, "dynamic_" + to_string(blk+1),
mexext, { filename }, matlabroot, dynareroot,
false));
filename = model_src_dir / ("dynamic_" + to_string(blk+1) + ".h");
ofstream header_output{filename, ios::out | ios::binary}; ofstream header_output{filename, ios::out | ios::binary};
if (!header_output.is_open()) if (!header_output.is_open())
{ {
@ -542,7 +549,7 @@ DynamicModel::writeDynamicPerBlockCFiles(const string &basename) const
header_output << header.str() << ';' << endl; header_output << header.str() << ';' << endl;
header_output.close(); header_output.close();
} }
return written_src_files; return compiled_object_files;
} }
void void
@ -1145,7 +1152,7 @@ DynamicModel::writeDynamicBlockMFile(const string &basename) const
} }
void void
DynamicModel::writeDynamicBlockCFile(const string &basename, vector<filesystem::path> per_block_src_files, const string &mexext, const filesystem::path &matlabroot, const filesystem::path &dynareroot) const DynamicModel::writeDynamicBlockCFile(const string &basename, vector<filesystem::path> per_block_object_files, const string &mexext, const filesystem::path &matlabroot, const filesystem::path &dynareroot) const
{ {
string filename = basename + "/model/src/dynamic.c"; string filename = basename + "/model/src/dynamic.c";
@ -1228,8 +1235,8 @@ DynamicModel::writeDynamicBlockCFile(const string &basename, vector<filesystem::
output.close(); output.close();
per_block_src_files.push_back(filename); per_block_object_files.push_back(filename);
compileMEX("+" + basename, "dynamic", mexext, per_block_src_files, matlabroot, dynareroot); compileMEX("+" + basename, "dynamic", mexext, per_block_object_files, matlabroot, dynareroot);
} }
void void
@ -3607,8 +3614,8 @@ DynamicModel::writeDynamicFile(const string &basename, bool block, bool use_dll,
if (use_dll) if (use_dll)
{ {
auto per_block_src_files { writeDynamicPerBlockCFiles(basename) }; auto per_block_object_files { writeDynamicPerBlockCFiles(basename, mexext, matlabroot, dynareroot) };
writeDynamicBlockCFile(basename, move(per_block_src_files), mexext, matlabroot, dynareroot); writeDynamicBlockCFile(basename, move(per_block_object_files), mexext, matlabroot, dynareroot);
} }
else if (julia) else if (julia)
{ {

View File

@ -127,7 +127,7 @@ private:
void writeDynamicBlockMFile(const string &basename) const; void writeDynamicBlockMFile(const string &basename) const;
/* Writes the main dynamic functions of block decomposed model (C version), /* Writes the main dynamic functions of block decomposed model (C version),
then compiles it with the per-block functions into a single MEX */ then compiles it with the per-block functions into a single MEX */
void writeDynamicBlockCFile(const string &basename, vector<filesystem::path> per_block_src_files, const string &mexext, const filesystem::path &matlabroot, const filesystem::path &dynareroot) const; void writeDynamicBlockCFile(const string &basename, vector<filesystem::path> per_block_object_files, const string &mexext, const filesystem::path &matlabroot, const filesystem::path &dynareroot) const;
/* Computes the number of nonzero elements in deterministic Jacobian of /* Computes the number of nonzero elements in deterministic Jacobian of
block-decomposed model */ block-decomposed model */
int nzeDeterministicJacobianForBlock(int blk) const; int nzeDeterministicJacobianForBlock(int blk) const;
@ -136,9 +136,9 @@ private:
void writeDynamicPerBlockHelper(int blk, ostream &output, temporary_terms_t &temporary_terms, int nze_stochastic, int nze_deterministic, int nze_exo, int nze_exo_det, int nze_other_endo) const; void writeDynamicPerBlockHelper(int blk, ostream &output, temporary_terms_t &temporary_terms, int nze_stochastic, int nze_deterministic, int nze_exo, int nze_exo_det, int nze_other_endo) const;
//! Writes the per-block dynamic files of block decomposed model (MATLAB version) //! Writes the per-block dynamic files of block decomposed model (MATLAB version)
void writeDynamicPerBlockMFiles(const string &basename) const; void writeDynamicPerBlockMFiles(const string &basename) const;
/* Writes the per-block dynamic files of block decomposed model (C version). /* Writes and compiles the per-block dynamic files of block decomposed model
Returns the list of paths to the generated C source files (not the headers) */ (C version). Returns the list of paths to the compiled object files. */
vector<filesystem::path> writeDynamicPerBlockCFiles(const string &basename) const; vector<filesystem::path> writeDynamicPerBlockCFiles(const string &basename, const string &mexext, const filesystem::path &matlabroot, const filesystem::path &dynareroot) const;
//! Writes the code of the block-decomposed model in virtual machine bytecode //! Writes the code of the block-decomposed model in virtual machine bytecode
void writeDynamicBlockBytecode(const string &basename) const; void writeDynamicBlockBytecode(const string &basename) const;
// Writes derivatives w.r.t. exo, exo det and other endogenous // Writes derivatives w.r.t. exo, exo det and other endogenous

View File

@ -41,6 +41,7 @@ vector<jthread> ModelTree::mex_compilation_threads {};
condition_variable ModelTree::mex_compilation_cv; condition_variable ModelTree::mex_compilation_cv;
mutex ModelTree::mex_compilation_mut; mutex ModelTree::mex_compilation_mut;
unsigned int ModelTree::mex_compilation_available_processors {max(jthread::hardware_concurrency(), 1U)}; unsigned int ModelTree::mex_compilation_available_processors {max(jthread::hardware_concurrency(), 1U)};
set<filesystem::path> ModelTree::mex_compilation_done;
void void
ModelTree::copyHelper(const ModelTree &m) ModelTree::copyHelper(const ModelTree &m)
@ -1622,8 +1623,8 @@ ModelTree::findGccOnMacos(const string &mexext)
} }
#endif #endif
void filesystem::path
ModelTree::compileMEX(const filesystem::path &output_dir, const string &funcname, const string &mexext, const vector<filesystem::path> &src_files, const filesystem::path &matlabroot, const filesystem::path &dynareroot) const ModelTree::compileMEX(const filesystem::path &output_dir, const string &output_basename, const string &mexext, const vector<filesystem::path> &input_files, const filesystem::path &matlabroot, const filesystem::path &dynareroot, bool link) const
{ {
const string opt_flags = "-O3 -g0 --param ira-max-conflict-table-size=1 -fno-forward-propagate -fno-gcse -fno-dce -fno-dse -fno-tree-fre -fno-tree-pre -fno-tree-cselim -fno-tree-dse -fno-tree-dce -fno-tree-pta -fno-gcse-after-reload"; const string opt_flags = "-O3 -g0 --param ira-max-conflict-table-size=1 -fno-forward-propagate -fno-gcse -fno-dce -fno-dse -fno-tree-fre -fno-tree-pre -fno-tree-cselim -fno-tree-dse -fno-tree-dce -fno-tree-pta -fno-gcse-after-reload";
@ -1708,7 +1709,7 @@ ModelTree::compileMEX(const filesystem::path &output_dir, const string &funcname
} }
} }
filesystem::path binary{output_dir / (funcname + "." + mexext)}; filesystem::path output_filename {output_dir / (output_basename + "." + (link ? mexext : "o"))};
ostringstream cmd; ostringstream cmd;
@ -1738,35 +1739,48 @@ ModelTree::compileMEX(const filesystem::path &output_dir, const string &funcname
if (!user_set_add_flags.empty()) if (!user_set_add_flags.empty())
cmd << user_set_add_flags << " "; cmd << user_set_add_flags << " ";
for (auto &src : src_files) for (auto &f : input_files)
cmd << src << " "; cmd << f << " ";
cmd << "-o " << binary << " "; cmd << "-o " << output_filename << " ";
if (user_set_subst_libs.empty()) if (link)
cmd << libs; {
if (user_set_subst_libs.empty())
cmd << libs;
else
cmd << user_set_subst_libs;
if (!user_set_add_libs.empty())
cmd << " " << user_set_add_libs;
}
else else
cmd << user_set_subst_libs; cmd << " -c";
if (!user_set_add_libs.empty())
cmd << " " << user_set_add_libs;
#ifdef _WIN32 #ifdef _WIN32
cmd << '"'; cmd << '"';
#endif #endif
cout << "Compiling " << funcname << " MEX..." << endl << cmd.str() << endl; cout << "Compiling " << output_filename << endl;
/* The command line must be captured by value by the thread (a reference // The prerequisites are the object files among the input files
would quickly become dangling). And std::ostringstream is not copyable, so set<filesystem::path> prerequisites;
capture a std::string. */ copy_if(input_files.begin(), input_files.end(),
inserter(prerequisites, prerequisites.end()), [](const auto &p)
{
return p.extension() == ".o";
});
// std::ostringstream is not copyable, so capture a std::string
string cmd_str { cmd.str() }; string cmd_str { cmd.str() };
mex_compilation_threads.emplace_back([cmd_str] mex_compilation_threads.emplace_back([cmd_str, output_filename, prerequisites]
{ {
// Wait until a logical processor becomes available /* Wait until a logical processor becomes available and all prerequisites
are done */
unique_lock<mutex> lk {mex_compilation_mut}; unique_lock<mutex> lk {mex_compilation_mut};
mex_compilation_cv.wait(lk, [] mex_compilation_cv.wait(lk, [prerequisites]
{ {
return mex_compilation_available_processors > 0; return mex_compilation_available_processors > 0 &&
includes(mex_compilation_done.begin(), mex_compilation_done.end(),
prerequisites.begin(), prerequisites.end());
}); });
// Signal to other threads that we have grabbed a logical processor // Signal to other threads that we have grabbed a logical processor
mex_compilation_available_processors--; mex_compilation_available_processors--;
@ -1779,11 +1793,15 @@ ModelTree::compileMEX(const filesystem::path &output_dir, const string &funcname
exit(EXIT_FAILURE); exit(EXIT_FAILURE);
} }
// Signal to other threads that we have freed a logical processor /* Signal to other threads that we have freed a logical processor and
completed a possible prerequisite */
lk.lock(); lk.lock();
mex_compilation_available_processors++; mex_compilation_available_processors++;
mex_compilation_cv.notify_one(); mex_compilation_done.insert(output_filename);
mex_compilation_cv.notify_all();
}); });
return output_filename;
} }
void void

View File

@ -338,14 +338,13 @@ private:
// Stores threads for compiling MEX files in parallel // Stores threads for compiling MEX files in parallel
static vector<jthread> mex_compilation_threads; static vector<jthread> mex_compilation_threads;
/* The following three variables implement the synchronization mechanism for /* The following variables implement the thread synchronization mechanism for
limiting the number of concurrent GCC processes. limiting the number of concurrent GCC processes and tracking dependencies
TODO: Replace these three variables with std::counting_semaphore (from between object files. */
C++20) when upgrading to GCC 11 (and adjust included headers
correspondingly). */
static condition_variable mex_compilation_cv; static condition_variable mex_compilation_cv;
static mutex mex_compilation_mut; static mutex mex_compilation_mut;
static unsigned int mex_compilation_available_processors; static unsigned int mex_compilation_available_processors;
static set<filesystem::path> mex_compilation_done; // Object/MEX files already compiled
/* Compute a pseudo-Jacobian whose all elements are either zero or one, /* Compute a pseudo-Jacobian whose all elements are either zero or one,
depending on whether the variable symbolically appears in the equation */ depending on whether the variable symbolically appears in the equation */
@ -495,11 +494,14 @@ private:
//! Finds a suitable GCC compiler on macOS //! Finds a suitable GCC compiler on macOS
static string findGccOnMacos(const string &mexext); static string findGccOnMacos(const string &mexext);
#endif #endif
/* Compiles a MEX file. The compilation is done in a separate asynchronous /* Compiles a MEX file (if link=true) or an object file to be linked later
thread, so the call to this function is not blocking. The number of into a MEX file (if link=false). The compilation is done in a separate
concurrently running GCC processes is dynamically limited to the number of asynchronous thread, so the call to this function is not blocking. The
available logical processors. */ number of concurrently running GCC processes is dynamically limited to the
void compileMEX(const filesystem::path &output_dir, const string &funcname, const string &mexext, const vector<filesystem::path> &src_files, const filesystem::path &matlabroot, const filesystem::path &dynareroot) const; number of available logical processors. The dependency of a linked MEX
file upon intermediary objects is tracked: the link step waits until every input file ending in ".o" has been compiled. Returns the name of the
output file (to be reused later as input file if link=false). */
filesystem::path compileMEX(const filesystem::path &output_dir, const string &output_basename, const string &mexext, const vector<filesystem::path> &input_files, const filesystem::path &matlabroot, const filesystem::path &dynareroot, bool link = true) const;
public: public:
ModelTree(SymbolTable &symbol_table_arg, ModelTree(SymbolTable &symbol_table_arg,

View File

@ -151,17 +151,19 @@ StaticModel::writeStaticPerBlockMFiles(const string &basename) const
} }
vector<filesystem::path> vector<filesystem::path>
StaticModel::writeStaticPerBlockCFiles(const string &basename) const StaticModel::writeStaticPerBlockCFiles(const string &basename, const string &mexext,
const filesystem::path &matlabroot,
const filesystem::path &dynareroot) const
{ {
temporary_terms_t temporary_terms; // Temp terms written so far temporary_terms_t temporary_terms; // Temp terms written so far
vector<filesystem::path> written_src_files; const filesystem::path model_src_dir { filesystem::path{basename} / "model" / "src" };
vector<filesystem::path> compiled_object_files;
for (int blk = 0; blk < static_cast<int>(blocks.size()); blk++) for (int blk = 0; blk < static_cast<int>(blocks.size()); blk++)
{ {
BlockSimulationType simulation_type = blocks[blk].simulation_type; BlockSimulationType simulation_type = blocks[blk].simulation_type;
string filename = basename + "/model/src/static_" + to_string(blk+1) + ".c"; filesystem::path filename { model_src_dir / ("static_" + to_string(blk+1) + ".c") };
written_src_files.emplace_back(filename);
ofstream output{filename, ios::out | ios::binary}; ofstream output{filename, ios::out | ios::binary};
if (!output.is_open()) if (!output.is_open())
{ {
@ -229,7 +231,12 @@ StaticModel::writeStaticPerBlockCFiles(const string &basename) const
output.close(); output.close();
filename = basename + "/model/src/static_" + to_string(blk+1) + ".h"; // Compile intermediary object under <MODFILE>/model/src/
compiled_object_files.emplace_back(compileMEX(model_src_dir, "static_" + to_string(blk+1),
mexext, { filename }, matlabroot, dynareroot,
false));
filename = model_src_dir / ("static_" + to_string(blk+1) + ".h");
ofstream header_output{filename, ios::out | ios::binary}; ofstream header_output{filename, ios::out | ios::binary};
if (!header_output.is_open()) if (!header_output.is_open())
{ {
@ -239,7 +246,7 @@ StaticModel::writeStaticPerBlockCFiles(const string &basename) const
header_output << header.str() << ';' << endl; header_output << header.str() << ';' << endl;
header_output.close(); header_output.close();
} }
return written_src_files; return compiled_object_files;
} }
void void
@ -959,8 +966,8 @@ StaticModel::writeStaticFile(const string &basename, bool block, bool use_dll, c
if (use_dll) if (use_dll)
{ {
auto per_block_src_files { writeStaticPerBlockCFiles(basename) }; auto per_block_object_files { writeStaticPerBlockCFiles(basename, mexext, matlabroot, dynareroot) };
writeStaticBlockCFile(basename, move(per_block_src_files), mexext, matlabroot, dynareroot); writeStaticBlockCFile(basename, move(per_block_object_files), mexext, matlabroot, dynareroot);
} }
else if (julia) else if (julia)
{ {
@ -1033,7 +1040,7 @@ StaticModel::writeStaticBlockMFile(const string &basename) const
} }
void void
StaticModel::writeStaticBlockCFile(const string &basename, vector<filesystem::path> per_block_src_files, const string &mexext, const filesystem::path &matlabroot, const filesystem::path &dynareroot) const StaticModel::writeStaticBlockCFile(const string &basename, vector<filesystem::path> per_block_object_files, const string &mexext, const filesystem::path &matlabroot, const filesystem::path &dynareroot) const
{ {
string filename = basename + "/model/src/static.c"; string filename = basename + "/model/src/static.c";
@ -1104,8 +1111,8 @@ StaticModel::writeStaticBlockCFile(const string &basename, vector<filesystem::pa
<< "}" << endl; << "}" << endl;
output.close(); output.close();
per_block_src_files.push_back(filename); per_block_object_files.push_back(filename);
compileMEX("+" + basename, "static", mexext, per_block_src_files, matlabroot, dynareroot); compileMEX("+" + basename, "static", mexext, per_block_object_files, matlabroot, dynareroot);
} }
void void

View File

@ -48,7 +48,7 @@ private:
/* Writes the main static functions of block decomposed model (C version), /* Writes the main static functions of block decomposed model (C version),
then compiles it with the per-block functions into a single MEX */ then compiles it with the per-block functions into a single MEX */
void writeStaticBlockCFile(const string &basename, vector<filesystem::path> per_block_src_files, const string &mexext, const filesystem::path &matlabroot, const filesystem::path &dynareroot) const; void writeStaticBlockCFile(const string &basename, vector<filesystem::path> per_block_object_files, const string &mexext, const filesystem::path &matlabroot, const filesystem::path &dynareroot) const;
//! Helper for writing a per-block static file of block decomposed model //! Helper for writing a per-block static file of block decomposed model
template<ExprNodeOutputType output_type> template<ExprNodeOutputType output_type>
@ -57,9 +57,9 @@ private:
//! Writes the per-block static files of block decomposed model (MATLAB version) //! Writes the per-block static files of block decomposed model (MATLAB version)
void writeStaticPerBlockMFiles(const string &basename) const; void writeStaticPerBlockMFiles(const string &basename) const;
/* Writes the per-block static files of block decomposed model (C version). /* Writes and compiles the per-block static files of block decomposed model
Returns the list of paths to the generated C source files (not the headers) */ (C version). Returns the list of paths to the compiled object files. */
vector<filesystem::path> writeStaticPerBlockCFiles(const string &basename) const; vector<filesystem::path> writeStaticPerBlockCFiles(const string &basename, const string &mexext, const filesystem::path &matlabroot, const filesystem::path &dynareroot) const;
//! Writes the code of the block-decomposed model in virtual machine bytecode //! Writes the code of the block-decomposed model in virtual machine bytecode
void writeStaticBlockBytecode(const string &basename) const; void writeStaticBlockBytecode(const string &basename) const;