From ece70775abb4efbda8d38b1bf751215a9d0d7c7d Mon Sep 17 00:00:00 2001 From: Marc Paterno Date: Tue, 23 Jun 2026 16:01:39 -0500 Subject: [PATCH 1/5] docs(node_catalog): document registration and module loading --- phlex/core/node_catalog.hpp | 59 +++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/phlex/core/node_catalog.hpp b/phlex/core/node_catalog.hpp index 274a50893..27709c397 100644 --- a/phlex/core/node_catalog.hpp +++ b/phlex/core/node_catalog.hpp @@ -22,7 +22,66 @@ #include namespace phlex::experimental { + // A node_catalog is the framework_graph's registry of all the algorithm nodes + // that make up a Phlex data-processing application. It holds every kind of + // node the framework knows about (predicates, observers, outputs, folds, + // unfolds, transforms, providers, and sources), grouped by type, and serves + // as the single source of node information used to build and run the flow + // graph. + + // The externally visible entry point for plugin modules, with the signature: + // extern "C" void create_module( + // phlex::experimental::module_graph_proxy m, + // phlex::configuration const& config) + // + // User plugins define this function via the PHLEX_REGISTER_ALGORITHMS macro, + // which expands to the extern "C" entry point. The body invokes module methods + // like transform(), predicate(), fold(), unfold(), observe(), and output() on + // the 'm' argument, which add nodes to this node_catalog. + // + // During application initialization, the framework loads each configured + // plugin shared library (PHLEX_PLUGIN_PATH), resolves the create_module entry + // point via Boost.DLL, and invokes it to populate this node_catalog before + // building the flow graph. + struct PHLEX_CORE_EXPORT node_catalog { + // How nodes are added to the node_catalog + // --------------------------------------- + // The node_catalog is populated as a side effect of *registration*, i.e. 
+ // when user algorithms are declared to be part of the framework_graph. + // There are three insertion paths, all of which ultimately insert a node + // into one of the simple_ptr_map members: + // + // 1. Algorithm registration (transform, predicate, observe, fold, + // unfold, provide, output): Each declaration creates a short-lived + // *_api builder holding a registrar bound to the appropriate map (see + // registrar_for()). For all but output nodes, the registrar's + // destructor creates the node and inserts it at the end of the + // registration statement. For output nodes, an explicit call does + // this. + // 2. Sources: glue::source() inserts directly into `sources`, bypassing + // the registrar. + // 3. Implicit provider_node objects: make_computational_edges() creates + // and inserts into `providers` later, during graph finalization, to + // satisfy otherwise unrequited inputs. + // + // Module (plugin) loading time + // ---------------------------- + // Most registrations originate from dynamically loaded *modules*. At graph + // construction time, load_module() (phlex/app/load_module.cpp) loads each + // configured plugin shared library found on PHLEX_PLUGIN_PATH. For each + // plugin, the `create_module` entry point is found (usually defined via the + // macro PHLEX_REGISTER_ALGORITHMS). + // + // The framework then invokes `create_module`, passing a module_graph_proxy + // that forwards fold/observe/predicate/transform/unfold/output calls into + // this catalog. The plugin's registration code therefore runs during + // single-threaded graph construction and adds its nodes here via the paths + // described above. + + // Note: The loaded libraries' factory functions are kept alive for the + // lifetime of the job so the libraries are not unloaded out from under the + // registered nodes.
template auto registrar_for(std::vector& errors) { From cd505b278c1aa7bf88c35aefebc941c6f7fd341c Mon Sep 17 00:00:00 2001 From: Marc Paterno Date: Tue, 23 Jun 2026 16:07:30 -0500 Subject: [PATCH 2/5] refactor(core): prevent accidental copying of node_catalog Added explicit deleted copy constructor and assignment operator to prevent node_catalog instances from being copied. A framework_graph's node_catalog is meant to be unique and should not be duplicated. --- phlex/core/node_catalog.hpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/phlex/core/node_catalog.hpp b/phlex/core/node_catalog.hpp index 27709c397..f26f164b4 100644 --- a/phlex/core/node_catalog.hpp +++ b/phlex/core/node_catalog.hpp @@ -82,6 +82,14 @@ namespace phlex::experimental { // Note: The loaded libraries' factory functions are kept alive for the // lifetime of the job so the libraries are not unloaded out from under the // registered nodes. + + // Only the framework_graph for an application is intended to have a + // node_catalog, so the copy operations are deleted. Declaring them also + // suppresses the implicit move operations: it can be neither copied nor moved. + node_catalog() = default; + node_catalog(node_catalog const&) = delete; + node_catalog& operator=(node_catalog const&) = delete; + template auto registrar_for(std::vector& errors) { From 7da3b3d596836b063dcb7ae156697b9ff16cd56a Mon Sep 17 00:00:00 2001 From: Marc Paterno Date: Tue, 23 Jun 2026 16:09:15 -0500 Subject: [PATCH 3/5] refactor(app): remove boost namespace using directive Removed using namespace boost; and made all boost namespace references explicit by qualifying them with boost:: prefix. This improves code readability and searchability.
--- phlex/app/load_module.cpp | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/phlex/app/load_module.cpp b/phlex/app/load_module.cpp index 48d52f72c..e6785396b 100644 --- a/phlex/app/load_module.cpp +++ b/phlex/app/load_module.cpp @@ -39,9 +39,8 @@ namespace phlex::experimental { if (!plugin_path_ptr) throw std::runtime_error("PHLEX_PLUGIN_PATH has not been set."); - using namespace boost; std::vector subdirs; - split(subdirs, plugin_path_ptr, is_any_of(":")); + boost::split(subdirs, plugin_path_ptr, boost::is_any_of(":")); // FIXME: Need to test to ensure that first match wins. for (auto const& subdir : subdirs) { @@ -50,9 +49,9 @@ namespace phlex::experimental { if (exists(shared_library_path)) { // Load pymodule with rtld_global to make Python symbols available to extension modules // (e.g., NumPy). Load all other plugins with rtld_local (default) to avoid symbol collisions. - auto const load_mode = - (spec == pymodule_name) ? dll::load_mode::rtld_global : dll::load_mode::default_mode; - return dll::import_symbol(shared_library_path, symbol_name, load_mode); + auto const load_mode = (spec == pymodule_name) ? boost::dll::load_mode::rtld_global + : boost::dll::load_mode::default_mode; + return boost::dll::import_symbol(shared_library_path, symbol_name, load_mode); } } throw std::runtime_error("Could not locate library with specification '"s + spec + From 93d573f6bf7d23157a0301a9ad01734d5e1751fc Mon Sep 17 00:00:00 2001 From: Marc Paterno Date: Tue, 23 Jun 2026 17:51:10 -0500 Subject: [PATCH 4/5] refactor(load_module): hold shared_library explicitly in plugin structs Replace boost::dll::import_symbol with three explicit wrapper structs (module_plugin, source_plugin, driver_plugin), each owning a boost::dll::shared_library and a raw function pointer. This makes library lifetime explicit rather than relying on std::function to keep the loaded .so alive implicitly. plugin_loader now returns std::pair instead of std::function. 
create_driver changes from std::function to std::optional. --- phlex/app/load_module.cpp | 68 ++++++++++++++++++++++++++++++--------- 1 file changed, 53 insertions(+), 15 deletions(-) diff --git a/phlex/app/load_module.cpp b/phlex/app/load_module.cpp index e6785396b..812d2c376 100644 --- a/phlex/app/load_module.cpp +++ b/phlex/app/load_module.cpp @@ -6,11 +6,11 @@ #include "phlex/source.hpp" #include "boost/algorithm/string.hpp" -#include "boost/dll/import.hpp" +#include "boost/dll/shared_library.hpp" #include "boost/json.hpp" #include -#include +#include #include #include @@ -21,17 +21,52 @@ namespace phlex::experimental { namespace { constexpr std::string_view pymodule_name{"pymodule"}; - // If factory function goes out of scope, then the library is unloaded...and that's - // bad. + // The shared_library member in each wrapper struct keeps the loaded .so + // alive for the lifetime of the wrapper. If it goes out of scope, the + // library is unloaded and the stored function pointer becomes invalid. 
+ struct module_plugin { + boost::dll::shared_library lib; + detail::module_creator_t* fn{}; + + void operator()(module_graph_proxy proxy, + configuration const& config) const + { + fn(std::move(proxy), config); + } + }; + + struct source_plugin { + boost::dll::shared_library lib; + detail::source_creator_t* fn{}; + + void operator()(source_bundle bundle, configuration const& config) const + { + fn(bundle, config); + } + }; + + struct driver_plugin { + boost::dll::shared_library lib; + detail::driver_shim_t* fn{}; + + void operator()(driver_proxy proxy, + configuration const& config, + driver_bundle* out) const + { + fn(proxy, config, out); + } + }; + // NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables) - std::vector> create_module; + std::vector create_module; // NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables) - std::vector> create_source; + std::vector create_source; // NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables) - std::function create_driver; + std::optional create_driver; template - std::function plugin_loader(std::string const& spec, std::string const& symbol_name) + std::pair + plugin_loader(std::string const& spec, std::string const& symbol_name) { // Called during single-threaded graph construction char const* plugin_path_ptr = @@ -51,7 +86,9 @@ namespace phlex::experimental { // (e.g., NumPy). Load all other plugins with rtld_local (default) to avoid symbol collisions. auto const load_mode = (spec == pymodule_name) ? 
boost::dll::load_mode::rtld_global : boost::dll::load_mode::default_mode; - return boost::dll::import_symbol(shared_library_path, symbol_name, load_mode); + boost::dll::shared_library lib{shared_library_path, load_mode}; + auto* fn = &lib.get(symbol_name); + return {std::move(lib), fn}; } } throw std::runtime_error("Could not locate library with specification '"s + spec + @@ -88,8 +125,8 @@ namespace phlex::experimental { auto const adjusted_config = detail::adjust_config(label, std::move(raw_config)); auto const& spec = value_to(adjusted_config.at("cpp")); - auto& creator = - create_module.emplace_back(plugin_loader(spec, "create_module")); + auto [lib, fn] = plugin_loader(spec, "create_module"); + auto& creator = create_module.emplace_back(module_plugin{std::move(lib), fn}); configuration const config{adjusted_config}; creator(g.module_proxy(config), config); @@ -100,8 +137,8 @@ namespace phlex::experimental { auto const adjusted_config = detail::adjust_config(label, std::move(raw_config)); auto const& spec = value_to(adjusted_config.at("cpp")); - auto& creator = - create_source.emplace_back(plugin_loader(spec, "create_source")); + auto [lib, fn] = plugin_loader(spec, "create_source"); + auto& creator = create_source.emplace_back(source_plugin{std::move(lib), fn}); // FIXME: Should probably use the parameter name (e.g.) 'plugin_label' instead of // 'module_label', but that requires adjusting other parts of the system @@ -119,9 +156,10 @@ namespace phlex::experimental { // False positive: clang-analyzer cannot trace ownership through Boost's is_any_of // internal reference counting in classification.hpp. 
// NOLINTNEXTLINE(clang-analyzer-cplusplus.NewDeleteLeaks,clang-analyzer-cplusplus.NewDelete) - create_driver = plugin_loader(spec, "create_driver"); + auto [lib, fn] = plugin_loader(spec, "create_driver"); + create_driver.emplace(driver_plugin{std::move(lib), fn}); driver_bundle result; - create_driver(driver_proxy{}, config, &result); + (*create_driver)(driver_proxy{}, config, &result); return result; } } From 305dcc6a432dafe5bc5f83d21dae0f78838757e4 Mon Sep 17 00:00:00 2001 From: Marc Paterno Date: Wed, 24 Jun 2026 15:06:03 -0500 Subject: [PATCH 5/5] docs(module): add output to module_graph_proxy registration list --- phlex/module.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/phlex/module.hpp b/phlex/module.hpp index 1f009d39b..6153842f9 100644 --- a/phlex/module.hpp +++ b/phlex/module.hpp @@ -12,7 +12,7 @@ namespace phlex::experimental { /// @brief Proxy for registering module algorithm nodes. /// /// Passed to @c PHLEX_REGISTER_ALGORITHMS plugin entry points. Provides - /// access to fold, observe, predicate, transform, and unfold registration. + /// access to fold, observe, output, predicate, transform, and unfold registration. /// Users never construct this type directly. template class module_graph_proxy : graph_proxy {