diff --git a/lib/pls/include/pls/algorithms/scan_impl.h b/lib/pls/include/pls/algorithms/scan_impl.h index 5abe558..1af0418 100644 --- a/lib/pls/include/pls/algorithms/scan_impl.h +++ b/lib/pls/include/pls/algorithms/scan_impl.h @@ -93,6 +93,9 @@ class scan_task : public pls::internal::scheduling::task { } } }, fixed_strategy{1}); + + wait_for_all(); + this->~scan_task(); } }; diff --git a/lib/pls/include/pls/dataflow/internal/function_node.h b/lib/pls/include/pls/dataflow/internal/function_node.h index d7490b7..47fed95 100644 --- a/lib/pls/include/pls/dataflow/internal/function_node.h +++ b/lib/pls/include/pls/dataflow/internal/function_node.h @@ -105,6 +105,10 @@ class function_node, outputs, F> : public node { auto invocation = new(memory) invocation_memory{}; invocation->inputs_missing_ = num_in_ports; }; + void clean_up_instance_buffer(void *memory) const override { + auto invocation = reinterpret_cast<invocation_memory *>(memory); + invocation->~invocation_memory(); // explicit destructor call: ends the object's lifetime, does not free the buffer + } private: multi_in_port_type in_port_; diff --git a/lib/pls/include/pls/dataflow/internal/graph.h b/lib/pls/include/pls/dataflow/internal/graph.h index fa984dc..d1580b8 100644 --- a/lib/pls/include/pls/dataflow/internal/graph.h +++ b/lib/pls/include/pls/dataflow/internal/graph.h @@ -115,6 +115,10 @@ class graph, outputs> : public node { void init_instance_buffer(void *memory) const override { new(memory) invocation_memory{}; }; + void clean_up_instance_buffer(void *memory) const override { + auto invocation = reinterpret_cast<invocation_memory *>(memory); + invocation->~invocation_memory(); // explicit destructor call: ends the object's lifetime, does not free the buffer + } private: inputs_type inputs_; diff --git a/lib/pls/include/pls/dataflow/internal/graph_impl.h b/lib/pls/include/pls/dataflow/internal/graph_impl.h index 377673f..f135982 100644 --- a/lib/pls/include/pls/dataflow/internal/graph_impl.h +++ b/lib/pls/include/pls/dataflow/internal/graph_impl.h @@ -129,9 +129,22 @@ class graph, outputs>::run_graph_task : public pls::t self_->get_invocation(invocation_)->output_buffer_ = output; } + ~run_graph_task() 
{ + node *iterator = self_->node_list_start_; + for (int i = 0; i < self_->num_nodes_; i++) { + void* memory = invocation_.get_instance_buffer(i); + iterator->clean_up_instance_buffer(memory); + + iterator = iterator->direct_successor_; + } + } + void execute_internal() override { PROFILE_WORK_BLOCK("Graph Invocation") feed_inputs<0, I0, I...>{self_->inputs_, input_, invocation_}.run(); + + wait_for_all(); + this->~run_graph_task(); PROFILE_END_BLOCK } }; diff --git a/lib/pls/include/pls/dataflow/internal/merge_node.h b/lib/pls/include/pls/dataflow/internal/merge_node.h index 2a7a6c5..8da2e64 100644 --- a/lib/pls/include/pls/dataflow/internal/merge_node.h +++ b/lib/pls/include/pls/dataflow/internal/merge_node.h @@ -102,6 +102,10 @@ class merge_node : public node { auto invocation = new(memory) invocation_memory{}; invocation->inputs_missing_ = INITIAL_STATE; }; + void clean_up_instance_buffer(void *memory) const override { + auto invocation = reinterpret_cast<invocation_memory *>(memory); + invocation->~invocation_memory(); // explicit destructor call: ends the object's lifetime, does not free the buffer + } private: multi_in_port_type in_port_; diff --git a/lib/pls/include/pls/dataflow/internal/node.h b/lib/pls/include/pls/dataflow/internal/node.h index 0b9c447..037a3eb 100644 --- a/lib/pls/include/pls/dataflow/internal/node.h +++ b/lib/pls/include/pls/dataflow/internal/node.h @@ -25,6 +25,7 @@ class node { virtual int instance_buffer_size() const = 0; virtual void init_instance_buffer(void *memory) const = 0; + virtual void clean_up_instance_buffer(void *memory) const = 0; virtual bool is_fully_connected() const = 0; diff --git a/lib/pls/include/pls/dataflow/internal/split_node.h b/lib/pls/include/pls/dataflow/internal/split_node.h index c0956d2..363f32c 100644 --- a/lib/pls/include/pls/dataflow/internal/split_node.h +++ b/lib/pls/include/pls/dataflow/internal/split_node.h @@ -60,6 +60,9 @@ class split_node : public node { void init_instance_buffer(void *) const override { // No need for memory, we simply forward entries without buffering }; + void 
clean_up_instance_buffer(void *) const override { + // Nothing to clean up, we simply forward entries without buffering + } private: multi_in_port_type in_port_; diff --git a/lib/pls/include/pls/dataflow/internal/switch_node.h b/lib/pls/include/pls/dataflow/internal/switch_node.h index 598275b..b371c0b 100644 --- a/lib/pls/include/pls/dataflow/internal/switch_node.h +++ b/lib/pls/include/pls/dataflow/internal/switch_node.h @@ -86,6 +86,10 @@ class switch_node : public node { auto invocation = new(memory) invocation_memory{}; invocation->inputs_missing_ = 2; }; + void clean_up_instance_buffer(void *memory) const override { + auto invocation = reinterpret_cast<invocation_memory *>(memory); + invocation->~invocation_memory(); // explicit destructor call: ends the object's lifetime, does not free the buffer + } private: multi_in_port_type in_port_; diff --git a/lib/pls/include/pls/internal/scheduling/lambda_task.h b/lib/pls/include/pls/internal/scheduling/lambda_task.h index fb6cc4a..1d3d4b1 100644 --- a/lib/pls/include/pls/internal/scheduling/lambda_task.h +++ b/lib/pls/include/pls/internal/scheduling/lambda_task.h @@ -18,6 +18,9 @@ class lambda_task_by_reference : public task { protected: void execute_internal() override { function_(); + + wait_for_all(); + this->~lambda_task_by_reference(); } }; @@ -31,6 +34,9 @@ class lambda_task_by_value : public task { protected: void execute_internal() override { function_(); + + wait_for_all(); + this->~lambda_task_by_value(); } }; diff --git a/lib/pls/include/pls/internal/scheduling/task.h b/lib/pls/include/pls/internal/scheduling/task.h index e8925cb..e7b23fb 100644 --- a/lib/pls/include/pls/internal/scheduling/task.h +++ b/lib/pls/include/pls/internal/scheduling/task.h @@ -13,6 +13,18 @@ namespace pls { namespace internal { namespace scheduling { +/** + * A task to be executed by the runtime system. + * Tasks are guaranteed to be executed exactly once. + * + * Override the execute_internal() method for your custom code. + * + * IMPORTANT: + * Task memory is re-used without calling the destructor. 
+ * You must call it yourself at the end of execute_internal(). + * This is done to avoid the overhead of virtual function calls + * if no clean-up is required. + */ class task { friend class scheduler; @@ -29,6 +41,10 @@ class task { protected: /* * Must call the parent constructor. + * + * IMPORTANT: + * Task memory is re-used without calling the destructor. + * You must call it yourself at the end of execute_internal(). */ explicit task(); @@ -37,6 +53,9 @@ class task { * Memory will be pushed onto the stack (in aligned memory, thus avoid many small chunks). * MUST be called in constructor, never afterwards. * + * Memory is fully self-managed. Calling e.g. destructors on objects that are no longer + * needed is the user's responsibility (memory is simply re-used after the lifetime of the task ends). + * * @param size Number of bytes to be allocated * @return The allocated memory region */