From aeb55d12692a6e1f4af8267bf5fcfbdd6a4c4170 Mon Sep 17 00:00:00 2001 From: FritzFlorian Date: Mon, 18 May 2020 12:43:38 +0200 Subject: [PATCH] Add output for basic stats: T_1, T_inf, T_wall, memory and basic graph drawing. Basic profiling works. Next steps are to 'refine' the interface, i.e. allow to disable parts of the profiling and better output for some stats. Maybe look at storing/loading the DAG's with stats to/from disk. --- lib/pls/include/pls/internal/profiling/dag_node.h | 51 ++++++++++++++++++++++++++++++++++++++++++++++++++- lib/pls/include/pls/internal/profiling/profiler.h | 33 +++++++++++++++++++++++++-------- 2 files changed, 75 insertions(+), 9 deletions(-) diff --git a/lib/pls/include/pls/internal/profiling/dag_node.h b/lib/pls/include/pls/internal/profiling/dag_node.h index f3c6d0e..8254969 100644 --- a/lib/pls/include/pls/internal/profiling/dag_node.h +++ b/lib/pls/include/pls/internal/profiling/dag_node.h @@ -1,9 +1,12 @@ #ifndef PLS_INTERNAL_PROFILING_DAG_NODE_H_ #define PLS_INTERNAL_PROFILING_DAG_NODE_H_ +#include "pls/internal/base/system_details.h" + #include #include #include +#include namespace pls::internal::profiling { @@ -17,6 +20,37 @@ struct dag_node { std::unique_ptr next_node_; std::list child_nodes_; + base::system_details::pointer_t node_print_id() { + return reinterpret_cast(this); + } + + void dag_compact() { + while (next_node_ && next_node_->child_nodes_.empty()) { + max_memory_ = std::max(max_memory_, next_node_->max_memory_); + total_runtime_ += next_node_->total_runtime_; + next_node_ = std::move(next_node_->next_node_); + } + if (next_node_) { + next_node_->dag_compact(); + } + + for (auto &child : child_nodes_) { + child.dag_compact(); + } + } + + void dag_print(std::ostream &stream, unsigned rank) { + stream << node_print_id() << " [label=" << spawning_thread_id_ << ", rank=" << rank << "];" << std::endl; + for (auto &child : child_nodes_) { + child.dag_print(stream, rank + 1); + stream << node_print_id() << " -> " << child.node_print_id() << ";" << std::endl; + } + if (next_node_) { + next_node_->dag_print(stream, rank); + stream << node_print_id() << " -> " << next_node_->node_print_id() << ";" << std::endl; + } + } + unsigned dag_max_memory() { unsigned max = max_memory_; if (next_node_) { @@ -29,7 +63,7 @@ struct dag_node { } unsigned long dag_total_user_time() { - unsigned total_user_time = total_runtime_; + unsigned long total_user_time = total_runtime_; if (next_node_) { total_user_time += next_node_->dag_total_user_time(); } @@ -38,6 +72,21 @@ struct dag_node { } return total_user_time; } + + unsigned long dag_critical_path() { + unsigned long critical_path = total_runtime_; + if (next_node_) { + critical_path += next_node_->dag_critical_path(); + } + + unsigned long slowest_child = 0; + for (auto &child : child_nodes_) { + slowest_child = std::max(slowest_child, child.dag_critical_path()); + } + critical_path += slowest_child; + + return critical_path; + } }; } diff --git a/lib/pls/include/pls/internal/profiling/profiler.h b/lib/pls/include/pls/internal/profiling/profiler.h index 883b29e..29972a0 100644 --- a/lib/pls/include/pls/internal/profiling/profiler.h +++ b/lib/pls/include/pls/internal/profiling/profiler.h @@ -2,7 +2,7 @@ #ifndef PLS_INTERNAL_PROFILING_PROFILER_H_ #define PLS_INTERNAL_PROFILING_PROFILER_H_ -#define PLS_PROFILING_ENABLED true +#define PLS_PROFILING_ENABLED false #include #include @@ -16,7 +16,8 @@ namespace pls::internal::profiling { class profiler { using clock = std::chrono::steady_clock; - using resolution = std::chrono::nanoseconds; + using measurement_resolution = std::chrono::nanoseconds; + using display_resolution = std::chrono::microseconds; struct profiler_run { profiler_run(unsigned num_threads) : start_time_{}, @@ -29,12 +30,22 @@ class profiler { std::unique_ptr root_node_; std::vector per_thread_stats_; + static unsigned long m_to_d(unsigned long duration) { + measurement_resolution measurement_duration{duration}; + return std::chrono::duration_cast(measurement_duration).count(); + } + void print_stats(unsigned num_threads) const { - auto run_duration = std::chrono::duration_cast(end_time_ - start_time_).count(); + root_node_->dag_compact(); + + auto run_duration = std::chrono::duration_cast(end_time_ - start_time_).count(); std::cout << "===========================" << std::endl; - std::cout << "WALL TIME: " << run_duration << std::endl; + std::cout << "WALL TIME: " << m_to_d(run_duration) << std::endl; unsigned long total_user_time = root_node_->dag_total_user_time(); - std::cout << "USER TIME: " << total_user_time << std::endl; + std::cout << "USER TIME: " << m_to_d(total_user_time) << std::endl; + unsigned long critical_path_time = root_node_->dag_critical_path(); + std::cout << "CRITICAL TIME: " << m_to_d(critical_path_time) << std::endl; + std::cout << "MAX SPEEDUP:" << (double) total_user_time / (double) critical_path_time << std::endl; unsigned long total_failed_steals = 0; unsigned long total_successful_steals = 0; @@ -44,7 +55,7 @@ class profiler { total_successful_steals += thread_stats.successful_steals_; total_steal_time += thread_stats.total_time_stealing_; } - std::cout << "STEALS: (Time " << total_steal_time + std::cout << "STEALS: (Time " << m_to_d(total_steal_time) << ", Total: " << (total_successful_steals + total_failed_steals) << ", Success: " << total_successful_steals << ", Failed: " << total_failed_steals << ")" << std::endl; @@ -54,6 +65,11 @@ class profiler { std::cout << "Wall Time vs. Measured: " << 100.0 * total_measured / total_wall << std::endl; std::cout << "MEMORY: " << root_node_->dag_max_memory() << " bytes per stack" << std::endl; + + // TODO: re-add graph printing +// std::cout << "digraph {" << std::endl; +// root_node_->dag_print(std::cout, 0); +// std::cout << "}" << std::endl; } }; @@ -90,7 +106,8 @@ class profiler { auto end_time = clock::now(); auto - steal_duration = std::chrono::duration_cast(end_time - thread_stats.stealing_start_time_).count(); + steal_duration = + std::chrono::duration_cast(end_time - thread_stats.stealing_start_time_).count(); thread_stats.total_time_stealing_ += steal_duration; }); } @@ -138,7 +155,7 @@ class profiler { thread_stats.run_on_stack([&] { auto end_time = clock::now(); auto user_code_duration = - std::chrono::duration_cast(end_time - thread_stats.task_run_start_time).count(); + std::chrono::duration_cast(end_time - thread_stats.task_run_start_time).count(); in_node->total_runtime_ += user_code_duration; }); } -- libgit2 0.26.0