diff --git a/README.md b/README.md index 3026a8b..9b22f58 100644 --- a/README.md +++ b/README.md @@ -42,6 +42,10 @@ Available Settings: - Only one sanitizer can be active at once - Enabling has a performance hit (do not use in releases) +commit | fft(1) | fft(2) +--- | --- | --- +[d2eff7da](https://lab.las3.de/gitlab/las3/development/scheduling/predictable_parallel_patterns/commit/d2eff7dafcd822a4da662c2b4606d504b8545483) | 12.5ms -5.3% | 6.5ms -5.3% + ### Testing diff --git a/app/invoke_parallel/CMakeLists.txt b/app/invoke_parallel/CMakeLists.txt index adcb6be..944f5ef 100644 --- a/app/invoke_parallel/CMakeLists.txt +++ b/app/invoke_parallel/CMakeLists.txt @@ -1,2 +1,5 @@ add_executable(invoke_parallel main.cpp) -target_link_libraries(invoke_parallel pls easy_profiler) +target_link_libraries(invoke_parallel pls) +if(EASY_PROFILER) + target_link_libraries(invoke_parallel easy_profiler) +endif() diff --git a/app/invoke_parallel/main.cpp b/app/invoke_parallel/main.cpp index e16b8f7..4ae48ef 100644 --- a/app/invoke_parallel/main.cpp +++ b/app/invoke_parallel/main.cpp @@ -1,7 +1,7 @@ #include -#include +#include -#include +#include static pls::static_scheduler_memory<8, 2 << 14> my_scheduler_memory; @@ -33,17 +33,19 @@ long fib(long n) { } int main() { - EASY_PROFILER_ENABLE; + PROFILE_ENABLE pls::scheduler scheduler{&my_scheduler_memory, 8}; long result; scheduler.perform_work([&] { - EASY_MAIN_THREAD; + PROFILE_MAIN_THREAD // Call looks just the same, only requirement is // the enclosure in the perform_work lambda. 
- result = fib(30); + for (int i = 0; i < 10; i++) { + result = fib(30); + std::cout << "Fib(30)=" << result << std::endl; + } }); - std::cout << "Fib(30)=" << result << std::endl; - profiler::dumpBlocksToFile("test_profile.prof"); + PROFILE_SAVE("test_profile.prof") } diff --git a/cmake/SetupEasyProfiler.cmake b/cmake/SetupEasyProfiler.cmake index ddadb5f..745fedc 100644 --- a/cmake/SetupEasyProfiler.cmake +++ b/cmake/SetupEasyProfiler.cmake @@ -1,17 +1,19 @@ -# Optional external dependencies -find_package(easy_profiler) - option(EASY_PROFILER "Enable the profiler" OFF) if(EASY_PROFILER) - if(easy_profiler_FOUND) + # Optional external dependencies + find_package(easy_profiler) + if(easy_profiler_FOUND) + # Do nothing, add definitions below else() message(WARNING "EasyProfiler dependency not found on system, DISABLING it!") set(EASY_PROFILER OFF) endif() endif() -if(NOT EASY_PROFILER) +if(EASY_PROFILER) + add_definitions(-DENABLE_EASY_PROFILER) +else() add_definitions(-DDISABLE_EASY_PROFILER) endif() diff --git a/lib/pls/CMakeLists.txt b/lib/pls/CMakeLists.txt index 31fb1eb..758a947 100644 --- a/lib/pls/CMakeLists.txt +++ b/lib/pls/CMakeLists.txt @@ -3,7 +3,7 @@ add_library(pls STATIC src/pls.cpp include/pls/pls.h src/internal/base/spin_lock.cpp include/pls/internal/base/spin_lock.h src/internal/base/thread.cpp include/pls/internal/base/thread.h - include/pls/internal/base/prohibit_new.h + include/pls/internal/helpers/prohibit_new.h src/internal/scheduling/abstract_task.cpp include/pls/internal/scheduling/abstract_task.h src/internal/scheduling/scheduler.cpp include/pls/internal/scheduling/scheduler.h src/internal/scheduling/thread_state.cpp include/pls/internal/scheduling/thread_state.h @@ -16,7 +16,8 @@ add_library(pls STATIC src/internal/base/deque.cpp include/pls/internal/base/deque.h src/algorithms/invoke_parallel.cpp include/pls/algorithms/invoke_parallel.h include/pls/internal/base/error_handling.h - include/pls/internal/scheduling/scheduler_memory.h 
src/internal/scheduling/scheduler_memory.cpp) + include/pls/internal/scheduling/scheduler_memory.h src/internal/scheduling/scheduler_memory.cpp + include/pls/internal/helpers/profiler.h) # Add everything in `./include` to be in the include path of this project target_include_directories(pls diff --git a/lib/pls/include/pls/internal/helpers/profiler.h b/lib/pls/include/pls/internal/helpers/profiler.h new file mode 100644 index 0000000..221994d --- /dev/null +++ b/lib/pls/include/pls/internal/helpers/profiler.h @@ -0,0 +1,33 @@ + +#ifndef PLS_PROFILER_H +#define PLS_PROFILER_H +#ifdef ENABLE_EASY_PROFILER + +#include + +#define PROFILE_WORK_BLOCK(msg) EASY_BLOCK(msg, profiler::colors::LightGreen) +#define PROFILE_FORK_JOIN_STEALING(msg) EASY_BLOCK(msg, profiler::colors::LightBlue) +#define PROFILE_STEALING(msg) EASY_BLOCK(msg, profiler::colors::Blue) +#define PROFILE_LOCK(msg) EASY_BLOCK(msg, profiler::colors::Red) + +#define PROFILE_END_BLOCK EASY_END_BLOCK + +#define PROFILE_SAVE(filename) profiler::dumpBlocksToFile(filename); +#define PROFILE_ENABLE EASY_PROFILER_ENABLE +#define PROFILE_MAIN_THREAD EASY_MAIN_THREAD + +#else //ENABLE_EASY_PROFILER + +#define PROFILE_WORK_BLOCK(msg) +#define PROFILE_FORK_JOIN_STEALING(msg) +#define PROFILE_STEALING(msg) +#define PROFILE_LOCK(msg) + +#define PROFILE_END_BLOCK + +#define PROFILE_SAVE(filename) +#define PROFILE_ENABLE +#define PROFILE_MAIN_THREAD + +#endif //ENABLE_EASY_PROFILER +#endif //PLS_PROFILER_H diff --git a/lib/pls/include/pls/internal/base/prohibit_new.h b/lib/pls/include/pls/internal/helpers/prohibit_new.h similarity index 100% rename from lib/pls/include/pls/internal/base/prohibit_new.h rename to lib/pls/include/pls/internal/helpers/prohibit_new.h diff --git a/lib/pls/include/pls/internal/scheduling/fork_join_task.h b/lib/pls/include/pls/internal/scheduling/fork_join_task.h index 36f5ccd..830772f 100644 --- a/lib/pls/include/pls/internal/scheduling/fork_join_task.h +++ 
b/lib/pls/include/pls/internal/scheduling/fork_join_task.h @@ -2,7 +2,7 @@ #ifndef PLS_TBB_LIKE_TASK_H #define PLS_TBB_LIKE_TASK_H -#include +#include "pls/internal/helpers/profiler.h" #include "pls/internal/base/aligned_stack.h" #include "pls/internal/base/deque.h" @@ -86,7 +86,7 @@ namespace pls { last_stolen_{nullptr} {}; void execute() override { - EASY_BLOCK("execute fork_join_task", profiler::colors::LightGreen); + PROFILE_WORK_BLOCK("execute fork_join_task"); // Bind this instance to our OS thread my_stack_ = base::this_thread::state()->task_stack_; @@ -102,7 +102,7 @@ namespace pls { template void fork_join_sub_task::spawn_child(const T& task) { - EASY_FUNCTION(profiler::colors::Blue) + PROFILE_FORK_JOIN_STEALING("spawn_child") static_assert(std::is_base_of::value, "Only pass fork_join_sub_task subclasses!"); T* new_task = tbb_task_->my_stack_->push(task); diff --git a/lib/pls/include/pls/internal/scheduling/root_task.h b/lib/pls/include/pls/internal/scheduling/root_task.h index cdb7f0b..6834b6b 100644 --- a/lib/pls/include/pls/internal/scheduling/root_task.h +++ b/lib/pls/include/pls/internal/scheduling/root_task.h @@ -2,12 +2,13 @@ #ifndef PLS_ROOT_MASTER_TASK_H #define PLS_ROOT_MASTER_TASK_H -#include #include -#include "abstract_task.h" +#include "pls/internal/helpers/profiler.h" #include "pls/internal/base/spin_lock.h" +#include "abstract_task.h" + namespace pls { namespace internal { namespace scheduling { @@ -30,7 +31,7 @@ namespace pls { } void execute() override { - EASY_BLOCK("execute root_task", profiler::colors::LightGreen); + PROFILE_WORK_BLOCK("execute root_task"); function_(); finished_ = 1; } @@ -54,7 +55,7 @@ namespace pls { master_task_{master_task} {} void execute() override { - EASY_BLOCK("execute root_task", profiler::colors::LightGreen); + PROFILE_WORK_BLOCK("execute root_task"); do { steal_work(); } while (!master_task_->finished()); diff --git a/lib/pls/include/pls/internal/scheduling/scheduler.h 
b/lib/pls/include/pls/internal/scheduling/scheduler.h index b0e7136..55e72b5 100644 --- a/lib/pls/include/pls/internal/scheduling/scheduler.h +++ b/lib/pls/include/pls/internal/scheduling/scheduler.h @@ -2,10 +2,11 @@ #ifndef PLS_SCHEDULER_H #define PLS_SCHEDULER_H -#include #include #include +#include "pls/internal/helpers/profiler.h" + #include "pls/internal/base/aligned_stack.h" #include "pls/internal/base/thread.h" #include "pls/internal/base/barrier.h" @@ -34,7 +35,7 @@ namespace pls { template void perform_work(Function work_section) { - EASY_FUNCTION(); + PROFILE_WORK_BLOCK("scheduler::perform_work") root_task master{work_section}; // Push root task on stacks diff --git a/lib/pls/src/internal/scheduling/abstract_task.cpp b/lib/pls/src/internal/scheduling/abstract_task.cpp index 7cf7dca..3b75bd0 100644 --- a/lib/pls/src/internal/scheduling/abstract_task.cpp +++ b/lib/pls/src/internal/scheduling/abstract_task.cpp @@ -1,4 +1,4 @@ -#include +#include "pls/internal/helpers/profiler.h" #include "pls/internal/scheduling/thread_state.h" #include "pls/internal/scheduling/abstract_task.h" @@ -8,7 +8,7 @@ namespace pls { namespace internal { namespace scheduling { bool abstract_task::steal_work() { - EASY_FUNCTION(profiler::colors::Orange); + PROFILE_STEALING("abstract_task::steal_work") auto my_state = base::this_thread::state(); auto my_scheduler = my_state->scheduler_; @@ -18,19 +18,19 @@ namespace pls { auto target_state = my_scheduler->thread_state_for(target); // TODO: Cleaner Locking Using std::guarded_lock - EASY_BLOCK("Acquire Thread Lock", profiler::colors::Red) + PROFILE_LOCK("Acquire Thread Lock") target_state->lock_.lock(); - EASY_END_BLOCK; + PROFILE_END_BLOCK // Dig down to our level - EASY_BLOCK("Go to our level") + PROFILE_STEALING("Go to our level") abstract_task* current_task = target_state->root_task_; while (current_task != nullptr && current_task->depth() < depth()) { current_task = current_task->child_task_; } - EASY_END_BLOCK; + 
PROFILE_END_BLOCK - EASY_BLOCK("Internal Steal") + PROFILE_STEALING("Internal Steal") if (current_task != nullptr) { // See if it equals our type and depth of task if (current_task->unique_id_ == unique_id_ && @@ -45,12 +45,12 @@ namespace pls { current_task = current_task->child_task_; } } - EASY_END_BLOCK; + PROFILE_END_BLOCK // Execute 'top level task steal' if possible // (only try deeper tasks to keep depth restricted stealing) - EASY_BLOCK("Top Level Steal") + PROFILE_STEALING("Top Level Steal") while (current_task != nullptr) { auto lock = &target_state->lock_; if (current_task->split_task(lock)) { @@ -60,7 +60,7 @@ namespace pls { current_task = current_task->child_task_; } - EASY_END_BLOCK; + PROFILE_END_BLOCK target_state->lock_.unlock(); } diff --git a/lib/pls/src/internal/scheduling/fork_join_task.cpp b/lib/pls/src/internal/scheduling/fork_join_task.cpp index e4c9719..1f1360c 100644 --- a/lib/pls/src/internal/scheduling/fork_join_task.cpp +++ b/lib/pls/src/internal/scheduling/fork_join_task.cpp @@ -1,4 +1,4 @@ -#include +#include "pls/internal/helpers/profiler.h" #include "pls/internal/scheduling/scheduler.h" #include "pls/internal/scheduling/fork_join_task.h" @@ -21,11 +21,11 @@ namespace pls { stack_state_{nullptr} {} void fork_join_sub_task::execute() { - EASY_BLOCK("execute sub_task", profiler::colors::Green); + PROFILE_WORK_BLOCK("execute sub_task") tbb_task_->currently_executing_ = this; execute_internal(); tbb_task_->currently_executing_ = nullptr; - EASY_END_BLOCK; + PROFILE_END_BLOCK wait_for_all(); if (parent_ != nullptr) { @@ -47,17 +47,17 @@ namespace pls { void fork_join_sub_task::wait_for_all() { while (ref_count_ > 0) { - EASY_BLOCK("get local sub task", profiler::colors::Blue) + PROFILE_STEALING("get local sub task") fork_join_sub_task* local_task = tbb_task_->get_local_sub_task(); - EASY_END_BLOCK + PROFILE_END_BLOCK if (local_task != nullptr) { local_task->execute(); } else { // Try to steal work.
// External steal will be executed implicitly if success - EASY_BLOCK("steal work", profiler::colors::Blue) + PROFILE_STEALING("steal work") bool internal_steal_success = tbb_task_->steal_work(); - EASY_END_BLOCK + PROFILE_END_BLOCK if (internal_steal_success) { tbb_task_->last_stolen_->execute(); } @@ -75,7 +75,7 @@ namespace pls { } bool fork_join_task::internal_stealing(abstract_task* other_task) { - EASY_FUNCTION(profiler::colors::Blue); + PROFILE_STEALING("fork_join_task::internal_stealing") auto cast_other_task = reinterpret_cast(other_task); auto stolen_sub_task = cast_other_task->get_stolen_sub_task(); @@ -93,7 +93,7 @@ namespace pls { } bool fork_join_task::split_task(base::spin_lock* lock) { - EASY_FUNCTION(profiler::colors::Blue); + PROFILE_STEALING("fork_join_task::split_task") fork_join_sub_task* stolen_sub_task = get_stolen_sub_task(); if (stolen_sub_task == nullptr) { return false;