Commit 72bffbb1 by FritzFlorian

Add optional easy_profiler dependency to the project.

parent 351ced3d
Pipeline #1125 failed with stages
in 30 seconds
...@@ -15,6 +15,7 @@ include(cmake/SetupOptimizationLevel.cmake) ...@@ -15,6 +15,7 @@ include(cmake/SetupOptimizationLevel.cmake)
include(cmake/SetupThreadingSupport.cmake) include(cmake/SetupThreadingSupport.cmake)
include(cmake/SetupThreadSanitizer.cmake) include(cmake/SetupThreadSanitizer.cmake)
include(cmake/SetupAddressSanitizer.cmake) include(cmake/SetupAddressSanitizer.cmake)
include(cmake/SetupEasyProfiler.cmake)
# make our internal cmake script collection available in the build process. # make our internal cmake script collection available in the build process.
list(APPEND CMAKE_PREFIX_PATH "${PROJECT_SOURCE_DIR}/cmake") list(APPEND CMAKE_PREFIX_PATH "${PROJECT_SOURCE_DIR}/cmake")
......
...@@ -4,7 +4,16 @@ A collection of stuff that we noticed during development. ...@@ -4,7 +4,16 @@ A collection of stuff that we noticed during development.
Useful later on to write a project report and to go back Useful later on to write a project report and to go back
in time to find out why certain decisions were made. in time to find out why certain decisions were made.
## 28.03.2018 - custom new operators ## 02.04.2019 - CMake Export
We built our project using CMake to make it portable and easy to set up.
To allow others to use our library we need to make it installable on
other systems. For this we use CMake's install feature and
a [tutorial](https://pabloariasal.github.io/2018/02/19/its-time-to-do-cmake-right/)
on how to correctly configure a CMake library to be included by other
projects.
## 28.03.2019 - custom new operators
When initializing sub_tasks we want to place them on our custom When initializing sub_tasks we want to place them on our custom
'stack like' data structure per thread. We looked at TBB's API 'stack like' data structure per thread. We looked at TBB's API
......
...@@ -43,3 +43,26 @@ keep the repository in a state where the sanitizer reports errors. ...@@ -43,3 +43,26 @@ keep the repository in a state where the sanitizer reports errors.
Consider reading [the section on common data races](https://github.com/google/sanitizers/wiki/ThreadSanitizerPopularDataRaces) Consider reading [the section on common data races](https://github.com/google/sanitizers/wiki/ThreadSanitizerPopularDataRaces)
to get an idea of what we try to avoid in our code. to get an idea of what we try to avoid in our code.
### Profiling
To make profiling portable and allow us to later analyze the logs
programmatically we use [easy_profiler](https://github.com/yse/easy_profiler)
for capturing data. To enable profiling, install the library on your system
(ideally by building it and then running `make install`) and set the
cmake option `-DEASY_PROFILER=ON`.
After that see the `invoke_parallel` example app for activating the
profiler. This will generate a trace file that can be viewed with
the `profiler_gui <output.prof>` command.
Please note that the profiler adds overhead when looking at sub-millisecond
method invocations as we do, and it cannot replace a separate
profiler like `gperf` or `valgrind` for detailed analysis.
We still think it makes sense to add it in as an optional feature,
as the customizable colors and fine grained events (including collection
of variables) can be used to visualize the `big picture` of
program execution. Also, we hope to use it to log 'events' like
successful and failed steals in the future, as the general idea of logging
information per thread efficiently might be helpful for further
analysis.
add_executable(invoke_parallel main.cpp) add_executable(invoke_parallel main.cpp)
target_link_libraries(invoke_parallel pls) target_link_libraries(invoke_parallel pls easy_profiler)
#include <pls/pls.h> #include <pls/pls.h>
#include <iostream> #include <iostream>
static pls::static_scheduler_memory<8, 2 << 10> my_scheduler_memory; #include <easy/profiler.h>
static constexpr int CUTOFF = 20; static pls::static_scheduler_memory<8, 2 << 14> my_scheduler_memory;
static constexpr int CUTOFF = 10;
long fib_serial(long n) { long fib_serial(long n) {
if (n == 0) { if (n == 0) {
...@@ -31,19 +33,17 @@ long fib(long n) { ...@@ -31,19 +33,17 @@ long fib(long n) {
} }
int main() { int main() {
EASY_PROFILER_ENABLE;
pls::scheduler scheduler{&my_scheduler_memory, 8}; pls::scheduler scheduler{&my_scheduler_memory, 8};
scheduler.perform_work([] { long result;
auto start = std::chrono::high_resolution_clock::now(); scheduler.perform_work([&] {
EASY_MAIN_THREAD;
// Call looks just the same, only requirement is // Call looks just the same, only requirement is
// the enclosure in the perform_work lambda. // the enclosure in the perform_work lambda.
long result = fib(30); result = fib(30);
auto end = std::chrono::high_resolution_clock::now();
long time = std::chrono::duration_cast<std::chrono::microseconds>(end - start).count();
std::cout << "Fib(30)=" << result << std::endl;
std::cout << "Execution time in us: " << time << std::endl;
}); });
std::cout << "Fib(30)=" << result << std::endl;
profiler::dumpBlocksToFile("test_profile.prof");
} }
# Optional external dependencies
#
# easy_profiler integration:
#  - EASY_PROFILER (option, default OFF) requests profiling support.
#  - If it is requested but the easy_profiler package cannot be found,
#    the option is switched off with a warning instead of aborting the
#    configure step.
#  - Whenever profiling ends up disabled, DISABLE_EASY_PROFILER is added
#    as a compile definition.

# QUIET: this lookup also runs when profiling was not requested, so a
# missing package must not produce noise by itself. The explicit warning
# below covers the only case where the user needs to know about it.
find_package(easy_profiler QUIET)
option(EASY_PROFILER "Enable the profiler" OFF)

if(EASY_PROFILER AND NOT easy_profiler_FOUND)
    message(WARNING "EasyProfiler dependency not found on system, DISABLING it!")
    # Shadows the cache entry created by option() for the rest of the build scripts.
    set(EASY_PROFILER OFF)
endif()

if(NOT EASY_PROFILER)
    add_definitions(-DDISABLE_EASY_PROFILER)
endif()

# STATUS adds the usual "-- " prefix and prints alongside CMake's other status output.
message(STATUS "Easy Profiler: ${EASY_PROFILER}")
...@@ -29,6 +29,9 @@ target_include_directories(pls ...@@ -29,6 +29,9 @@ target_include_directories(pls
target_link_libraries(pls target_link_libraries(pls
Threads::Threads # pthread support Threads::Threads # pthread support
) )
if(EASY_PROFILER)
target_link_libraries(pls easy_profiler)
endif()
# Rules for installing the library on a system # Rules for installing the library on a system
# ...binaries # ...binaries
......
...@@ -33,7 +33,7 @@ namespace pls { ...@@ -33,7 +33,7 @@ namespace pls {
auto internal_body = [&] (fork_join_sub_task* this_task){ auto internal_body = [&] (fork_join_sub_task* this_task){
auto sub_task_body_1 = [&] (fork_join_sub_task*){ function1(); }; auto sub_task_body_1 = [&] (fork_join_sub_task*){ function1(); };
fork_join_lambda<decltype(sub_task_body_1)> sub_task_1(&sub_task_body_1); auto sub_task_1 = fork_join_lambda<decltype(sub_task_body_1)>(&sub_task_body_1);
this_task->spawn_child(sub_task_1); this_task->spawn_child(sub_task_1);
function2(); // Execute last function 'inline' without spawning a sub_task object function2(); // Execute last function 'inline' without spawning a sub_task object
......
...@@ -2,6 +2,8 @@ ...@@ -2,6 +2,8 @@
#ifndef PLS_TBB_LIKE_TASK_H #ifndef PLS_TBB_LIKE_TASK_H
#define PLS_TBB_LIKE_TASK_H #define PLS_TBB_LIKE_TASK_H
#include <easy/profiler.h>
#include "pls/internal/base/aligned_stack.h" #include "pls/internal/base/aligned_stack.h"
#include "pls/internal/base/deque.h" #include "pls/internal/base/deque.h"
...@@ -84,6 +86,8 @@ namespace pls { ...@@ -84,6 +86,8 @@ namespace pls {
last_stolen_{nullptr} {}; last_stolen_{nullptr} {};
void execute() override { void execute() override {
EASY_BLOCK("execute fork_join_task", profiler::colors::LightGreen);
// Bind this instance to our OS thread // Bind this instance to our OS thread
my_stack_ = base::this_thread::state<thread_state>()->task_stack_; my_stack_ = base::this_thread::state<thread_state>()->task_stack_;
root_task_->tbb_task_ = this; root_task_->tbb_task_ = this;
...@@ -98,6 +102,7 @@ namespace pls { ...@@ -98,6 +102,7 @@ namespace pls {
template<typename T> template<typename T>
void fork_join_sub_task::spawn_child(const T& task) { void fork_join_sub_task::spawn_child(const T& task) {
EASY_FUNCTION(profiler::colors::Blue)
static_assert(std::is_base_of<fork_join_sub_task, T>::value, "Only pass fork_join_sub_task subclasses!"); static_assert(std::is_base_of<fork_join_sub_task, T>::value, "Only pass fork_join_sub_task subclasses!");
T* new_task = tbb_task_->my_stack_->push(task); T* new_task = tbb_task_->my_stack_->push(task);
......
...@@ -2,6 +2,7 @@ ...@@ -2,6 +2,7 @@
#ifndef PLS_ROOT_MASTER_TASK_H #ifndef PLS_ROOT_MASTER_TASK_H
#define PLS_ROOT_MASTER_TASK_H #define PLS_ROOT_MASTER_TASK_H
#include <easy/profiler.h>
#include <mutex> #include <mutex>
#include "abstract_task.h" #include "abstract_task.h"
...@@ -13,27 +14,25 @@ namespace pls { ...@@ -13,27 +14,25 @@ namespace pls {
template<typename Function> template<typename Function>
class root_task : public abstract_task { class root_task : public abstract_task {
Function function_; Function function_;
bool finished_; std::atomic_uint8_t finished_;
// Improvement: Remove lock and replace by atomic variable (performance)
base::spin_lock finished_lock_;
public: public:
explicit root_task(Function function): explicit root_task(Function function):
abstract_task{0, id{0}}, abstract_task{0, id{0}},
function_{function}, function_{function},
finished_{false} {} finished_{0} {}
root_task(const root_task& other):
abstract_task{0, id{0}},
function_{other.function_},
finished_{0} {}
bool finished() { bool finished() {
std::lock_guard<base::spin_lock> lock{finished_lock_};
return finished_; return finished_;
} }
void execute() override { void execute() override {
EASY_BLOCK("execute root_task", profiler::colors::LightGreen);
function_(); function_();
{ finished_ = 1;
std::lock_guard<base::spin_lock> lock{finished_lock_};
finished_ = true;
}
} }
bool internal_stealing(abstract_task* /*other_task*/) override { bool internal_stealing(abstract_task* /*other_task*/) override {
...@@ -55,6 +54,7 @@ namespace pls { ...@@ -55,6 +54,7 @@ namespace pls {
master_task_{master_task} {} master_task_{master_task} {}
void execute() override { void execute() override {
EASY_BLOCK("execute root_task", profiler::colors::LightGreen);
do { do {
steal_work(); steal_work();
} while (!master_task_->finished()); } while (!master_task_->finished());
......
...@@ -2,6 +2,7 @@ ...@@ -2,6 +2,7 @@
#ifndef PLS_SCHEDULER_H #ifndef PLS_SCHEDULER_H
#define PLS_SCHEDULER_H #define PLS_SCHEDULER_H
#include <easy/profiler.h>
#include <array> #include <array>
#include <iostream> #include <iostream>
...@@ -64,6 +65,7 @@ namespace pls { ...@@ -64,6 +65,7 @@ namespace pls {
template<typename Function> template<typename Function>
void perform_work(Function work_section) { void perform_work(Function work_section) {
EASY_FUNCTION();
root_task<Function> master{work_section}; root_task<Function> master{work_section};
// Push root task on stacks // Push root task on stacks
......
#include <easy/profiler.h>
#include "pls/internal/scheduling/thread_state.h" #include "pls/internal/scheduling/thread_state.h"
#include "pls/internal/scheduling/abstract_task.h" #include "pls/internal/scheduling/abstract_task.h"
#include "pls/internal/scheduling/scheduler.h" #include "pls/internal/scheduling/scheduler.h"
...@@ -6,6 +8,7 @@ namespace pls { ...@@ -6,6 +8,7 @@ namespace pls {
namespace internal { namespace internal {
namespace scheduling { namespace scheduling {
bool abstract_task::steal_work() { bool abstract_task::steal_work() {
EASY_FUNCTION(profiler::colors::Orange);
auto my_state = base::this_thread::state<thread_state>(); auto my_state = base::this_thread::state<thread_state>();
auto my_scheduler = my_state->scheduler_; auto my_scheduler = my_state->scheduler_;
...@@ -15,14 +18,19 @@ namespace pls { ...@@ -15,14 +18,19 @@ namespace pls {
auto target_state = my_scheduler->thread_state_for(target); auto target_state = my_scheduler->thread_state_for(target);
// TODO: Cleaner Locking Using std::lock_guard // TODO: Cleaner Locking Using std::lock_guard
EASY_BLOCK("Acquire Thread Lock", profiler::colors::Red)
target_state->lock_.lock(); target_state->lock_.lock();
EASY_END_BLOCK;
// Dig down to our level // Dig down to our level
EASY_BLOCK("Go to our level")
abstract_task* current_task = target_state->root_task_; abstract_task* current_task = target_state->root_task_;
while (current_task != nullptr && current_task->depth() < depth()) { while (current_task != nullptr && current_task->depth() < depth()) {
current_task = current_task->child_task_; current_task = current_task->child_task_;
} }
EASY_END_BLOCK;
EASY_BLOCK("Internal Steal")
if (current_task != nullptr) { if (current_task != nullptr) {
// See if it equals our type and depth of task // See if it equals our type and depth of task
if (current_task->unique_id_ == unique_id_ && if (current_task->unique_id_ == unique_id_ &&
...@@ -37,10 +45,12 @@ namespace pls { ...@@ -37,10 +45,12 @@ namespace pls {
current_task = current_task->child_task_; current_task = current_task->child_task_;
} }
} }
EASY_END_BLOCK;
// Execute 'top level task steal' if possible // Execute 'top level task steal' if possible
// (only try deeper tasks to keep depth restricted stealing) // (only try deeper tasks to keep depth restricted stealing)
EASY_BLOCK("Top Level Steal")
while (current_task != nullptr) { while (current_task != nullptr) {
auto lock = &target_state->lock_; auto lock = &target_state->lock_;
if (current_task->split_task(lock)) { if (current_task->split_task(lock)) {
...@@ -50,6 +60,7 @@ namespace pls { ...@@ -50,6 +60,7 @@ namespace pls {
current_task = current_task->child_task_; current_task = current_task->child_task_;
} }
EASY_END_BLOCK;
target_state->lock_.unlock(); target_state->lock_.unlock();
} }
......
#include <easy/profiler.h>
#include "pls/internal/scheduling/scheduler.h" #include "pls/internal/scheduling/scheduler.h"
#include "pls/internal/scheduling/fork_join_task.h" #include "pls/internal/scheduling/fork_join_task.h"
...@@ -16,9 +18,11 @@ namespace pls { ...@@ -16,9 +18,11 @@ namespace pls {
} }
void fork_join_sub_task::execute() { void fork_join_sub_task::execute() {
EASY_BLOCK("execute sub_task", profiler::colors::Green);
tbb_task_->currently_executing_ = this; tbb_task_->currently_executing_ = this;
execute_internal(); execute_internal();
tbb_task_->currently_executing_ = nullptr; tbb_task_->currently_executing_ = nullptr;
EASY_END_BLOCK;
wait_for_all(); wait_for_all();
if (parent_ != nullptr) { if (parent_ != nullptr) {
...@@ -40,13 +44,18 @@ namespace pls { ...@@ -40,13 +44,18 @@ namespace pls {
void fork_join_sub_task::wait_for_all() { void fork_join_sub_task::wait_for_all() {
while (ref_count_ > 0) { while (ref_count_ > 0) {
EASY_BLOCK("get local sub task", profiler::colors::Blue)
fork_join_sub_task* local_task = tbb_task_->get_local_sub_task(); fork_join_sub_task* local_task = tbb_task_->get_local_sub_task();
EASY_END_BLOCK
if (local_task != nullptr) { if (local_task != nullptr) {
local_task->execute(); local_task->execute();
} else { } else {
// Try to steal work. // Try to steal work.
// External steal will be executed implicitly if success // External steal will be executed implicitly if success
if (tbb_task_->steal_work()) { EASY_BLOCK("steal work", profiler::colors::Blue)
bool internal_steal_success = tbb_task_->steal_work();
EASY_END_BLOCK
if (internal_steal_success) {
tbb_task_->last_stolen_->execute(); tbb_task_->last_stolen_->execute();
} }
} }
...@@ -63,6 +72,7 @@ namespace pls { ...@@ -63,6 +72,7 @@ namespace pls {
} }
bool fork_join_task::internal_stealing(abstract_task* other_task) { bool fork_join_task::internal_stealing(abstract_task* other_task) {
EASY_FUNCTION(profiler::colors::Blue);
auto cast_other_task = reinterpret_cast<fork_join_task*>(other_task); auto cast_other_task = reinterpret_cast<fork_join_task*>(other_task);
auto stolen_sub_task = cast_other_task->get_stolen_sub_task(); auto stolen_sub_task = cast_other_task->get_stolen_sub_task();
...@@ -80,6 +90,7 @@ namespace pls { ...@@ -80,6 +90,7 @@ namespace pls {
} }
bool fork_join_task::split_task(base::spin_lock* lock) { bool fork_join_task::split_task(base::spin_lock* lock) {
EASY_FUNCTION(profiler::colors::Blue);
fork_join_sub_task* stolen_sub_task = get_stolen_sub_task(); fork_join_sub_task* stolen_sub_task = get_stolen_sub_task();
if (stolen_sub_task == nullptr) { if (stolen_sub_task == nullptr) {
return false; return false;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment