Commit 72bffbb1 by FritzFlorian

Add optional easy_profiler dependency to the project.

parent 351ced3d
Pipeline #1125 failed with stages
in 30 seconds
......@@ -15,6 +15,7 @@ include(cmake/SetupOptimizationLevel.cmake)
include(cmake/SetupThreadingSupport.cmake)
include(cmake/SetupThreadSanitizer.cmake)
include(cmake/SetupAddressSanitizer.cmake)
include(cmake/SetupEasyProfiler.cmake)
# make our internal cmake script collection available in the build process.
list(APPEND CMAKE_PREFIX_PATH "${PROJECT_SOURCE_DIR}/cmake")
......
......@@ -4,7 +4,16 @@ A collection of stuff that we noticed during development.
Useful later on to write a project report and to go back
in time to find out why certain decisions were made.
## 28.03.2018 - custom new operators
## 02.04.2019 - CMake Export
We built our project using CMake to make it portable and easy to setup.
To allow others to use our library we need to make it installable on
other systems. For this we use CMake's install feature and
a [tutorial](https://pabloariasal.github.io/2018/02/19/its-time-to-do-cmake-right/)
on how to correctly configure a CMake library to be included by other
projects.
## 28.03.2019 - custom new operators
When initializing sub_tasks we want to place them on our custom
'stack like' data structure per thread. We looked at TBB's API
......
......@@ -43,3 +43,26 @@ keep the repository in a state where the sanitizer reports errors.
Consider reading [the section on common data races](https://github.com/google/sanitizers/wiki/ThreadSanitizerPopularDataRaces)
to get an idea of what we try to avoid in our code.
### Profiling
To make profiling portable and allow us to later analyze the logs
programmatically we use [easy_profiler](https://github.com/yse/easy_profiler)
for capturing data. To enable profiling install the library on your system
(best building it and then running `make install`) and set the
cmake option `-DEASY_PROFILER=ON`.
After that see the `invoke_parallel` example app for activating the
profiler. This will generate a trace file that can be viewed with
the `profiler_gui <output.prof>` command.
Please note that the profiler adds overhead when looking at sub-millisecond
method invocations as we do, and it cannot replace a separate
profiler like `gperf` or `valgrind` for detailed analysis.
We still think it makes sense to add it in as an optional feature,
as the customizable colors and fine grained events (including collection
of variables) can be used to visualize the `big picture` of
program execution. Also, we hope to use it to log 'events' like
successful and failed steals in the future, as the general idea of logging
information per thread efficiently might be helpful for further
analysis.
# Example app: builds main.cpp into the `invoke_parallel` executable on top of pls.
add_executable(invoke_parallel main.cpp)
target_link_libraries(invoke_parallel pls)
# easy_profiler is an optional dependency: only link it when the EASY_PROFILER
# option is enabled, the same way the pls library itself links the profiler.
# NOTE(review): assumes the easy_profiler headers provide no-op stubs when the
# profiler is disabled, so main.cpp still builds without linking it - confirm.
if(EASY_PROFILER)
    target_link_libraries(invoke_parallel easy_profiler)
endif()
#include <pls/pls.h>
#include <iostream>
static pls::static_scheduler_memory<8, 2 << 10> my_scheduler_memory;
#include <easy/profiler.h>
static constexpr int CUTOFF = 20;
static pls::static_scheduler_memory<8, 2 << 14> my_scheduler_memory;
static constexpr int CUTOFF = 10;
long fib_serial(long n) {
if (n == 0) {
......@@ -31,19 +33,17 @@ long fib(long n) {
}
int main() {
EASY_PROFILER_ENABLE;
pls::scheduler scheduler{&my_scheduler_memory, 8};
scheduler.perform_work([] {
auto start = std::chrono::high_resolution_clock::now();
long result;
scheduler.perform_work([&] {
EASY_MAIN_THREAD;
// Call looks just the same, only requirement is
// the enclosure in the perform_work lambda.
long result = fib(30);
auto end = std::chrono::high_resolution_clock::now();
long time = std::chrono::duration_cast<std::chrono::microseconds>(end - start).count();
std::cout << "Fib(30)=" << result << std::endl;
std::cout << "Execution time in us: " << time << std::endl;
result = fib(30);
});
std::cout << "Fib(30)=" << result << std::endl;
profiler::dumpBlocksToFile("test_profile.prof");
}
# Optional external dependencies
#
# Looks for an installed easy_profiler package and exposes the EASY_PROFILER
# option (default OFF). After this script ran, EASY_PROFILER is ON only if the
# user requested it AND the package was found.
find_package(easy_profiler)
option(EASY_PROFILER "Enable the profiler" OFF)

# The user asked for profiling but the package is missing:
# warn and fall back to a build without the profiler.
if(EASY_PROFILER AND NOT easy_profiler_FOUND)
    message(WARNING "EasyProfiler dependency not found on system, DISABLING it!")
    set(EASY_PROFILER OFF)
endif()

# Tell the sources that profiling is off.
# NOTE(review): presumably DISABLE_EASY_PROFILER is what makes the EASY_* macros
# compile to nothing - confirm against the installed easy_profiler version.
if(NOT EASY_PROFILER)
    add_definitions(-DDISABLE_EASY_PROFILER)
endif()

# STATUS messages get the "-- " prefix automatically and are written to stdout
# together with the other configuration summary lines.
message(STATUS "Easy Profiler: ${EASY_PROFILER}")
......@@ -29,6 +29,9 @@ target_include_directories(pls
target_link_libraries(pls
Threads::Threads # pthread support
)
if(EASY_PROFILER)
target_link_libraries(pls easy_profiler)
endif()
# Rules for installing the library on a system
# ...binaries
......
......@@ -33,7 +33,7 @@ namespace pls {
auto internal_body = [&] (fork_join_sub_task* this_task){
auto sub_task_body_1 = [&] (fork_join_sub_task*){ function1(); };
fork_join_lambda<decltype(sub_task_body_1)> sub_task_1(&sub_task_body_1);
auto sub_task_1 = fork_join_lambda<decltype(sub_task_body_1)>(&sub_task_body_1);
this_task->spawn_child(sub_task_1);
function2(); // Execute last function 'inline' without spawning a sub_task object
......
......@@ -2,6 +2,8 @@
#ifndef PLS_TBB_LIKE_TASK_H
#define PLS_TBB_LIKE_TASK_H
#include <easy/profiler.h>
#include "pls/internal/base/aligned_stack.h"
#include "pls/internal/base/deque.h"
......@@ -84,6 +86,8 @@ namespace pls {
last_stolen_{nullptr} {};
void execute() override {
EASY_BLOCK("execute fork_join_task", profiler::colors::LightGreen);
// Bind this instance to our OS thread
my_stack_ = base::this_thread::state<thread_state>()->task_stack_;
root_task_->tbb_task_ = this;
......@@ -97,7 +101,8 @@ namespace pls {
};
template<typename T>
void fork_join_sub_task::spawn_child(const T& task) {
void fork_join_sub_task::spawn_child(const T& task) {
EASY_FUNCTION(profiler::colors::Blue)
static_assert(std::is_base_of<fork_join_sub_task, T>::value, "Only pass fork_join_sub_task subclasses!");
T* new_task = tbb_task_->my_stack_->push(task);
......
......@@ -2,6 +2,7 @@
#ifndef PLS_ROOT_MASTER_TASK_H
#define PLS_ROOT_MASTER_TASK_H
#include <easy/profiler.h>
#include <mutex>
#include "abstract_task.h"
......@@ -13,27 +14,25 @@ namespace pls {
template<typename Function>
class root_task : public abstract_task {
Function function_;
bool finished_;
// Improvement: Remove lock and replace by atomic variable (performance)
base::spin_lock finished_lock_;
std::atomic_uint8_t finished_;
public:
explicit root_task(Function function):
abstract_task{0, id{0}},
function_{function},
finished_{false} {}
finished_{0} {}
root_task(const root_task& other):
abstract_task{0, id{0}},
function_{other.function_},
finished_{0} {}
bool finished() {
std::lock_guard<base::spin_lock> lock{finished_lock_};
return finished_;
}
void execute() override {
EASY_BLOCK("execute root_task", profiler::colors::LightGreen);
function_();
{
std::lock_guard<base::spin_lock> lock{finished_lock_};
finished_ = true;
}
finished_ = 1;
}
bool internal_stealing(abstract_task* /*other_task*/) override {
......@@ -55,6 +54,7 @@ namespace pls {
master_task_{master_task} {}
void execute() override {
EASY_BLOCK("execute root_task", profiler::colors::LightGreen);
do {
steal_work();
} while (!master_task_->finished());
......
......@@ -2,6 +2,7 @@
#ifndef PLS_SCHEDULER_H
#define PLS_SCHEDULER_H
#include <easy/profiler.h>
#include <array>
#include <iostream>
......@@ -64,6 +65,7 @@ namespace pls {
template<typename Function>
void perform_work(Function work_section) {
EASY_FUNCTION();
root_task<Function> master{work_section};
// Push root task on stacks
......
#include <easy/profiler.h>
#include "pls/internal/scheduling/thread_state.h"
#include "pls/internal/scheduling/abstract_task.h"
#include "pls/internal/scheduling/scheduler.h"
......@@ -6,6 +8,7 @@ namespace pls {
namespace internal {
namespace scheduling {
bool abstract_task::steal_work() {
EASY_FUNCTION(profiler::colors::Orange);
auto my_state = base::this_thread::state<thread_state>();
auto my_scheduler = my_state->scheduler_;
......@@ -15,14 +18,19 @@ namespace pls {
auto target_state = my_scheduler->thread_state_for(target);
// TODO: Cleaner Locking Using std::lock_guard
EASY_BLOCK("Acquire Thread Lock", profiler::colors::Red)
target_state->lock_.lock();
EASY_END_BLOCK;
// Dig down to our level
EASY_BLOCK("Go to our level")
abstract_task* current_task = target_state->root_task_;
while (current_task != nullptr && current_task->depth() < depth()) {
current_task = current_task->child_task_;
}
EASY_END_BLOCK;
EASY_BLOCK("Internal Steal")
if (current_task != nullptr) {
// See if it equals our type and depth of task
if (current_task->unique_id_ == unique_id_ &&
......@@ -37,10 +45,12 @@ namespace pls {
current_task = current_task->child_task_;
}
}
EASY_END_BLOCK;
// Execute 'top level task steal' if possible
// (only try deeper tasks to keep depth restricted stealing)
EASY_BLOCK("Top Level Steal")
while (current_task != nullptr) {
auto lock = &target_state->lock_;
if (current_task->split_task(lock)) {
......@@ -50,6 +60,7 @@ namespace pls {
current_task = current_task->child_task_;
}
EASY_END_BLOCK;
target_state->lock_.unlock();
}
......
#include <easy/profiler.h>
#include "pls/internal/scheduling/scheduler.h"
#include "pls/internal/scheduling/fork_join_task.h"
......@@ -16,9 +18,11 @@ namespace pls {
}
void fork_join_sub_task::execute() {
EASY_BLOCK("execute sub_task", profiler::colors::Green);
tbb_task_->currently_executing_ = this;
execute_internal();
tbb_task_->currently_executing_ = nullptr;
EASY_END_BLOCK;
wait_for_all();
if (parent_ != nullptr) {
......@@ -40,13 +44,18 @@ namespace pls {
void fork_join_sub_task::wait_for_all() {
while (ref_count_ > 0) {
EASY_BLOCK("get local sub task", profiler::colors::Blue)
fork_join_sub_task* local_task = tbb_task_->get_local_sub_task();
EASY_END_BLOCK
if (local_task != nullptr) {
local_task->execute();
} else {
// Try to steal work.
// External steal will be executed implicitly if success
if (tbb_task_->steal_work()) {
EASY_BLOCK("steal work", profiler::colors::Blue)
bool internal_steal_success = tbb_task_->steal_work();
EASY_END_BLOCK
if (internal_steal_success) {
tbb_task_->last_stolen_->execute();
}
}
......@@ -63,6 +72,7 @@ namespace pls {
}
bool fork_join_task::internal_stealing(abstract_task* other_task) {
EASY_FUNCTION(profiler::colors::Blue);
auto cast_other_task = reinterpret_cast<fork_join_task*>(other_task);
auto stolen_sub_task = cast_other_task->get_stolen_sub_task();
......@@ -80,6 +90,7 @@ namespace pls {
}
bool fork_join_task::split_task(base::spin_lock* lock) {
EASY_FUNCTION(profiler::colors::Blue);
fork_join_sub_task* stolen_sub_task = get_stolen_sub_task();
if (stolen_sub_task == nullptr) {
return false;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment