Commit 39d9aeee by FritzFlorian

Add unbalanced tree search benchmark.

parent 18b2d744
......@@ -7,7 +7,7 @@ set(CMAKE_CXX_STANDARD 11)
# separate library and test/example executable output paths.
set(EXECUTABLE_OUTPUT_PATH ${CMAKE_BINARY_DIR}/bin)
set(LIBRARY_OUTPUT_PATH ${CMAKE_BINARY_DIR}/lib)
set(LIBRARY_OUTPUT_PATH ${CMAKE_BINARY_DIR}/lib)
# specific setup code is located in individual files.
include(cmake/DisabelInSource.cmake)
......@@ -34,11 +34,12 @@ add_subdirectory(app/playground)
add_subdirectory(app/test_for_new)
add_subdirectory(app/invoke_parallel)
add_subdirectory(app/benchmark_fft)
add_subdirectory(app/benchmark_unbalanced)
# Add optional tests
option(PACKAGE_TESTS "Build the tests" ON)
if(PACKAGE_TESTS)
if (PACKAGE_TESTS)
enable_testing()
add_subdirectory(test)
add_test(NAME AllTests COMMAND tests)
endif()
endif ()
......@@ -8,7 +8,7 @@
static constexpr int CUTOFF = 16;
static constexpr int NUM_ITERATIONS = 1000;
static constexpr int INPUT_SIZE = 2064;
static constexpr int INPUT_SIZE = 8192;
typedef std::vector<std::complex<double>> complex_vector;
void divide(complex_vector::iterator data, int n) {
......
# Build the unbalanced tree search benchmark executable from its sources
# (picosha2.h is the bundled SHA-256 header used by node.h).
add_executable(benchmark_unbalanced main.cpp node.h node.cpp picosha2.h)
# The benchmark links against the pls library target.
target_link_libraries(benchmark_unbalanced pls)
# Additionally link the easy_profiler library when EASY_PROFILER is set.
if (EASY_PROFILER)
target_link_libraries(benchmark_unbalanced easy_profiler)
endif ()
MIT License
Copyright (c) 2017 okdshin
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
\ No newline at end of file
#include <pls/pls.h>
#include <pls/internal/helpers/profiler.h>
#include <pls/internal/helpers/mini_benchmark.h>
#include "node.h"
// Parameters of the benchmarked tree; main() passes the first four on to unbalanced_tree_search().
// Seed from which the root node's state is derived.
const int SEED = 42;
// Number of children the root node always spawns.
const int ROOT_CHILDREN = 140;
// Threshold for non-root nodes: a node spawns children when its state-derived random value is below Q.
const double Q = 0.124875;
// Number of children a non-root node spawns when it spawns any.
const int NORMAL_CHILDREN = 8;
// Not referenced in the visible code; presumably the expected total node count
// for the parameters above - TODO confirm.
const int NUM_NODES = 71069;
/**
 * Counts the nodes of the sub-tree rooted at the given node, including the node itself.
 *
 * Each child sub-tree is counted by a separately spawned fork-join sub-task.
 * The function must therefore run inside a fork-join task, as it uses
 * pls::fork_join_sub_task::current() to spawn its children.
 *
 * @param node Root of the sub-tree to count.
 * @return Number of nodes in the sub-tree (1 for a leaf).
 */
int count_child_nodes(uts::node &node) {
  int child_count = 1;  // Count this node itself.
  std::vector<uts::node> children = node.spawn_child_nodes();

  if (children.empty()) {
    return child_count;
  }

  auto current_task = pls::fork_join_sub_task::current();
  std::vector<int> results(children.size());
  for (size_t i = 0; i < children.size(); i++) {
    // Capture the index by value: the loop variable changes before the sub-task runs.
    // 'results' and 'children' are captured by reference; they stay alive
    // until wait_for_all() below has returned.
    auto lambda = [&results, &children, i] { results[i] = count_child_nodes(children[i]); };
    // decltype is the standard replacement for the GNU-only 'typeof' extension.
    pls::fork_join_lambda_by_value<decltype(lambda)> sub_task(lambda);
    current_task->spawn_child(sub_task);
  }
  current_task->wait_for_all();

  for (auto result : results) {
    child_count += result;
  }

  return child_count;
}
int unbalanced_tree_search(int seed, int root_children, double q, int normal_children) {
static auto id = pls::unique_id::create(42);
int result;
auto lambda = [&] {
uts::node root(seed, root_children, q, normal_children);
result = count_child_nodes(root);
};
pls::fork_join_lambda_by_reference<typeof(lambda)> task(lambda);
pls::fork_join_lambda_by_reference<typeof(lambda)> sub_task(lambda);
pls::fork_join_task root_task{&sub_task, id};
pls::scheduler::execute_task(root_task);
return result;
}
int main() {
PROFILE_ENABLE
pls::internal::helpers::run_mini_benchmark([&] {
unbalanced_tree_search(SEED, ROOT_CHILDREN, Q, NORMAL_CHILDREN);
}, 8, 4000);
PROFILE_SAVE("test_profile.prof")
}
//int main() {
// PROFILE_ENABLE
// pls::malloc_scheduler_memory my_scheduler_memory{8, 2u << 18};
// pls::scheduler scheduler{&my_scheduler_memory, 8};
//
// scheduler.perform_work([&] {
// PROFILE_MAIN_THREAD
// for (int i = 0; i < 10; i++) {
// PROFILE_WORK_BLOCK("Top Level")
// int result = unbalanced_tree_search(SEED, ROOT_CHILDREN, Q, NORMAL_CHILDREN);
// std::cout << result << std::endl;
// }
// });
//
// PROFILE_SAVE("test_profile.prof")
//}
#include "node.h"
namespace uts {
/**
 * Derives the state of the child with the given index from this node's state.
 *
 * The child state is the SHA-256 hash of this node's state followed by the
 * four bytes of the index, cut down to the size of node_state.
 *
 * @param index Position of the child among its siblings.
 * @return The state for that child.
 */
node_state node::generate_child_state(uint32_t index) {
  node_state result;

  // Serialize the index in a fixed (little-endian) byte order instead of
  // hashing its in-memory representation. This keeps the tree identical on
  // every platform and matches the previous result on little-endian machines.
  std::array<uint8_t, 4> index_bytes;
  index_bytes[0] = static_cast<uint8_t>(0xFF & (index >> 0));
  index_bytes[1] = static_cast<uint8_t>(0xFF & (index >> 8));
  index_bytes[2] = static_cast<uint8_t>(0xFF & (index >> 16));
  index_bytes[3] = static_cast<uint8_t>(0xFF & (index >> 24));

  picosha2::hash256_one_by_one hasher;
  hasher.process(state_.begin(), state_.end());
  hasher.process(index_bytes.begin(), index_bytes.end());
  hasher.finish();
  hasher.get_hash_bytes(result.begin(), result.end());

  return result;
}
/**
 * Maps this node's state to a number in the range [0, 1].
 *
 * Bytes 16 to 19 of the state are read as one big-endian 32 bit word, the
 * top bit is cleared and the remaining value is divided by INT32_MAX.
 *
 * @return The state-derived value used to decide whether the node has children.
 */
double node::get_state_random() {
  // Combine the four state bytes, most significant byte first.
  uint32_t word = 0;
  for (int byte_index = 16; byte_index < 20; byte_index++) {
    word = (word << 8) | static_cast<uint32_t>(state_[byte_index]);
  }

  // Clearing the top bit keeps the value non-negative as a signed integer.
  const int32_t non_negative = static_cast<int32_t>(word & 0x7fffffff);

  return static_cast<double>(non_negative) / static_cast<double>(INT32_MAX);
}
}
#ifndef UTS_NODE_H
#define UTS_NODE_H
#include <cstdint>
#include <array>
#include <vector>
#include "picosha2.h"
namespace uts {
using node_state = std::array<uint8_t, 20>;
/**
* Node of an unballanced binomial tree (https://www.cs.unc.edu/~olivier/LCPC06.pdf).
* To build up the tree recursivly call spawn_child_nodes on each node until leaves are reached.
* The tree is not built up directly in memory, but rather by the recursive calls.
*/
class node {
// The state is used to allow a deterministic tree construction using sha256 hashes.
node_state state_;
// Set this to a positive number for the root node to start the tree with a specific size
int root_children_;
// general branching factors
double q_;
int b_;
// Private constructor for children
node(node_state state, double q, int b) : state_{state}, root_children_{-1}, q_{q}, b_{b} {}
std::array<uint8_t, 20> generate_child_state(uint32_t index);
double get_state_random();
public:
node(int seed, int root_children, double q, int b) : state_({{}}), root_children_{root_children}, q_{q}, b_{b} {
for (int i = 0; i < 16; i++) {
state_[i] = 0;
}
state_[16] = static_cast<uint8_t>(0xFF & (seed >> 24));
state_[17] = static_cast<uint8_t>(0xFF & (seed >> 16));
state_[18] = static_cast<uint8_t>(0xFF & (seed >> 8));
state_[19] = static_cast<uint8_t>(0xFF & (seed >> 0));
picosha2::hash256_one_by_one hasher;
hasher.process(state_.begin(), state_.end());
hasher.finish();
hasher.get_hash_bytes(state_.begin(), state_.end());
}
std::vector<node> spawn_child_nodes() {
double state_random = get_state_random();
int num_children;
if (root_children_ > 0) {
num_children = root_children_; // Root always spawns children
} else if (state_random < q_) {
num_children = b_;
} else {
num_children = 0;
}
std::vector<node> result;
for (int i = 0; i < num_children; i++) {
result.push_back(node(generate_child_state(i), q_, b_));
}
return result;
}
};
}
#endif //UTS_NODE_H
......@@ -6,7 +6,7 @@
#include <vector>
static constexpr int CUTOFF = 16;
static constexpr int INPUT_SIZE = 2064;
static constexpr int INPUT_SIZE = 8192;
typedef std::vector<std::complex<double>> complex_vector;
void divide(complex_vector::iterator data, int n) {
......
......@@ -2,6 +2,7 @@
#ifndef PLS_INVOKE_PARALLEL_IMPL_H
#define PLS_INVOKE_PARALLEL_IMPL_H
#include <pls/internal/scheduling/fork_join_task.h>
#include "pls/internal/scheduling/fork_join_task.h"
#include "pls/internal/scheduling/scheduler.h"
#include "pls/internal/helpers/unique_id.h"
......@@ -19,8 +20,7 @@ inline void run_body(const Body &internal_body, const abstract_task::id &id) {
// if not we will spawn it as a new 'fork-join-style' task.
auto current_task = scheduler::current_task();
if (current_task->unique_id() == id) {
auto current_sub_task = reinterpret_cast<fork_join_task *>(current_task)->currently_executing();
internal_body(current_sub_task);
internal_body();
} else {
fork_join_lambda_by_reference<Body> root_body(&internal_body);
fork_join_task root_task{&root_body, id};
......@@ -37,8 +37,7 @@ void invoke_parallel(const Function1 &function1, const Function2 &function2) {
static abstract_task::id id = unique_id::create<Function1, Function2>();
auto internal_body = [&](fork_join_sub_task *this_task) {
auto sub_task_body_2 = [&](fork_join_sub_task *) { function2(); };
auto sub_task_2 = fork_join_lambda_by_reference<decltype(sub_task_body_2)>(&sub_task_body_2);
auto sub_task_2 = fork_join_lambda_by_reference<Function2>(function2);
this_task->spawn_child(sub_task_2);
function1(); // Execute first function 'inline' without spawning a sub_task object
......@@ -54,16 +53,15 @@ void invoke_parallel(const Function1 &function1, const Function2 &function2, con
using namespace ::pls::internal::helpers;
static abstract_task::id id = unique_id::create<Function1, Function2, Function3>();
auto internal_body = [&](fork_join_sub_task *this_task) {
auto sub_task_body_2 = [&](fork_join_sub_task *) { function2(); };
auto sub_task_2 = fork_join_lambda_by_reference<decltype(sub_task_body_2)>(&sub_task_body_2);
auto sub_task_body_3 = [&](fork_join_sub_task *) { function3(); };
auto sub_task_3 = fork_join_lambda_by_reference<decltype(sub_task_body_3)>(&sub_task_body_3);
auto internal_body = [&]() {
auto current_task = fork_join_sub_task::current();
auto sub_task_2 = fork_join_lambda_by_reference<Function2>(function2);
auto sub_task_3 = fork_join_lambda_by_reference<Function3>(function3);
this_task->spawn_child(sub_task_2);
this_task->spawn_child(sub_task_3);
current_task->spawn_child(sub_task_2);
current_task->spawn_child(sub_task_3);
function1(); // Execute first function 'inline' without spawning a sub_task object
this_task->wait_for_all();
current_task->wait_for_all();
};
internal::run_body(internal_body, id);
......
......@@ -43,20 +43,21 @@ class fork_join_sub_task {
void spawn_child(T &sub_task);
void wait_for_all();
static fork_join_sub_task *current();
private:
void execute();
};
template<typename Function>
class fork_join_lambda_by_reference : public fork_join_sub_task {
const Function *function_;
const Function &function_;
public:
explicit fork_join_lambda_by_reference(const Function *function) : fork_join_sub_task{}, function_{function} {};
explicit fork_join_lambda_by_reference(const Function &function) : fork_join_sub_task{}, function_{function} {};
protected:
void execute_internal() override {
(*function_)(this);
function_();
}
};
......@@ -69,7 +70,7 @@ class fork_join_lambda_by_value : public fork_join_sub_task {
protected:
void execute_internal() override {
function_(this);
function_();
}
};
......
......@@ -18,6 +18,8 @@ using task_id = internal::scheduling::abstract_task::id;
using unique_id = internal::helpers::unique_id;
using internal::scheduling::fork_join_sub_task;
using internal::scheduling::fork_join_lambda_by_reference;
using internal::scheduling::fork_join_lambda_by_value;
using internal::scheduling::fork_join_task;
using algorithm::invoke_parallel;
......
......@@ -66,6 +66,10 @@ fork_join_sub_task *fork_join_task::get_stolen_sub_task() {
return deque_.pop_head();
}
/**
 * Returns the sub-task that is currently being executed by the scheduler's current task.
 *
 * @return The currently executing sub-task, or nullptr if the scheduler's
 *         current task is not a fork_join_task.
 */
fork_join_sub_task *fork_join_sub_task::current() {
  // dynamic_cast yields nullptr when the current task has a different type;
  // check it instead of dereferencing the result blindly.
  auto *enclosing_task = dynamic_cast<fork_join_task *>(scheduler::current_task());
  if (enclosing_task == nullptr) {
    return nullptr;
  }

  return enclosing_task->currently_executing();
}
bool fork_join_task::internal_stealing(abstract_task *other_task) {
PROFILE_STEALING("fork_join_task::internal_stealin")
auto cast_other_task = reinterpret_cast<fork_join_task *>(other_task);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment