Commit e0604d1f by FritzFlorian

Add unbalanced tree search benchmark for v3.

parent d5b66aba
Pipeline #1408 failed with stages
in 37 seconds
add_executable(benchmark_unbalanced main.cpp node.h function_node.cpp picosha2.h) add_executable(benchmark_unbalanced_pls_v3 main.cpp)
target_link_libraries(benchmark_unbalanced pls) target_link_libraries(benchmark_unbalanced_pls_v3 benchmark_runner benchmark_base pls)
if (EASY_PROFILER) if (EASY_PROFILER)
target_link_libraries(benchmark_unbalanced easy_profiler) target_link_libraries(benchmark_unbalanced_pls_v3 easy_profiler)
endif () endif ()
#include "node.h"
namespace uts {

/**
 * Derives the state of the child with the given index.
 *
 * The parent's state bytes followed by the four in-memory bytes of `index` are fed
 * through the sha256 hasher; the digest bytes are then written into the range of a
 * fresh node_state, which is returned.
 *
 * NOTE(review): the index is hashed via its raw object representation, so the
 * resulting state presumably depends on the host byte order - confirm if results
 * must be comparable across platforms.
 */
node_state node::generate_child_state(uint32_t index) {
  picosha2::hash256_one_by_one sha;
  sha.process(state_.begin(), state_.end());

  // Append the raw bytes of the child index to the hashed input.
  auto *index_bytes = reinterpret_cast<uint8_t *>(&index);
  sha.process(index_bytes, index_bytes + 4);
  sha.finish();

  node_state child_state;
  sha.get_hash_bytes(child_state.begin(), child_state.end());
  return child_state;
}

/**
 * Maps the node's state to a pseudo random number.
 *
 * Bytes 16..19 of the state are combined (most significant byte first) into a
 * 32 bit word, the top bit is cleared and the result is divided by INT32_MAX.
 *
 * @return a value between 0.0 and 1.0 (both inclusive).
 */
double node::get_state_random() {
  // Assemble state_[16], state_[17], state_[18], state_[19] into one word.
  uint32_t word = 0;
  for (int pos = 16; pos < 20; pos++) {
    word = (word << 8) | static_cast<uint32_t>(state_[pos]);
  }

  // Clear the sign bit so the conversion to int32_t never yields a negative value.
  word &= 0x7fffffff;
  const auto non_negative = static_cast<int32_t>(word);

  return static_cast<double>(non_negative) / static_cast<double>(INT32_MAX);
}

}
#include "pls/internal/scheduling/scheduler.h" #include "pls/internal/scheduling/scheduler.h"
#include "pls/internal/scheduling/parallel_result.h" #include "pls/internal/scheduling/static_scheduler_memory.h"
#include "pls/internal/scheduling/scheduler_memory.h"
using namespace pls::internal::scheduling;
#include "node.h" using namespace pls::internal::scheduling;
const int SEED = 42; #include "benchmark_runner.h"
const int ROOT_CHILDREN = 140; #include "benchmark_base/unbalanced.h"
const double Q = 0.124875;
const int NORMAL_CHILDREN = 8;
const int NUM_NODES = 71069; using namespace comparison_benchmarks::base;
parallel_result<int> count_child_nodes(uts::node &node) { #include <atomic>
int child_count = 1;
std::vector<uts::node> children = node.spawn_child_nodes();
if (children.empty()) { int count_child_nodes(unbalanced::node &node) {
return child_count; if (node.get_num_children() < 1) {
return 1;
} }
std::vector<int> results(children.size()); std::atomic<int> count{1};
for (size_t i = 0; i < children.size(); i++) { for (int i = 0; i < node.get_num_children(); i++) {
size_t index = i; scheduler::spawn([i, &count, &node] {
auto lambda = [&, index] { unbalanced::node child_node = node.spawn_child_node(i);
results[index] = count_child_nodes(children[index]); count.fetch_add(count_child_nodes(child_node));
}; });
using child_type = pls::lambda_task_by_value<typeof(lambda)>;
pls::scheduler::spawn_child<child_type>(lambda);
}
pls::scheduler::wait_for_all();
for (auto result : results) {
child_count += result;
} }
scheduler::sync();
return child_count; return count;
} }
parallel_result<int> unbalanced_tree_search(int seed, int root_children, double q, int normal_children) { int unbalanced_tree_search(int seed, int root_children, double q, int normal_children) {
int result; unbalanced::node root(seed, root_children, q, normal_children);
return count_child_nodes(root);
auto lambda = [&] {
uts::node root(seed, root_children, q, normal_children);
result = count_child_nodes(root);
};
using child_type = pls::lambda_task_by_reference<typeof(lambda)>;
pls::scheduler::spawn_child<child_type>(lambda);
pls::scheduler::wait_for_all();
return result;
} }
constexpr size_t MAX_NUM_THREADS = 5; constexpr int MAX_NUM_THREADS = 8;
constexpr int MAX_NUM_TASKS = 256;
constexpr int MAX_STACK_SIZE = 1024 * 2;
constexpr size_t MAX_NUM_TASKS = 128; static_scheduler_memory<MAX_NUM_THREADS,
MAX_NUM_TASKS,
MAX_STACK_SIZE> global_scheduler_memory;
constexpr size_t MAX_NUM_CONTS = 128; int main(int argc, char **argv) {
constexpr size_t MAX_CONT_SIZE = 512; int num_threads;
string directory;
benchmark_runner::read_args(argc, argv, num_threads, directory);
volatile int result; string test_name = to_string(num_threads) + ".csv";
int main() { string full_directory = directory + "/PLS_v3/";
PROFILE_ENABLE benchmark_runner runner{full_directory, test_name};
static_scheduler_memory<MAX_NUM_THREADS,
MAX_NUM_TASKS,
MAX_NUM_CONTS,
MAX_CONT_SIZE> static_scheduler_memory;
scheduler scheduler{static_scheduler_memory, MAX_NUM_THREADS}; scheduler scheduler{global_scheduler_memory, (unsigned) num_threads};
scheduler.perform_work([&]() { runner.run_iterations(unbalanced::NUM_ITERATIONS, [&]() {
return scheduler::par([&]() { scheduler.perform_work([&]() {
return unbalanced_tree_search(SEED, ROOT_CHILDREN, Q, NORMAL_CHILDREN); unbalanced_tree_search(unbalanced::SEED,
}, []() { unbalanced::ROOT_CHILDREN,
return parallel_result<int>{0}; unbalanced::Q,
}).then([](int a, int) { unbalanced::NORMAL_CHILDREN);
result = a;
return parallel_result<int>{0};
}); });
}); }, unbalanced::WARMUP_ITERATIONS);
runner.commit_results(true);
PROFILE_SAVE("test_profile.prof")
} }
//int main() { //int main() {
......
#ifndef UTS_NODE_H
#define UTS_NODE_H
#include <cstdint>
#include <array>
#include <vector>
#include "picosha2.h"
namespace uts {

// Fixed-size byte state carried by every tree node.
using node_state = std::array<uint8_t, 20>;

/**
 * Node of an unbalanced binomial tree (https://www.cs.unc.edu/~olivier/LCPC06.pdf).
 * To build up the tree, recursively call spawn_child_nodes on each node until leaves are reached.
 * The tree is not built up directly in memory, but rather by the recursive calls.
 */
class node {
  // The state is used to allow a deterministic tree construction using sha256 hashes.
  node_state state_;

  // Set this to a positive number for the root node to start the tree with a specific size.
  // Nodes created through the private constructor store -1 here.
  int root_children_;

  // General branching factors: a non-root node spawns b_ children when its
  // state-derived random number is below q_, and no children otherwise.
  double q_;
  int b_;

  // Private constructor for children
  node(node_state state, double q, int b) : state_{state}, root_children_{-1}, q_{q}, b_{b} {}

  // Computes the state of the child with the given index from this node's state.
  node_state generate_child_state(uint32_t index);
  // Derives a number from this node's state that is compared against q_.
  double get_state_random();

 public:
  /**
   * Creates a root node.
   *
   * @param seed          Seed whose four bytes are placed (most significant first) into bytes 16..19 of the initial state.
   * @param root_children Number of children the root spawns (used when positive).
   * @param q             Threshold below which a non-root node spawns children.
   * @param b             Number of children a spawning non-root node creates.
   */
  node(int seed, int root_children, double q, int b) : state_{}, root_children_{root_children}, q_{q}, b_{b} {
    // state_{} already zero-fills all 20 bytes, so bytes 0..15 need no explicit clearing.
    state_[16] = static_cast<uint8_t>(0xFF & (seed >> 24));
    state_[17] = static_cast<uint8_t>(0xFF & (seed >> 16));
    state_[18] = static_cast<uint8_t>(0xFF & (seed >> 8));
    state_[19] = static_cast<uint8_t>(0xFF & (seed >> 0));

    // Replace the seed-based state by its hash.
    picosha2::hash256_one_by_one hasher;
    hasher.process(state_.begin(), state_.end());
    hasher.finish();
    hasher.get_hash_bytes(state_.begin(), state_.end());
  }

  /**
   * Creates the direct children of this node.
   *
   * @return one node per child; empty when this node is a leaf.
   */
  std::vector<node> spawn_child_nodes() {
    double state_random = get_state_random();

    int num_children;
    if (root_children_ > 0) {
      num_children = root_children_; // Root always spawns children
    } else if (state_random < q_) {
      num_children = b_;
    } else {
      num_children = 0;
    }

    std::vector<node> result;
    if (num_children > 0) {
      // The final size is known up front, so allocate once instead of growing repeatedly.
      // The guard keeps a non-positive count from being converted to a huge size_t.
      result.reserve(static_cast<std::size_t>(num_children));
    }
    for (int i = 0; i < num_children; i++) {
      result.push_back(node(generate_child_state(i), q_, b_));
    }
    return result;
  }
};

}
#endif //UTS_NODE_H
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment