Commit 79ac0243 by FritzFlorian

Add two 'standardized' benchmarks.

parent 2f539691
Pipeline #1371 failed with stages
in 37 seconds
...@@ -26,6 +26,9 @@ list(APPEND CMAKE_PREFIX_PATH "${PROJECT_SOURCE_DIR}/cmake") ...@@ -26,6 +26,9 @@ list(APPEND CMAKE_PREFIX_PATH "${PROJECT_SOURCE_DIR}/cmake")
# Each library has its own CMakeLists.txt that should make it available as a library target, # Each library has its own CMakeLists.txt that should make it available as a library target,
# thus allowing one to include it as any cmake dependency later on. # thus allowing one to include it as any cmake dependency later on.
add_subdirectory(extern/catch2) add_subdirectory(extern/catch2)
add_subdirectory(extern/picosha2)
add_subdirectory(extern/benchmark_base)
add_subdirectory(extern/benchmark_runner)
# Include all internal subprojects (library, examples, testing). # Include all internal subprojects (library, examples, testing).
add_subdirectory(lib/pls) add_subdirectory(lib/pls)
......
add_executable(benchmark_fft main.cpp) add_executable(benchmark_fft_pls_v2 main.cpp)
target_link_libraries(benchmark_fft pls) target_link_libraries(benchmark_fft_pls_v2 pls benchmark_runner benchmark_base)
if(EASY_PROFILER) if (EASY_PROFILER)
target_link_libraries(benchmark_fft easy_profiler) target_link_libraries(benchmark_fft_pls_v2 easy_profiler)
endif() endif ()
...@@ -2,142 +2,91 @@ ...@@ -2,142 +2,91 @@
#include "pls/internal/scheduling/parallel_result.h" #include "pls/internal/scheduling/parallel_result.h"
#include "pls/internal/scheduling/scheduler_memory.h" #include "pls/internal/scheduling/scheduler_memory.h"
#include "pls/internal/helpers/profiler.h" #include "pls/internal/helpers/profiler.h"
using namespace pls::internal::scheduling; using namespace pls::internal::scheduling;
#include <iostream> #include <iostream>
#include <complex> #include <complex>
#include <vector> #include <vector>
#include <atomic>
static constexpr int CUTOFF = 16;
static constexpr int INPUT_SIZE = 16384;
typedef std::vector<std::complex<double>> complex_vector;
void divide(complex_vector::iterator data, int n) {
complex_vector tmp_odd_elements(n / 2);
for (int i = 0; i < n / 2; i++) {
tmp_odd_elements[i] = data[i * 2 + 1];
}
for (int i = 0; i < n / 2; i++) {
data[i] = data[i * 2];
}
for (int i = 0; i < n / 2; i++) {
data[i + n / 2] = tmp_odd_elements[i];
}
}
void combine(complex_vector::iterator data, int n) { #include "benchmark_runner.h"
for (int i = 0; i < n / 2; i++) { #include "benchmark_base/fft.h"
std::complex<double> even = data[i];
std::complex<double> odd = data[i + n / 2];
// w is the "twiddle-factor".
// this could be cached, but we run the same 'data_structures' algorithm parallel/serial,
// so it won't impact the performance comparison.
std::complex<double> w = exp(std::complex<double>(0, -2. * M_PI * i / n));
data[i] = even + w * odd;
data[i + n / 2] = even - w * odd;
}
}
void fft_normal(complex_vector::iterator data, int n) {
if (n < 2) {
return;
}
divide(data, n); using namespace comparison_benchmarks::base;
fft_normal(data, n / 2);
fft_normal(data + n / 2, n / 2);
combine(data, n);
}
parallel_result<short> fft(complex_vector::iterator data, int n) { parallel_result<short> conquer(fft::complex_vector::iterator data, int n) {
if (n < 2) { if (n < 2) {
return parallel_result<short>{0}; return parallel_result<short>{0};
} }
divide(data, n); fft::divide(data, n);
if (n <= CUTOFF) { if (n <= fft::RECURSIVE_CUTOFF) {
fft_normal(data, n / 2); fft::conquer(data, n / 2);
fft_normal(data + n / 2, n / 2); fft::conquer(data + n / 2, n / 2);
combine(data, n); fft::combine(data, n);
return parallel_result<short>{0}; return parallel_result<short>{0};
} else { } else {
return scheduler::par([=]() { return scheduler::par([=]() {
return fft(data, n / 2); return conquer(data, n / 2);
}, [=]() { }, [=]() {
return fft(data + n / 2, n / 2); return conquer(data + n / 2, n / 2);
}).then([=](int, int) { }).then([=](int, int) {
combine(data, n); fft::combine(data, n);
return parallel_result<short>{0}; return parallel_result<short>{0};
}); });
} }
} }
complex_vector prepare_input(int input_size) { constexpr int MAX_NUM_THREADS = 8;
std::vector<double> known_frequencies{2, 11, 52, 88, 256}; constexpr int MAX_NUM_TASKS = 64;
complex_vector data(input_size); constexpr int MAX_NUM_CONTS = 64;
constexpr int MAX_CONT_SIZE = 256;
// Set our input data to match a time series of the known_frequencies.
// When applying fft to this time-series we should find these frequencies.
for (int i = 0; i < input_size; i++) {
data[i] = std::complex<double>(0.0, 0.0);
for (auto frequencie : known_frequencies) {
data[i] += sin(2 * M_PI * frequencie * i / input_size);
}
}
return data;
}
static constexpr int NUM_ITERATIONS = 500; int main(int argc, char **argv) {
constexpr size_t NUM_THREADS = 2; int num_threads;
string directory;
benchmark_runner::read_args(argc, argv, num_threads, directory);
constexpr size_t NUM_TASKS = 128; string test_name = to_string(num_threads) + ".csv";
string full_directory = directory + "/PLS_v2/";
benchmark_runner runner{full_directory, test_name};
constexpr size_t NUM_CONTS = 128; fft::complex_vector data = fft::generate_input();
constexpr size_t MAX_CONT_SIZE = 512;
int main() { static_scheduler_memory<MAX_NUM_THREADS,
PROFILE_ENABLE; MAX_NUM_TASKS,
complex_vector initial_input = prepare_input(INPUT_SIZE); MAX_NUM_CONTS,
static_scheduler_memory<NUM_THREADS,
NUM_TASKS,
NUM_CONTS,
MAX_CONT_SIZE> static_scheduler_memory; MAX_CONT_SIZE> static_scheduler_memory;
scheduler scheduler{static_scheduler_memory, NUM_THREADS}; scheduler scheduler{static_scheduler_memory, (unsigned int) num_threads};
auto start = std::chrono::steady_clock::now(); for (int i = 0; i < fft::NUM_WARMUP_ITERATIONS; i++) {
for (int i = 0; i < NUM_ITERATIONS; i++) {
complex_vector input_2(initial_input);
scheduler.perform_work([&]() { scheduler.perform_work([&]() {
PROFILE_MAIN_THREAD;
return scheduler::par([&]() { return scheduler::par([&]() {
return fft(input_2.begin(), INPUT_SIZE); return conquer(data.begin(), fft::SIZE);
}, []() { }, []() {
return parallel_result<int>{0}; return parallel_result<short>{0};
}).then([](int, int) { }).then([&](short, short) {
return parallel_result<int>{0}; return parallel_result<int>{0};
}); });
}); });
PROFILE_LOCK("DONE");
} }
auto end = std::chrono::steady_clock::now();
std::cout << "Framework: " << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count() for (int i = 0; i < fft::NUM_ITERATIONS; i++) {
<< std::endl; scheduler.perform_work([&]() {
PROFILE_SAVE("test_profile.prof"); runner.start_iteration();
start = std::chrono::steady_clock::now(); return scheduler::par([&]() {
for (int i = 0; i < NUM_ITERATIONS; i++) { return conquer(data.begin(), fft::SIZE);
complex_vector input_1(initial_input); }, []() {
fft_normal(input_1.begin(), INPUT_SIZE); return parallel_result<short>{0};
}).then([&](short, short) {
runner.end_iteration();
return parallel_result<int>{0};
});
});
} }
end = std::chrono::steady_clock::now(); runner.commit_results(true);
std::cout << "Normal: " << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count()
<< std::endl;
return 0; return 0;
} }
add_executable(benchmark_matrix main.cpp) add_executable(benchmark_matrix_pls_v2 main.cpp)
target_link_libraries(benchmark_matrix pls) target_link_libraries(benchmark_matrix_pls_v2 pls benchmark_runner benchmark_base)
if (EASY_PROFILER) if (EASY_PROFILER)
target_link_libraries(benchmark_matrix easy_profiler) target_link_libraries(benchmark_matrix_pls_v2 easy_profiler)
endif () endif ()
...@@ -2,112 +2,78 @@ ...@@ -2,112 +2,78 @@
#include "pls/internal/scheduling/parallel_result.h" #include "pls/internal/scheduling/parallel_result.h"
#include "pls/internal/scheduling/scheduler_memory.h" #include "pls/internal/scheduling/scheduler_memory.h"
#include "pls/algorithms/for_each.h" #include "pls/algorithms/for_each.h"
using namespace pls::internal::scheduling; using namespace pls::internal::scheduling;
#include <chrono> #include "benchmark_runner.h"
#include "benchmark_base/matrix.h"
const int MATRIX_SIZE = 128; using namespace comparison_benchmarks::base;
template<typename T, int SIZE> template<typename T, int SIZE>
class matrix { class pls_matrix : public matrix::matrix<T, SIZE> {
public: public:
T data[SIZE][SIZE]; pls_matrix() : matrix::matrix<T, SIZE>() {}
explicit matrix(T i = 1) {
std::fill(&data[0][0], &data[0][0] + SIZE * SIZE, i);
}
parallel_result<int> multiply(const matrix<T, SIZE> &a, const matrix<T, SIZE> &b) { parallel_result<int> pls_multiply(const matrix::matrix<T, SIZE> &a, const matrix::matrix<T, SIZE> &b) {
return pls::algorithm::for_each_range(0, SIZE, [&](int i) { return pls::algorithm::for_each_range(0, SIZE, [this, &a, &b](int i) {
this->multiply_column(i, a, b); this->multiply_column(i, a, b);
}); });
} }
private:
void multiply_column(int i, const matrix<T, SIZE> &a, const matrix<T, SIZE> &b) {
for (int j = 0; j < SIZE; ++j) {
data[i][j] = 0;
}
for (int k = 0; k < SIZE; ++k) {
for (int j = 0; j < SIZE; ++j) {
data[i][j] += a.data[i][k] * b.data[k][j];
}
}
}
}; };
void fill_with_data(matrix<double, MATRIX_SIZE> &a, matrix<double, MATRIX_SIZE> &b) { constexpr size_t MAX_NUM_THREADS = 8;
// Fill in some data... constexpr size_t MAX_NUM_TASKS = 32;
for (int i = 0; i < MATRIX_SIZE; i++) { constexpr size_t MAX_NUM_CONTS = 32;
for (int j = 0; j < MATRIX_SIZE; j++) { constexpr size_t MAX_CONT_SIZE = 512;
a.data[i][j] = i;
b.data[i][j] = j;
}
}
}
static constexpr int NUM_ITERATIONS = 1000; int main(int argc, char **argv) {
constexpr size_t NUM_THREADS = 3; int num_threads;
string directory;
benchmark_runner::read_args(argc, argv, num_threads, directory);
constexpr size_t NUM_TASKS = 128; string test_name = to_string(num_threads) + ".csv";
string full_directory = directory + "/PLS_v2/";
benchmark_runner runner{full_directory, test_name};
constexpr size_t NUM_CONTS = 128; pls_matrix<double, matrix::MATRIX_SIZE> a;
constexpr size_t MAX_CONT_SIZE = 512; pls_matrix<double, matrix::MATRIX_SIZE> b;
pls_matrix<double, matrix::MATRIX_SIZE> result;
int main() { static_scheduler_memory<MAX_NUM_THREADS,
PROFILE_ENABLE MAX_NUM_TASKS,
matrix<double, MATRIX_SIZE> a; MAX_NUM_CONTS,
matrix<double, MATRIX_SIZE> b;
matrix<double, MATRIX_SIZE> result;
fill_with_data(a, b);
static_scheduler_memory<NUM_THREADS,
NUM_TASKS,
NUM_CONTS,
MAX_CONT_SIZE> static_scheduler_memory; MAX_CONT_SIZE> static_scheduler_memory;
scheduler scheduler{static_scheduler_memory, NUM_THREADS}; scheduler scheduler{static_scheduler_memory, (unsigned int) num_threads};
for (int i = 0; i < matrix::WARMUP_ITERATIONS; i++) {
auto start = std::chrono::steady_clock::now();
for (int i = 0; i < NUM_ITERATIONS; i++) {
scheduler.perform_work([&]() { scheduler.perform_work([&]() {
PROFILE_MAIN_THREAD;
return scheduler::par([&]() { return scheduler::par([&]() {
return result.multiply(a, b); return result.pls_multiply(a, b);
}, []() { }, []() {
return parallel_result<int>{0}; return parallel_result<int>{0};
}).then([](int, int) { }).then([&](int, int) {
return parallel_result<int>{0}; return parallel_result<int>{0};
}); });
}); });
} }
auto end = std::chrono::steady_clock::now();
std::cout << "Framework: " << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count()
<< std::endl;
}
//int main() { for (int i = 0; i < matrix::NUM_ITERATIONS; i++) {
// PROFILE_ENABLE scheduler.perform_work([&]() {
// pls::malloc_scheduler_memory my_scheduler_memory{8, 2u << 18u}; runner.start_iteration();
// pls::scheduler scheduler{&my_scheduler_memory, 4};
//
// matrix<double, MATRIX_SIZE> a;
// matrix<double, MATRIX_SIZE> b;
// matrix<double, MATRIX_SIZE> result;
// fill_with_data(a, b);
//
// scheduler.perform_work([&] {
// auto start_time = std::chrono::high_resolution_clock::now();
// PROFILE_MAIN_THREAD
// for (int i = 0; i < 10000; i++) {
// PROFILE_WORK_BLOCK("Top Level")
// result.multiply(a, b);
// }
// auto end_time = std::chrono::high_resolution_clock::now();
// long time = std::chrono::duration_cast<std::chrono::microseconds>(end_time - start_time).count();
// std::cout << "Runtime: " << time << "us" << std::endl;
// });
//
// PROFILE_SAVE("test_profile.prof")
//}
return scheduler::par([&]() {
return result.pls_multiply(a, b);
}, []() {
return parallel_result<int>{0};
}).then([&](int, int) {
runner.end_iteration();
return parallel_result<int>{0};
});
});
}
runner.commit_results(true);
}
...@@ -51,22 +51,22 @@ parallel_result<int> unbalanced_tree_search(int seed, int root_children, double ...@@ -51,22 +51,22 @@ parallel_result<int> unbalanced_tree_search(int seed, int root_children, double
return result; return result;
} }
constexpr size_t NUM_THREADS = 5; constexpr size_t MAX_NUM_THREADS = 5;
constexpr size_t NUM_TASKS = 128; constexpr size_t MAX_NUM_TASKS = 128;
constexpr size_t NUM_CONTS = 128; constexpr size_t MAX_NUM_CONTS = 128;
constexpr size_t MAX_CONT_SIZE = 512; constexpr size_t MAX_CONT_SIZE = 512;
volatile int result; volatile int result;
int main() { int main() {
PROFILE_ENABLE PROFILE_ENABLE
static_scheduler_memory<NUM_THREADS, static_scheduler_memory<MAX_NUM_THREADS,
NUM_TASKS, MAX_NUM_TASKS,
NUM_CONTS, MAX_NUM_CONTS,
MAX_CONT_SIZE> static_scheduler_memory; MAX_CONT_SIZE> static_scheduler_memory;
scheduler scheduler{static_scheduler_memory, NUM_THREADS}; scheduler scheduler{static_scheduler_memory, MAX_NUM_THREADS};
scheduler.perform_work([&]() { scheduler.perform_work([&]() {
return scheduler::par([&]() { return scheduler::par([&]() {
......
...@@ -8,12 +8,12 @@ ...@@ -8,12 +8,12 @@
using namespace pls::internal; using namespace pls::internal;
constexpr size_t NUM_THREADS = 4; constexpr size_t MAX_NUM_THREADS = 1;
constexpr size_t NUM_TASKS = 128; constexpr size_t MAX_NUM_TASKS = 128;
static constexpr int NUM_ITERATIONS = 100; static constexpr int NUM_ITERATIONS = 10;
constexpr size_t NUM_CONTS = 128; constexpr size_t MAX_NUM_CONTS = 128;
constexpr size_t MAX_CONT_SIZE = 256; constexpr size_t MAX_CONT_SIZE = 256;
int fib_normal(int n) { int fib_normal(int n) {
...@@ -29,8 +29,13 @@ int fib_normal(int n) { ...@@ -29,8 +29,13 @@ int fib_normal(int n) {
} }
scheduling::parallel_result<int> fib(int n) { scheduling::parallel_result<int> fib(int n) {
if (n <= 10) { pls::variable<int> i;
return fib_normal(n); pls::array<int> a{10};
if (n == 0) {
return 0;
}
if (n == 1) {
return 1;
} }
return scheduling::scheduler::par([=]() { return scheduling::scheduler::par([=]() {
...@@ -45,12 +50,12 @@ scheduling::parallel_result<int> fib(int n) { ...@@ -45,12 +50,12 @@ scheduling::parallel_result<int> fib(int n) {
static volatile int result; static volatile int result;
int main() { int main() {
PROFILE_ENABLE; PROFILE_ENABLE;
scheduling::static_scheduler_memory<NUM_THREADS, scheduling::static_scheduler_memory<MAX_NUM_THREADS,
NUM_TASKS, MAX_NUM_TASKS,
NUM_CONTS, MAX_NUM_CONTS,
MAX_CONT_SIZE> static_scheduler_memory; MAX_CONT_SIZE> static_scheduler_memory;
scheduling::scheduler scheduler{static_scheduler_memory, NUM_THREADS}; scheduling::scheduler scheduler{static_scheduler_memory, MAX_NUM_THREADS};
auto start = std::chrono::steady_clock::now(); auto start = std::chrono::steady_clock::now();
for (int i = 0; i < NUM_ITERATIONS; i++) { for (int i = 0; i < NUM_ITERATIONS; i++) {
......
# Configuration and common algorithm pieces for benchmarks
#
# Defines the static library target `benchmark_base`, which bundles the
# benchmark parameters and algorithm building blocks (fft, heat, matrix,
# unbalanced, range).
# Instantiate the sample_images.cpp.in template. The relative output name is
# resolved against the current binary directory, which is exactly the path
# listed as the first source of the library below.
configure_file(src/sample_images.cpp.in sample_images.cpp)
# The library consists of the generated file, the two hand-written
# translation units (fft.cpp, unbalanced.cpp) and the headers.
add_library(benchmark_base STATIC
${CMAKE_CURRENT_BINARY_DIR}/sample_images.cpp
src/fft.cpp include/benchmark_base/fft.h
include/benchmark_base/heat.h
include/benchmark_base/matrix.h
include/benchmark_base/unbalanced.h src/unbalanced.cpp
include/benchmark_base/range.h)
# include/ is propagated to every target linking benchmark_base (PUBLIC),
# while src/ is only added to the include path of the library itself (PRIVATE).
target_include_directories(benchmark_base
PUBLIC
$<INSTALL_INTERFACE:include>
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/src
)
# Link against the picosha2 target.
# NOTE(review): no PUBLIC/PRIVATE keyword is given, so the legacy signature of
# target_link_libraries() applies - confirm that this is intended.
target_link_libraries(benchmark_base picosha2)
Boost Software License - Version 1.0 - August 17th, 2003
Permission is hereby granted, free of charge, to any person or organization
obtaining a copy of the software and accompanying documentation covered by
this license (the "Software") to use, reproduce, display, distribute,
execute, and transmit the Software, and to prepare derivative works of the
Software, and to permit third-parties to whom the Software is furnished to
do so, all subject to the following:
The copyright notices in the Software and this entire statement, including
the above license grant, this restriction and the following disclaimer,
must be included in all copies of the Software, in whole or in part, and
all derivative works of the Software, unless such copies or derivative
works are solely in the form of machine-executable object code generated by
a source language processor.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
#ifndef COMPARISON_BENCHMARKS_BASE_FFT_H
#define COMPARISON_BENCHMARKS_BASE_FFT_H
#include <complex>
#include <string>
#include <vector>
namespace comparison_benchmarks {
namespace base {
// Shared parameters and serial building blocks of the FFT benchmark.
// Only declarations live here; the definitions are expected in src/fft.cpp
// (not part of this header).
namespace fft {
// Problem size: passed as the element count `n` of the top-level call by the
// benchmark driver.
const int SIZE = 8192;
// Loop bound for the iterations that are bracketed by
// start_iteration()/end_iteration() in the benchmark driver.
const int NUM_ITERATIONS = 1000;
// Loop bound for the iterations the driver runs before it starts recording.
const int NUM_WARMUP_ITERATIONS = 100;
// Parallel drivers compare `n <= RECURSIVE_CUTOFF` and, if true, finish the
// sub-problem with the serial conquer() instead of splitting further.
const int RECURSIVE_CUTOFF = 32;
// Sample buffer type used by all functions below.
typedef std::vector<std::complex<double>> complex_vector;
// Creates the input buffer for a benchmark run.
// NOTE(review): presumably returns SIZE samples - confirm in src/fft.cpp.
complex_vector generate_input();
// The three functions below operate in place on the `n` elements starting at
// `data`. Judging by their use in the driver they are the reorder step, the
// serial recursive solve and the merge step of a divide-and-conquer FFT -
// TODO confirm against src/fft.cpp.
void divide(complex_vector::iterator data, int n);
void conquer(complex_vector::iterator data, int n);
void combine(complex_vector::iterator data, int n);
}
}
}
#endif //COMPARISON_BENCHMARKS_BASE_FFT_H
#ifndef COMPARISON_BENCHMARKS_BASE_HEAT_H
#define COMPARISON_BENCHMARKS_BASE_HEAT_H
#include <array>
#include <iostream>
#include <memory>
namespace comparison_benchmarks {
namespace base {
namespace heat {
const int DIFFUSION_SIZE = 256;
const int DIFFUSION_STEPS = 256;
const int NUM_ITERATIONS = 100;
const int WARMUP_ITERATIONS = 20;
/**
 * Serial reference implementation of a 2D heat diffusion simulation.
 *
 * The grid has SIZE x SIZE interior cells surrounded by a one-cell border,
 * i.e. (SIZE + 2) x (SIZE + 2) cells in total. Two grids are kept: the
 * current state and the buffer the next time step is written into.
 *
 * The class owns both grids through raw pointers, so it is non-copyable
 * (a copy would delete the same buffers twice). It is meant to be used as a
 * base class (run_simulation is virtual), hence the virtual destructor.
 *
 * @tparam T    element type of a grid cell
 * @tparam SIZE edge length of the interior (non-border) region
 */
template<typename T, int SIZE>
class heat_diffusion {
  // Center portion is SIZExSIZE, borders are fixed temperature values
  using matrix = std::array<std::array<T, SIZE + 2>, SIZE + 2>;

 protected:
  // Sane default values for the simulation (from paper).
  // This is not about perfect simulation results but the speedup of the workload.
  double c = 0.1;
  double d_s = 1.0 / (SIZE + 1);
  double d_t = (d_s * d_s) / (4 * c);

 public:
  // Grid holding the state of the current time step.
  matrix *current_data;
  // Grid the following time step is written to; swapped with current_data after each step.
  matrix *next_data;

  /** Allocates both grids and puts them into the initial state (see reset_data). */
  explicit heat_diffusion() : current_data(new matrix), next_data(new matrix) {
    reset_data();
  }

  // The object owns two heap buffers; copying would lead to a double delete.
  heat_diffusion(const heat_diffusion &) = delete;
  heat_diffusion &operator=(const heat_diffusion &) = delete;

  // Virtual, as derived simulations may be destroyed through a base pointer.
  virtual ~heat_diffusion() {
    delete current_data;
    delete next_data;
  }

  /**
   * Advances the simulation by n time steps.
   *
   * @param n number of time steps; each step updates every interior cell and
   *          then swaps the two grids.
   */
  virtual void run_simulation(int n) {
    for (int i = 0; i < n; i++) {
      for (int row = 1; row <= SIZE; row++) {
        for (int column = 1; column <= SIZE; column++) {
          update_element(row, column);
        }
      }

      // Synchronization point needed to coordinate the calculation!
      swap_data_arrays();
    }
  }

 protected:
  /**
   * Computes the next value of one cell from the cell itself and its four
   * direct neighbours in current_data and stores it in next_data.
   * Must only be called for interior cells (1 <= row, column <= SIZE), as it
   * reads row +/- 1 and column +/- 1.
   */
  void update_element(int row, int column) {
    (*next_data)[row][column] = (*current_data)[row][column] + ((c * d_t) / (d_s * d_s)) *
        ((*current_data)[row + 1][column] + (*current_data)[row - 1][column]
            - 4 * (*current_data)[row][column]
            + (*current_data)[row][column + 1] + (*current_data)[row][column - 1]);
  }

  /** Makes the freshly computed grid the current one (pointer swap, no copy). */
  void swap_data_arrays() {
    matrix *tmp = current_data;
    current_data = next_data;
    next_data = tmp;
  }

  /**
   * Initializes both grids: every cell is 0.0, except the first and the last
   * row, which are set to 1.0. The border is written to both grids because
   * update_element never touches border cells.
   */
  void reset_data() {
    for (int row = 0; row < SIZE + 2; row++) {
      for (int column = 0; column < SIZE + 2; column++) {
        (*current_data)[row][column] = 0.0;
        (*next_data)[row][column] = 0.0;

        // Edges are a fixed, hot temperature
        if (row == 0 || row == SIZE + 1) {
          (*current_data)[row][column] = 1.0;
          (*next_data)[row][column] = 1.0;
        }
      }
    }
  }
};
/**
 * Prints the current grid of a simulation as coarse ASCII art: one character
 * per cell (border included), cells separated by tabs, one line per row.
 *
 * Temperature to character mapping:
 *   below 0.1 -> ' ',  below 0.2 -> '-',  below 0.5 -> '=',  otherwise '#'.
 *
 * @param strm       stream to write to
 * @param simulation simulation whose current_data grid is printed
 * @return strm, to allow chaining
 */
template<typename T, int SIZE>
std::ostream &operator<<(std::ostream &strm, const heat_diffusion<T, SIZE> &simulation) {
  for (int i = 0; i < SIZE + 2; i++) {
    for (int j = 0; j < SIZE + 2; j++) {
      // current_data is a pointer to the grid, so it has to be dereferenced
      // before indexing (indexing the pointer itself would step over whole grids).
      const auto &temperature = (*simulation.current_data)[i][j];

      // 'color' our output according to temperature
      char out;
      if (temperature < 0.1) {
        out = ' ';
      } else if (temperature < 0.2) {
        out = '-';
      } else if (temperature < 0.5) {
        out = '=';
      } else {
        out = '#';
      }
      strm << out << "\t";
    }
    strm << std::endl;
  }
  return strm;
}
}
}
}
#endif //COMPARISON_BENCHMARKS_BASE_HEAT_H
#ifndef COMPARISON_BENCHMARKS_BASE_MATRIX_H
#define COMPARISON_BENCHMARKS_BASE_MATRIX_H
#include <algorithm>
#include <iostream>
namespace comparison_benchmarks {
namespace base {
namespace matrix {
const int MATRIX_SIZE = 128;
const int NUM_ITERATIONS = 5000;
const int WARMUP_ITERATIONS = 1000;
/**
 * Square SIZE x SIZE matrix with a serial reference multiplication.
 *
 * Intended as a base class for the framework specific benchmark variants:
 * they reuse multiply_column() and provide their own (parallel) driver.
 *
 * @tparam T    element type
 * @tparam SIZE number of rows and columns
 */
template<typename T, int SIZE>
class matrix {
 public:
  // Row-major element storage, data[row][column].
  T data[SIZE][SIZE];

  /** Fills the matrix with deterministic test data: every element equals its row index. */
  explicit matrix() {
    // Iterate up to the template parameter SIZE (not a global constant), so
    // that every instantiation initializes exactly its own storage.
    for (int i = 0; i < SIZE; i++) {
      for (int j = 0; j < SIZE; j++) {
        data[i][j] = i;
      }
    }
  }

  // Virtual, as the class has virtual methods and is designed for inheritance.
  virtual ~matrix() = default;

  /**
   * Stores the product a * b in this matrix (serially, row by row).
   * Neither a nor b may be this matrix, as the result is written in place
   * while the inputs are still being read.
   */
  virtual void multiply(const matrix<T, SIZE> &a, const matrix<T, SIZE> &b) {
    for (int i = 0; i < SIZE; i++) {
      multiply_column(i, a, b);
    }
  }

 protected:
  /**
   * Computes data[i][*], i.e. all elements with first index i of a * b.
   * Different values of i write to disjoint elements.
   */
  void multiply_column(int i, const matrix<T, SIZE> &a, const matrix<T, SIZE> &b) {
    for (int j = 0; j < SIZE; ++j) {
      data[i][j] = 0;
    }

    // k is the outer loop, so that b and data are both traversed along their
    // contiguous dimension in the inner loop.
    for (int k = 0; k < SIZE; ++k) {
      const T a_data = a.data[i][k];  // invariant for the inner loop
      for (int j = 0; j < SIZE; ++j) {
        const T b_data = b.data[k][j];
        data[i][j] += a_data * b_data;
      }
    }
  }
};
/**
 * Writes a matrix to a stream: elements of a row are each followed by a tab,
 * every row is terminated by std::endl.
 *
 * @return strm, to allow chaining
 */
template<typename T, int SIZE>
std::ostream &operator<<(std::ostream &strm, const matrix<T, SIZE> &matrix) {
  for (const auto &row : matrix.data) {
    for (const auto &element : row) {
      strm << element << "\t";
    }
    strm << std::endl;
  }
  return strm;
}
}
}
}
#endif //COMPARISON_BENCHMARKS_BASE_MATRIX_H
/*
Range
=====
Copyright (c) 2009-2011 Khaled Alshaya
Distributed under the Boost Software License, version 1.0
(See the license at: http://www.boost.org/license_1_0.txt).
*/
/*
Rationale
=========
In Python, there is a beautiful function called "range".
"range" allows the programmer to iterate over a range elegantly.
This concept is not as general as "for-loops" in C++,
but nonetheless, it expresses the intent of the programmer
clearer than the general "for-loops" in many cases.
Design
======
Range is made to be an STL-like library. In fact, it is
built on top of the concepts of STL. The library is designed to
work with STL algorithms as well. Range is more flexible
than the Python "range", because:
Range is an "immutable ordered random access container"
Specifications
==============
Range satisfies the following requirements:
* Immutable.
* Random Access Container.
* Random Access Iterator Interface.
* Constant Time Complexity Operations.
Range models an ordered sequence of elements,
where a range is defined by:
[begin, end)
* begin: the first element in the range. (Inclusive)
* end : the last element in the range. (Exclusive)
* step : the distance between two consecutive elements in a range.
where each element in the range is defined by:
element = begin + step * i
* i: is the index of the element in range.
The following precondition must be met for the sequence
to be a valid range:
step != 0
&&
(
begin <= end && step > 0
||
begin >= end && step < 0
)
Portability
===========
Range Generator is written in standard C++ (C++98). It depends
-only- on the standard C++ library.
*/
// TODO: See if we should swap this out for our own implementation, for now this is fine, as it is self contained.
/**
* Notes on Modification:
* The code was adapted to fit into our namespacing/naming scheme for simpler use.
* This includes ifdef's, namespace and code formatting style.
*/
#ifndef Range_h__
#define Range_h__
#include <iterator>
#include <stdexcept>
#include <cstddef>
#include <cmath>
namespace comparison_benchmarks {
namespace base {
namespace range {
template<class IntegerType>
struct basic_range {
// Read-only random access iterator over a basic_range (front to back).
//
// The iterator only stores a pointer to its range and the index of the
// element it refers to. Elements are computed on dereference as
// `first_element + step * index`; they are returned by value, as there is no
// stored element a reference could point to.
struct const_iterator_impl {
  typedef IntegerType value_type;
  typedef std::size_t size_type;
  typedef IntegerType difference_type;
  typedef value_type *pointer;
  typedef value_type &reference;
  typedef std::random_access_iterator_tag iterator_category;

  // A default-constructed iterator is not bound to any range.
  // It can be assigned and compared, but must not be dereferenced.
  const_iterator_impl() : r(0), index(0) {}

  const_iterator_impl(const const_iterator_impl &rhs)
      : r(rhs.r), index(rhs.index) {}

  // Iterator to the element number p_index of *p_range.
  const_iterator_impl(basic_range<IntegerType> const *p_range, size_type p_index)
      : r(p_range), index(p_index) {}

  const_iterator_impl &operator=(const const_iterator_impl &rhs) {
    r = rhs.r;
    index = rhs.index;
    return *this;
  }

  // Two iterators are equal if they have the same index and refer to equal
  // ranges (compared by value, not by identity).
  // Unbound iterators (null range) are handled explicitly instead of being
  // dereferenced: they only equal other unbound iterators.
  bool operator==(const const_iterator_impl &rhs) const {
    if (index != rhs.index)
      return false;
    if (r == rhs.r)
      return true;
    if (r == 0 || rhs.r == 0)
      return false;
    return *r == *(rhs.r);
  }

  bool operator!=(const const_iterator_impl &rhs) const {
    return !(*this == rhs);
  }

  // The ordering operators compare the indices only; they are meaningful
  // for iterators of the same range.
  bool operator<(const const_iterator_impl &rhs) const {
    return index < rhs.index;
  }

  bool operator>(const const_iterator_impl &rhs) const {
    return index > rhs.index;
  }

  bool operator<=(const const_iterator_impl &rhs) const {
    return index <= rhs.index;
  }

  bool operator>=(const const_iterator_impl &rhs) const {
    return index >= rhs.index;
  }

  value_type operator*() const {
    return r->m_first_element + r->m_step * index;
  }

  // operator->
  // is not implemented because the value_type is an integer type
  // and primitive types in C++ don't define member functions.

  const_iterator_impl &operator++() {
    ++index;
    return *this;
  }

  const_iterator_impl operator++(int) {
    const_iterator_impl temp = *this;
    ++index;
    return temp;
  }

  const_iterator_impl &operator--() {
    --index;
    return *this;
  }

  const_iterator_impl operator--(int) {
    const_iterator_impl temp = *this;
    --index;
    return temp;
  }

  const_iterator_impl &operator+=(difference_type increment) {
    index += increment;
    return *this;
  }

  // operator+ is a friend (non-member) function, operator- is a member.
  // Only the form `iterator + n` is provided; `n + iterator` is not.
  friend const_iterator_impl operator+
      (const const_iterator_impl &lhs, difference_type increment) {
    const_iterator_impl sum;
    sum.r = lhs.r;
    sum.index = lhs.index + increment;
    return sum;
  }

  const_iterator_impl &operator-=(difference_type decrement) {
    index -= decrement;
    return *this;
  }

  const_iterator_impl operator-(difference_type decrement) const {
    const_iterator_impl shifted_iterator;
    shifted_iterator.r = r;
    shifted_iterator.index = index - decrement;
    return shifted_iterator;
  }

  // Distance between two iterators, measured in elements.
  difference_type operator-(const const_iterator_impl &rhs) const {
    return index - rhs.index;
  }

  // Element `offset` positions after the current one.
  value_type operator[](difference_type offset) const {
    size_type new_index = index + offset;
    return r->m_first_element + r->m_step * new_index;
  }

 private:
  basic_range<IntegerType> const *r;  // range iterated over, null if unbound
  size_type index;                    // zero-based position inside the range
};
// Read-only random access iterator over a basic_range (back to front).
//
// Works like const_iterator_impl, but the stored index counts from the end
// of the range: index 0 refers to the last element. Elements are computed
// on dereference and returned by value.
struct const_reverse_iterator_impl {
  typedef IntegerType value_type;
  typedef std::size_t size_type;
  typedef IntegerType difference_type;
  typedef value_type *pointer;
  typedef value_type &reference;
  typedef std::random_access_iterator_tag iterator_category;

  // A default-constructed iterator is not bound to any range.
  // It can be assigned and compared, but must not be dereferenced.
  const_reverse_iterator_impl() : r(0), index(0) {}

  const_reverse_iterator_impl(const const_reverse_iterator_impl &rhs)
      : r(rhs.r), index(rhs.index) {}

  // Iterator to the element number p_index, counted from the end of *p_range.
  const_reverse_iterator_impl(basic_range<IntegerType> const *p_range, size_type p_index)
      : r(p_range), index(p_index) {}

  const_reverse_iterator_impl &operator=(const const_reverse_iterator_impl &rhs) {
    r = rhs.r;
    index = rhs.index;
    return *this;
  }

  // Two iterators are equal if they have the same index and refer to equal
  // ranges (compared by value, not by identity).
  // Unbound iterators (null range) are handled explicitly instead of being
  // dereferenced: they only equal other unbound iterators.
  bool operator==(const const_reverse_iterator_impl &rhs) const {
    if (index != rhs.index)
      return false;
    if (r == rhs.r)
      return true;
    if (r == 0 || rhs.r == 0)
      return false;
    return *r == *(rhs.r);
  }

  bool operator!=(const const_reverse_iterator_impl &rhs) const {
    return !(*this == rhs);
  }

  // The ordering operators compare the (reverse) indices only; they are
  // meaningful for iterators of the same range.
  bool operator<(const const_reverse_iterator_impl &rhs) const {
    return index < rhs.index;
  }

  bool operator>(const const_reverse_iterator_impl &rhs) const {
    return index > rhs.index;
  }

  bool operator<=(const const_reverse_iterator_impl &rhs) const {
    return index <= rhs.index;
  }

  bool operator>=(const const_reverse_iterator_impl &rhs) const {
    return index >= rhs.index;
  }

  value_type operator*() const {
    // Translate the position counted from the back into a forward index.
    size_type reverse_index
        = (r->m_element_count - 1) - index;
    return r->m_first_element + r->m_step * reverse_index;
  }

  // operator->
  // is not implemented because the value_type is integer type
  // and primitive types in C++ don't define member functions.

  const_reverse_iterator_impl &operator++() {
    ++index;
    return *this;
  }

  const_reverse_iterator_impl operator++(int) {
    const_reverse_iterator_impl temp = *this;
    ++index;
    return temp;
  }

  const_reverse_iterator_impl &operator--() {
    --index;
    return *this;
  }

  const_reverse_iterator_impl operator--(int) {
    const_reverse_iterator_impl temp = *this;
    --index;
    return temp;
  }

  const_reverse_iterator_impl &operator+=(difference_type increment) {
    index += increment;
    return *this;
  }

  // operator+ is a friend (non-member) function, operator- is a member.
  // Only the form `iterator + n` is provided; `n + iterator` is not.
  friend const_reverse_iterator_impl operator+
      (const const_reverse_iterator_impl &lhs, difference_type increment) {
    const_reverse_iterator_impl sum;
    sum.r = lhs.r;
    sum.index = lhs.index + increment;
    return sum;
  }

  const_reverse_iterator_impl &operator-=(difference_type decrement) {
    index -= decrement;
    return *this;
  }

  const_reverse_iterator_impl operator-(difference_type decrement) const {
    const_reverse_iterator_impl shifted_iterator;
    shifted_iterator.r = r;
    shifted_iterator.index = index - decrement;
    return shifted_iterator;
  }

  // Distance between two iterators, measured in elements.
  difference_type operator-(const const_reverse_iterator_impl &rhs) const {
    return index - rhs.index;
  }

  // Element `offset` positions after the current one (in reverse direction).
  value_type operator[](difference_type offset) const {
    size_type new_reverse_index
        = (r->m_element_count - 1) - (index + offset);
    return r->m_first_element + r->m_step * new_reverse_index;
  }

 private:
  basic_range<IntegerType> const *r;  // range iterated over, null if unbound
  size_type index;                    // zero-based position counted from the back
};
typedef IntegerType value_type;
typedef const_iterator_impl iterator;
typedef const_iterator_impl const_iterator;
typedef const_reverse_iterator_impl reverse_iterator;
typedef const_reverse_iterator_impl const_reverse_iterator;
typedef value_type &reference;
typedef const value_type &const_reference;
typedef value_type *pointer;
typedef IntegerType difference_type;
typedef std::size_t size_type;
// Default construction yields an empty range (zero elements).
// The step of an empty range is never used; it only has to differ
// from 0 to keep the range valid, and 1 is used by convention.
basic_range()
    : m_first_element(0),
      m_element_count(0),
      m_step(1) {}
// Creates the range [first_element, last_element) with the given step.
// first_element: is begin in specifications (inclusive).
// last_element: is end in specifications (exclusive).
//
// Throws std::out_of_range if step is 0 (the range would be infinite) or
// if the sign of step points away from last_element.
basic_range(value_type first_element, value_type last_element, value_type step)
    : m_first_element(first_element),
      m_step(step) {
  if (step == 0)
    throw std::out_of_range("Invalid Range: step can't be equal to zero!");

  const bool is_forward = first_element < last_element;
  const bool is_backward = first_element > last_element;
  if (is_forward && step < 0)
    throw std::out_of_range("Invalid Range: step can't be backward, while the range is forward!");
  if (is_backward && step > 0)
    throw std::out_of_range("Invalid Range: step can't be forward, while the range is backward!");

  // The element count is the length of the range divided by the step,
  // rounded up: a trailing partial step still contributes one element.
  const value_type length = last_element - first_element;
  m_element_count = length / step;
  if (length % step != 0)
    ++m_element_count;
}
// Creates the range [first_element, last_element) and picks the step
// automatically: 1 for a forward (or empty) range, -1 for a backward range.
// An empty range (begin == end) gets step 1 by convention, as the step
// must never be zero.
basic_range(value_type first_element, value_type last_element)
    : m_first_element(first_element) {
  const bool is_backward = !(last_element >= first_element);
  if (is_backward) {
    *this = basic_range<IntegerType>(first_element, last_element, -1);
  } else {
    *this = basic_range<IntegerType>(first_element, last_element, 1);
  }
}
// The following constructor is a shortcut
// if you want the first element as zero.
// the step is determined automatically, based
// on the last element. If the last element is
// positive, then step is one, but if it is negative
// then step is minus one.
basic_range<IntegerType>(value_type last_element)
: m_first_element(0) {
if (last_element >= m_first_element) *this = basic_range<IntegerType>(m_first_element, last_element, 1);
else *this = basic_range<IntegerType>(m_first_element, last_element, -1);
}
basic_range<IntegerType>(const basic_range<IntegerType> &r)
: m_first_element(r.m_first_element),
m_element_count(r.m_element_count),
m_step(r.m_step) {}
// Copy assignment: member-wise copy of first element, element count and step,
// returning *this. The defaulted operator does exactly that.
basic_range<IntegerType> &operator=(const basic_range<IntegerType> &r) = default;
// Two ranges are equal only if first element, element count and step all
// match. Note that this also applies to empty ranges.
bool operator==(const basic_range<IntegerType> &r) const {
  if (m_first_element != r.m_first_element) {
    return false;
  }
  if (m_element_count != r.m_element_count) {
    return false;
  }
  return m_step == r.m_step;
}
// Negation of operator==.
bool operator!=(const basic_range<IntegerType> &r) const {
  const bool same = (*this == r);
  return !same;
}
// The following four functions enable the user to compare
// ranges using ( <, >, <=, >=).
// The comparison between two ranges is a simple lexicographical
// comparison (element by element). By convention, if every element
// of a range R1 equals the corresponding leading element of a longer
// range R2 (i.e. R1 is a prefix of R2), then R1 is considered
// less than R2.
// Lexicographical "less than". Because the step is constant, comparing the
// first two elements is enough; after that only the lengths can differ.
// Relies on element counts never being negative, which the constructors
// guarantee by rejecting a step that points away from the last element.
bool operator<(const basic_range<IntegerType> &r) const {
  // An empty range is smaller than any non-empty range and never smaller
  // than another empty range.
  if (m_element_count == 0 || r.m_element_count == 0) {
    return m_element_count < r.m_element_count;
  }
  // Both ranges have a first element: it decides unless it is equal.
  if (m_first_element != r.m_first_element) {
    return m_first_element < r.m_first_element;
  }
  // Equal first element and at least one single-element range: the shorter
  // range is a prefix of the other one and therefore the smaller one.
  if (m_element_count == 1 || r.m_element_count == 1) {
    return m_element_count < r.m_element_count;
  }
  // Both ranges have a second element (index 1): it decides unless equal.
  const value_type own_second = m_first_element + m_step;
  const value_type other_second = r.m_first_element + r.m_step;
  if (own_second != other_second) {
    return own_second < other_second;
  }
  // Same first element and same step: one range is a prefix of the other,
  // so only the number of elements is left to compare.
  return m_element_count < r.m_element_count;
}
// Lexicographical "greater than". Every decision taken here is the exact
// mirror image of operator< (empty ranges first, then the first element,
// then single-element ranges, then the second element, finally the length),
// so the result is obtained by asking whether r is less than this range.
bool operator>(const basic_range<IntegerType> &r) const {
  return r < *this;
}
// "Less than or equal" is defined as "not greater than".
bool operator<=(const basic_range<IntegerType> &r) const {
  const bool greater = (*this > r);
  return !greater;
}
// "Greater than or equal" is defined as "not less than".
bool operator>=(const basic_range<IntegerType> &r) const {
  const bool less = (*this < r);
  return !less;
}
// Forward iterator positioned at index 0.
const_iterator begin() const {
  const_iterator first(this, 0);
  return first;
}
// Forward iterator positioned one past the last index.
const_iterator end() const {
  const_iterator past_last(this, m_element_count);
  return past_last;
}
// Reverse iterator positioned at reverse index 0.
const_reverse_iterator rbegin() const {
  const_reverse_iterator first(this, 0);
  return first;
}
// Reverse iterator positioned one past the last reverse index.
const_reverse_iterator rend() const {
  const_reverse_iterator past_last(this, m_element_count);
  return past_last;
}
// Number of elements in the range.
size_type size() const {
  return m_element_count;
}
// The range is immutable, hence max_size() is always the same as size().
size_type max_size() const {
  return size();
}
// True if the range holds no elements.
bool empty() const {
  return !(m_element_count != 0);
}
// exist() and find() are similar, except that find() returns an iterator
// to the element (end() if it is absent), while exist() returns a bool.
// Returns an iterator to `element`, or end() if the range does not produce it.
iterator find(value_type element) const {
  const value_type offset = element - m_first_element;
  const value_type element_index = offset / m_step;
  // The candidate index has to lie inside the range ...
  if (element_index < 0 || !(element_index < m_element_count)) {
    return end();
  }
  // ... and the element has to sit exactly on a multiple of the step.
  if (offset % m_step != 0) {
    return end();
  }
  return begin() + element_index;
}
// True if find() locates `element`, i.e. does not return end().
bool exist(value_type element) const {
  const iterator position = find(element);
  return position != end();
}
// In the standard, the operator[]
// should return a const reference.
// Because Range Generator doesn't store its elements
// internally, we return a copy of the value.
// In any case, this doesn't affect the semantics of the operator.
// Computes the element at `index` on the fly; no bounds check is performed.
value_type operator[](size_type index) const {
  return m_step * index + m_first_element;
}
private:
// m_first_element: begin (see specifications).
// m_element_count: (end - begin) / step, rounded up when the division
//                  leaves a remainder (see the three-argument constructor).
// m_step: difference between two successive elements; the constructors
//         never store a step of zero.
value_type m_first_element, m_element_count, m_step;
};
// This is the default type of range!
typedef basic_range<int> range;
}
}
}
#endif // range_h__
#ifndef COMPARISON_BENCHMARKS_BASE_UNBALANCED_H_
#define COMPARISON_BENCHMARKS_BASE_UNBALANCED_H_
#include <cstdint>
#include <array>
#include <vector>
#include "picosha2.h"
namespace comparison_benchmarks {
namespace base {
namespace unbalanced {
// Fixed parameters of the unbalanced tree benchmark.
// NOTE(review): how they map onto node's constructor arguments (seed,
// root_children, q, b) is inferred from the names - confirm against callers.
constexpr int SEED = 42;
constexpr int ROOT_CHILDREN = 140;
constexpr double Q = 0.124875;
constexpr int NORMAL_CHILDREN = 8;

// NOTE(review): presumably the total node count of the tree generated with
// the parameters above - verify against the benchmark drivers.
constexpr int NUM_NODES = 71069;

// Iteration counts (measured and warm-up) for the benchmark drivers.
constexpr int NUM_ITERATIONS = 50;
constexpr int WARMUP_ITERATIONS = 5;

// Per-node state: 20 bytes.
using node_state = std::array<uint8_t, 20>;
/**
* Node of an unballanced binomial tree (https://www.cs.unc.edu/~olivier/LCPC06.pdf).
* To build up the tree recursivly call spawn_child_nodes on each node until leaves are reached.
* The tree is not built up directly in memory, but rather by the recursive calls.
*/
class node {
// The state is used to allow a deterministic tree construction using sha256 hashes.
node_state state_;
// Number of children for the current node
int num_children_;
// Set this to a positive number for the root node to start the tree with a specific size
int root_children_;
// general branching factors
double q_;
int b_;
// Private constructor for children
node(node_state state, double q, int b) : state_{state},
num_children_{0},
root_children_{-1},
q_{q},
b_{b} { init_num_children(); }
std::array<uint8_t, 20> generate_child_state(uint32_t index);
double get_state_random();
void init_num_children() {
double state_random = get_state_random();
if (root_children_ > 0) {
num_children_ = root_children_; // Root always spawns children
} else if (state_random < q_) {
num_children_ = b_;
} else {
num_children_ = 0;
}
}
public:
node(uint32_t seed, int root_children, double q, int b)
: state_({{}}), num_children_{0}, root_children_{root_children}, q_{q}, b_{b} {
for (int i = 0; i < 16; i++) {
state_[i] = 0;
}
state_[16] = static_cast<uint8_t>(0xFFu & (seed >> 24u));
state_[17] = static_cast<uint8_t>(0xFFu & (seed >> 16u));
state_[18] = static_cast<uint8_t>(0xFFu & (seed >> 8u));
state_[19] = static_cast<uint8_t>(0xFFu & (seed >> 0u));
picosha2::hash256_one_by_one hasher;
hasher.process(state_.begin(), state_.end());
hasher.finish();
hasher.get_hash_bytes(state_.begin(), state_.end());
init_num_children();
}
int get_num_children() const { return num_children_; }
node spawn_child_node(int index) {
return {generate_child_state(index), q_, b_};
}
};
}
}
}
#endif //COMPARISON_BENCHMARKS_BASE_UNBALANCED_H_
#include "benchmark_base/fft.h"
namespace comparison_benchmarks {
namespace base {
namespace fft {
// Generates SIZE samples of a time series that is the sum of sine waves with
// the known frequencies below. Applying the fft to this series should
// reveal exactly these frequencies.
complex_vector generate_input() {
  const std::vector<double> known_frequencies{2, 11, 52, 88, 256};
  fft::complex_vector data(SIZE);

  for (int i = 0; i < SIZE; i++) {
    std::complex<double> sample(0.0, 0.0);
    for (double frequency : known_frequencies) {
      sample += sin(2 * M_PI * frequency * i / SIZE);
    }
    data[i] = sample;
  }

  return data;
}
// Reorders the first n elements in place: the elements at even positions
// move to the front half, the ones at odd positions to the back half.
void divide(complex_vector::iterator data, int n) {
  const int half = n / 2;
  complex_vector odd_elements(half);

  // Slot i is written only after slots 2*i and 2*i+1 were read, and later
  // iterations only read slots behind i, so compacting in place is safe.
  for (int i = 0; i < half; ++i) {
    odd_elements[i] = data[2 * i + 1];
    data[i] = data[2 * i];
  }
  for (int i = 0; i < half; ++i) {
    data[half + i] = odd_elements[i];
  }
}
// Butterfly step: merges the two transformed halves of the first n elements.
void combine(complex_vector::iterator data, int n) {
  const int half = n / 2;
  for (int i = 0; i < half; ++i) {
    // w is the "twiddle-factor". It could be cached, but the same 'base'
    // algorithm runs in the parallel and the serial version, so recomputing
    // it does not distort the performance comparison.
    const std::complex<double> w = exp(std::complex<double>(0, -2. * M_PI * i / n));
    const std::complex<double> even = data[i];
    const std::complex<double> odd = data[half + i];

    data[i] = even + w * odd;
    data[half + i] = even - w * odd;
  }
}
// Recursive fft on the first n elements: split into even/odd halves,
// transform both halves, then merge them. Fewer than two elements need no work.
void conquer(complex_vector::iterator data, int n) {
  if (n >= 2) {
    const int half = n / 2;
    divide(data, n);
    conquer(data, half);
    conquer(data + half, half);
    combine(data, n);
  }
}
}
}
}
#include <vector>
#include <string>
#include <iostream>
#include <sstream>
using namespace std;
namespace comparison_benchmarks {
namespace base {
// Returns the paths of the sample images 0.jpg to 18.jpg.
// NOTE(review): the @CMAKE_CURRENT_SOURCE_DIR@ placeholder is presumably
// substituted by CMake's configure_file - confirm in the build files.
std::vector<std::string> get_sample_image_paths() {
  const int num_images = 19;
  const std::string directory = "@CMAKE_CURRENT_SOURCE_DIR@/sample_images/";
  const std::string extension = ".jpg";

  std::vector<std::string> result;
  result.reserve(num_images);
  for (int i = 0; i < num_images; i++) {
    result.push_back(directory + std::to_string(i) + extension);
  }
  return result;
}
}
}
#include "benchmark_base/unbalanced.h"
namespace comparison_benchmarks {
namespace base {
namespace unbalanced {
// Derives the state of the child with the given index: the sha256 hash over
// this node's state followed by the 4 bytes of the index, truncated to the
// size of node_state.
node_state node::generate_child_state(uint32_t index) {
  picosha2::hash256_one_by_one hasher;
  hasher.process(state_.begin(), state_.end());

  // The index is fed in as it is laid out in memory, i.e. in host byte order.
  const uint8_t *index_bytes = reinterpret_cast<const uint8_t *>(&index);
  hasher.process(index_bytes, index_bytes + sizeof(index));
  hasher.finish();

  node_state child_state;
  hasher.get_hash_bytes(child_state.begin(), child_state.end());
  return child_state;
}
// Maps the last four state bytes to a value in [0, 1]: they are read as a
// big-endian 32 bit integer, the sign bit is cleared and the result is
// divided by INT32_MAX.
double node::get_state_random() {
  uint32_t bits = 0;
  for (int position = 16; position < 20; ++position) {
    bits = (bits << 8u) | static_cast<uint32_t>(state_[position]);
  }
  bits &= 0x7fffffffu;  // Mask out negative values

  const int32_t state_random_integer = static_cast<int32_t>(bits);
  return static_cast<double>(state_random_integer) / static_cast<double>(INT32_MAX);
}
}
}
}
# INTERFACE library: it has no sources of its own and only forwards usage
# requirements to the targets that link against it.
add_library(benchmark_runner INTERFACE)
# Targets linking benchmark_runner get this directory on their include path.
target_include_directories(benchmark_runner INTERFACE ${CMAKE_CURRENT_SOURCE_DIR})
#ifndef BENCHMARK_RUNNER_H
#define BENCHMARK_RUNNER_H
#include <string>
#include <cstdlib>
#include <vector>
#include <chrono>
#include <numeric>
#include <iostream>
#include <fstream>
#include <bits/stdc++.h>
using namespace std;
/**
 * Small helper that times benchmark iterations and appends the measured
 * runtimes (in microseconds) to a csv file.
 *
 * Typical use: read_args(), construct the runner, run_iterations() (or
 * start_iteration()/end_iteration() pairs), then commit_results().
 *
 * All standard library names are fully qualified, so the class does not
 * depend on a using-directive being active in the including file.
 */
class benchmark_runner {
 private:
  std::string csv_path_;
  std::string csv_name_;

  std::chrono::steady_clock::time_point last_start_time_;
  std::vector<long> times_;

  // Prints the average of all recorded runtimes.
  void print_statistics() {
    if (times_.empty()) {
      // Nothing recorded: avoid the division by zero below.
      std::cout << "Average Runtime (us): n/a (no iterations recorded)" << std::endl;
      return;
    }
    const long time_sum = std::accumulate(times_.begin(), times_.end(), 0l);
    std::cout << "Average Runtime (us): " << (time_sum / static_cast<long>(times_.size())) << std::endl;
  }

  // True if the file can be opened for reading.
  static bool file_exists(const std::string &name) {
    std::ifstream f(name);
    return f.good();
  }

  // Path of the csv file inside the output directory. A separator is inserted
  // if the directory was given without a trailing '/', so the file ends up
  // inside the directory created by the constructor instead of next to it.
  std::string full_filename() const {
    if (csv_path_.empty() || csv_path_.back() == '/') {
      return csv_path_ + csv_name_;
    }
    return csv_path_ + "/" + csv_name_;
  }

 public:
  /**
   * @param csv_path directory the results are written to (created if missing)
   * @param csv_name name of the result file inside csv_path
   *
   * Terminates the process with exit code 1 if the directory cannot be created.
   */
  benchmark_runner(std::string csv_path, std::string csv_name) : csv_path_{std::move(csv_path)},
                                                                 csv_name_{std::move(csv_name)},
                                                                 times_{} {
    // NOTE(review): the path is passed to the shell unquoted, so paths with
    // spaces or shell meta characters are not handled. Only use trusted paths.
    const std::string command = "mkdir -p " + csv_path_;
    const int res = std::system(command.c_str());
    if (res) {
      std::cout << "Error while creating directory!" << std::endl;
      std::exit(1);
    }
  }

  /**
   * Reads `<output_directory> <num_threads>` from the command line.
   * Terminates the process with exit code 1 if fewer than two arguments are given.
   */
  static void read_args(int argc, char **argv, int &num_threads, std::string &path) {
    if (argc < 3) {
      std::cout << "Must Specifiy concurrency and output directory! (usage: `benchmark <output_directory> <num_threads>`)"
                << std::endl;
      std::exit(1);
    }

    path = argv[1];
    num_threads = std::atoi(argv[2]);
  }

  // Marks the start of a timed iteration.
  void start_iteration() {
    last_start_time_ = std::chrono::steady_clock::now();
  }

  // Records the time elapsed since the last start_iteration() call.
  void end_iteration() {
    const auto end_time = std::chrono::steady_clock::now();
    const long time = std::chrono::duration_cast<std::chrono::microseconds>(end_time - last_start_time_).count();
    times_.emplace_back(time);
  }

  // Runs f warmup_count times without timing it, then count times with timing.
  void run_iterations(int count, std::function<void(void)> f, int warmup_count) {
    for (int i = 0; i < warmup_count; i++) {
      f();
    }

    for (int i = 0; i < count; i++) {
      start_iteration();
      f();
      end_iteration();
    }
  }

  /**
   * Appends all recorded runtimes to the csv file (writing the header first if
   * the file does not exist yet) and clears them afterwards.
   *
   * @param print_stats also print the average runtime to stdout
   */
  void commit_results(bool print_stats) {
    if (print_stats) {
      print_statistics();
    }

    const std::string filename = full_filename();
    const bool write_header = !file_exists(filename);

    { // Scope for output file
      std::ofstream o(filename, std::fstream::out | std::fstream::app);

      if (write_header) {
        o << "runtime_us" << '\n';
      }
      for (auto time : times_) {
        o << time << '\n';
      }
    } // End Scope for output file: closing the stream flushes everything

    times_.clear();
  }
};
#endif //BENCHMARK_RUNNER_H
# INTERFACE library: it has no sources of its own and only forwards usage
# requirements to the targets that link against it.
add_library(picosha2 INTERFACE)
# Targets linking picosha2 get this directory on their include path.
target_include_directories(picosha2 INTERFACE ${CMAKE_CURRENT_SOURCE_DIR})
\ No newline at end of file
MIT License
Copyright (c) 2017 okdshin
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
\ No newline at end of file
/*
The MIT License (MIT)
Copyright (C) 2017 okdshin
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#ifndef PICOSHA2_H
#define PICOSHA2_H
// picosha2:20140213
#ifndef PICOSHA2_BUFFER_SIZE_FOR_INPUT_ITERATOR
#define PICOSHA2_BUFFER_SIZE_FOR_INPUT_ITERATOR \
1048576 //=1024*1024: default is 1MB memory
#endif
#include <algorithm>
#include <cassert>
#include <iterator>
#include <sstream>
#include <vector>
#include <fstream>
namespace picosha2 {
// Word type used for the hash arithmetic. Results are truncated to 32 bits
// explicitly (see detail::mask_32bit), as unsigned long may be wider than that.
typedef unsigned long word_t;
// Type of a single message or digest byte.
typedef unsigned char byte_t;
// Size in bytes of the buffers used to hold a complete digest.
static const size_t k_digest_size = 32;
namespace detail {
// Keeps only the lowest 8 bits of x.
inline byte_t mask_8bit(byte_t x) {
  return static_cast<byte_t>(x & 0xff);
}
// Keeps only the lowest 32 bits of x.
inline word_t mask_32bit(word_t x) {
  const word_t low_32_bits = 0xffffffff;
  return x & low_32_bits;
}
// Per-round additive constants: hash256_block adds add_constant[i] in round i.
// These are the 64 SHA-256 round constants (K) of FIPS 180-4.
const word_t add_constant[64] = {
0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1,
0x923f82a4, 0xab1c5ed5, 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, 0xe49b69c1, 0xefbe4786,
0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147,
0x06ca6351, 0x14292967, 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, 0xa2bfe8a1, 0xa81a664b,
0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a,
0x5b9cca4f, 0x682e6ff3, 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2};
// Initial value of the 8-word running digest (copied into h_ by
// hash256_one_by_one::init); the SHA-256 initial hash value of FIPS 180-4.
const word_t initial_message_digest[8] = {0x6a09e667, 0xbb67ae85, 0x3c6ef372,
0xa54ff53a, 0x510e527f, 0x9b05688c,
0x1f83d9ab, 0x5be0cd19};
// "Choose": each bit of x selects the matching bit of y (if set) or z (if clear).
inline word_t ch(word_t x, word_t y, word_t z) {
  const word_t taken_from_y = x & y;
  const word_t taken_from_z = (~x) & z;
  return taken_from_y ^ taken_from_z;
}
// "Majority": each result bit is the value held by at least two of the inputs.
inline word_t maj(word_t x, word_t y, word_t z) {
  const word_t xy = x & y;
  const word_t xz = x & z;
  const word_t yz = y & z;
  return xy ^ xz ^ yz;
}
// Rotates the 32 bit value x to the right by n bits (n < 32).
inline word_t rotr(word_t x, std::size_t n) {
  assert(n < 32);
  const word_t shifted_down = x >> n;
  const word_t wrapped_around = x << (32 - n);
  return mask_32bit(shifted_down | wrapped_around);
}
// Rotation mixes applied to the working variables a and e in every round.
inline word_t bsig0(word_t x) {
  const word_t r2 = rotr(x, 2);
  const word_t r13 = rotr(x, 13);
  const word_t r22 = rotr(x, 22);
  return r2 ^ r13 ^ r22;
}
inline word_t bsig1(word_t x) {
  const word_t r6 = rotr(x, 6);
  const word_t r11 = rotr(x, 11);
  const word_t r25 = rotr(x, 25);
  return r6 ^ r11 ^ r25;
}
// Shifts x to the right by n bits (n < 32).
inline word_t shr(word_t x, std::size_t n) {
  assert(n < 32);
  const word_t shifted = x >> n;
  return shifted;
}
// Rotation/shift mixes used to expand the message schedule.
inline word_t ssig0(word_t x) {
  const word_t r7 = rotr(x, 7);
  const word_t r18 = rotr(x, 18);
  const word_t s3 = shr(x, 3);
  return r7 ^ r18 ^ s3;
}
inline word_t ssig1(word_t x) {
  const word_t r17 = rotr(x, 17);
  const word_t r19 = rotr(x, 19);
  const word_t s10 = shr(x, 10);
  return r17 ^ r19 ^ s10;
}
// Processes one 64-byte message block and folds it into the running digest.
//   message_digest: random access iterator to the 8 digest words; they are
//                   read as the starting state and updated in place.
//   [first, last):  the message block; it must span exactly 64 bytes.
template <typename RaIter1, typename RaIter2>
void hash256_block(RaIter1 message_digest, RaIter2 first, RaIter2 last) {
assert(first + 64 == last);
static_cast<void>(last); // for avoiding unused-variable warning
// Message schedule: 64 words derived from the block.
word_t w[64];
std::fill(w, w + 64, 0);
// The first 16 words are the block itself, read as big-endian 32 bit words.
for (std::size_t i = 0; i < 16; ++i) {
w[i] = (static_cast<word_t>(mask_8bit(*(first + i * 4))) << 24) |
(static_cast<word_t>(mask_8bit(*(first + i * 4 + 1))) << 16) |
(static_cast<word_t>(mask_8bit(*(first + i * 4 + 2))) << 8) |
(static_cast<word_t>(mask_8bit(*(first + i * 4 + 3))));
}
// The remaining 48 words are computed from earlier schedule words.
for (std::size_t i = 16; i < 64; ++i) {
w[i] = mask_32bit(ssig1(w[i - 2]) + w[i - 7] + ssig0(w[i - 15]) +
w[i - 16]);
}
// Working variables start out as the current digest.
word_t a = *message_digest;
word_t b = *(message_digest + 1);
word_t c = *(message_digest + 2);
word_t d = *(message_digest + 3);
word_t e = *(message_digest + 4);
word_t f = *(message_digest + 5);
word_t g = *(message_digest + 6);
word_t h = *(message_digest + 7);
// 64 rounds. Only a and e receive newly computed values (masked to 32 bits);
// all other working variables just shift along by one position.
for (std::size_t i = 0; i < 64; ++i) {
word_t temp1 = h + bsig1(e) + ch(e, f, g) + add_constant[i] + w[i];
word_t temp2 = bsig0(a) + maj(a, b, c);
h = g;
g = f;
f = e;
e = mask_32bit(d + temp1);
d = c;
c = b;
b = a;
a = mask_32bit(temp1 + temp2);
}
// Add the working variables onto the digest ...
*message_digest += a;
*(message_digest + 1) += b;
*(message_digest + 2) += c;
*(message_digest + 3) += d;
*(message_digest + 4) += e;
*(message_digest + 5) += f;
*(message_digest + 6) += g;
*(message_digest + 7) += h;
// ... and truncate each digest word back to 32 bits.
for (std::size_t i = 0; i < 8; ++i) {
*(message_digest + i) = mask_32bit(*(message_digest + i));
}
}
} // namespace detail
// Writes every byte of [first, last) to os as two lower-case hex digits.
//
// Each value is reduced to its lowest 8 bits before printing. Without this,
// a negative (signed) char is sign-extended by the conversion to
// unsigned int and shows up as eight digits ("ffffffab") instead of two.
// Values in the range 0..255 are printed exactly as before.
//
// Side effects on os: the fill character stays '0' if at least one byte was
// written, and the base is switched back to decimal at the end.
template <typename InIter>
void output_hex(InIter first, InIter last, std::ostream& os) {
  os.setf(std::ios::hex, std::ios::basefield);
  for (; first != last; ++first) {
    // width() is reset by every formatted output, so it is set per byte.
    os.width(2);
    os.fill('0');
    os << (static_cast<unsigned int>(*first) & 0xffu);
  }
  os.setf(std::ios::dec, std::ios::basefield);
}
// Stores the hex representation of the bytes in [first, last) in hex_str.
template <typename InIter>
void bytes_to_hex_string(InIter first, InIter last, std::string& hex_str) {
  std::ostringstream hex_stream;
  output_hex(first, last, hex_stream);
  hex_str.assign(hex_stream.str());
}
// Same as above for all bytes of a container.
template <typename InContainer>
void bytes_to_hex_string(const InContainer& bytes, std::string& hex_str) {
  bytes_to_hex_string(bytes.begin(), bytes.end(), hex_str);
}
// Returns the hex representation of the bytes in [first, last).
template <typename InIter>
std::string bytes_to_hex_string(InIter first, InIter last) {
  std::string result;
  bytes_to_hex_string(first, last, result);
  return result;
}
// Returns the hex representation of all bytes of a container.
template <typename InContainer>
std::string bytes_to_hex_string(const InContainer& bytes) {
  std::string result;
  bytes_to_hex_string(bytes, result);
  return result;
}
// Incremental hasher: feed the message in any number of process() calls,
// then call finish() once and read the digest with get_hash_bytes().
// init() resets the object so that it can be used for another message.
class hash256_one_by_one {
public:
hash256_one_by_one() { init(); }
// Resets the hasher: drops buffered input, zeroes the message length and
// loads the initial digest.
void init() {
buffer_.clear();
std::fill(data_length_digits_, data_length_digits_ + 4, 0);
std::copy(detail::initial_message_digest,
detail::initial_message_digest + 8, h_);
}
// Appends the bytes in [first, last) to the message. Every complete 64-byte
// block is hashed right away; the remaining bytes stay in buffer_.
template <typename RaIter>
void process(RaIter first, RaIter last) {
add_to_data_length(static_cast<word_t>(std::distance(first, last)));
std::copy(first, last, std::back_inserter(buffer_));
std::size_t i = 0;
for (; i + 64 <= buffer_.size(); i += 64) {
detail::hash256_block(h_, buffer_.begin() + i,
buffer_.begin() + i + 64);
}
// Drop the bytes that were consumed by the loop above.
buffer_.erase(buffer_.begin(), buffer_.begin() + i);
}
// Pads the buffered rest of the message and hashes the final block(s).
void finish() {
byte_t temp[64];
std::fill(temp, temp + 64, 0);
std::size_t remains = buffer_.size();
std::copy(buffer_.begin(), buffer_.end(), temp);
// The padding starts with a single 0x80 byte directly after the data.
temp[remains] = 0x80;
if (remains > 55) {
// No room left for the 8 length bytes at the end of this block: hash it
// as it is and put the length into an additional, otherwise empty block.
std::fill(temp + remains + 1, temp + 64, 0);
detail::hash256_block(h_, temp, temp + 64);
std::fill(temp, temp + 64 - 4, 0);
} else {
std::fill(temp + remains + 1, temp + 64 - 4, 0);
}
// The last 8 bytes (temp[56] to temp[63]) receive the message length in bits.
write_data_bit_length(&(temp[56]));
detail::hash256_block(h_, temp, temp + 64);
}
// Writes the digest to [first, last), most significant byte of each word
// first. Writing stops early if the output range is shorter than the
// digest, which yields a truncated digest.
template <typename OutIter>
void get_hash_bytes(OutIter first, OutIter last) const {
for (const word_t* iter = h_; iter != h_ + 8; ++iter) {
for (std::size_t i = 0; i < 4 && first != last; ++i) {
*(first++) = detail::mask_8bit(
static_cast<byte_t>((*iter >> (24 - 8 * i))));
}
}
}
private:
// Adds n to the message length in bytes. The length is stored as four
// 16 bit digits, least significant digit first, and carries are propagated
// upwards until a digit no longer overflows.
void add_to_data_length(word_t n) {
word_t carry = 0;
data_length_digits_[0] += n;
for (std::size_t i = 0; i < 4; ++i) {
data_length_digits_[i] += carry;
if (data_length_digits_[i] >= 65536u) {
carry = data_length_digits_[i] >> 16;
data_length_digits_[i] &= 65535u;
} else {
break;
}
}
}
// Writes the message length in bits as 8 bytes, most significant byte
// first, starting at begin.
void write_data_bit_length(byte_t* begin) {
word_t data_bit_length_digits[4];
std::copy(data_length_digits_, data_length_digits_ + 4,
data_bit_length_digits);
// convert byte length to bit length (multiply 8 or shift 3 times left)
word_t carry = 0;
for (std::size_t i = 0; i < 4; ++i) {
word_t before_val = data_bit_length_digits[i];
data_bit_length_digits[i] <<= 3;
data_bit_length_digits[i] |= carry;
data_bit_length_digits[i] &= 65535u;
// The top 3 bits of this digit move into the next higher digit.
carry = (before_val >> (16 - 3)) & 65535u;
}
// write data_bit_length
for (int i = 3; i >= 0; --i) {
(*begin++) = static_cast<byte_t>(data_bit_length_digits[i] >> 8);
(*begin++) = static_cast<byte_t>(data_bit_length_digits[i]);
}
}
// Input bytes that do not yet fill a complete 64-byte block.
std::vector<byte_t> buffer_;
word_t data_length_digits_[4]; // as 64bit integer (16bit x 4 integer)
// Running digest (8 words).
word_t h_[8];
};
inline void get_hash_hex_string(const hash256_one_by_one& hasher,
std::string& hex_str) {
byte_t hash[k_digest_size];
hasher.get_hash_bytes(hash, hash + k_digest_size);
return bytes_to_hex_string(hash, hash + k_digest_size, hex_str);
}
inline std::string get_hash_hex_string(const hash256_one_by_one& hasher) {
std::string hex_str;
get_hash_hex_string(hasher, hex_str);
return hex_str;
}
namespace impl {
// Random access input: the whole range is handed to the hasher in one go.
template <typename RaIter, typename OutIter>
void hash256_impl(RaIter first, RaIter last, OutIter first2, OutIter last2, int,
                  std::random_access_iterator_tag) {
  hash256_one_by_one hasher;
  hasher.process(first, last);
  hasher.finish();
  hasher.get_hash_bytes(first2, last2);
}
// Single pass input: the data is copied into a buffer of buffer_size bytes,
// and every (possibly only partially filled) buffer is handed to the hasher.
template <typename InputIter, typename OutIter>
void hash256_impl(InputIter first, InputIter last, OutIter first2,
                  OutIter last2, int buffer_size, std::input_iterator_tag) {
  std::vector<byte_t> buffer(buffer_size);
  hash256_one_by_one hasher;
  while (first != last) {
    // Fill the buffer until it is full or the input is exhausted.
    int size = 0;
    while (size != buffer_size && !(first == last)) {
      buffer[size] = *first;
      ++size;
      ++first;
    }
    hasher.process(buffer.begin(), buffer.begin() + size);
  }
  hasher.finish();
  hasher.get_hash_bytes(first2, last2);
}
}
// Hashes [first, last) and writes the digest to [first2, last2).
// The implementation is selected by the iterator category of InIter;
// buffer_size is only used by the implementation for single pass iterators.
template <typename InIter, typename OutIter>
void hash256(InIter first, InIter last, OutIter first2, OutIter last2,
int buffer_size = PICOSHA2_BUFFER_SIZE_FOR_INPUT_ITERATOR) {
picosha2::impl::hash256_impl(
first, last, first2, last2, buffer_size,
typename std::iterator_traits<InIter>::iterator_category());
}
// Hashes [first, last) and writes the digest into the container dst.
// dst is not resized: only dst.begin() to dst.end() is written.
template <typename InIter, typename OutContainer>
void hash256(InIter first, InIter last, OutContainer& dst) {
hash256(first, last, dst.begin(), dst.end());
}
// Hashes the container src and writes the digest to [first, last).
template <typename InContainer, typename OutIter>
void hash256(const InContainer& src, OutIter first, OutIter last) {
hash256(src.begin(), src.end(), first, last);
}
// Hashes the container src and writes the digest into the container dst.
// dst is not resized: only dst.begin() to dst.end() is written.
template <typename InContainer, typename OutContainer>
void hash256(const InContainer& src, OutContainer& dst) {
hash256(src.begin(), src.end(), dst.begin(), dst.end());
}
template <typename InIter>
void hash256_hex_string(InIter first, InIter last, std::string& hex_str) {
byte_t hashed[k_digest_size];
hash256(first, last, hashed, hashed + k_digest_size);
std::ostringstream oss;
output_hex(hashed, hashed + k_digest_size, oss);
hex_str.assign(oss.str());
}
template <typename InIter>
std::string hash256_hex_string(InIter first, InIter last) {
std::string hex_str;
hash256_hex_string(first, last, hex_str);
return hex_str;
}
inline void hash256_hex_string(const std::string& src, std::string& hex_str) {
hash256_hex_string(src.begin(), src.end(), hex_str);
}
template <typename InContainer>
void hash256_hex_string(const InContainer& src, std::string& hex_str) {
hash256_hex_string(src.begin(), src.end(), hex_str);
}
template <typename InContainer>
std::string hash256_hex_string(const InContainer& src) {
return hash256_hex_string(src.begin(), src.end());
}
// Hashes everything that can still be read from f and writes the digest to
// [first, last).
template <typename OutIter>
void hash256(std::ifstream& f, OutIter first, OutIter last) {
  std::istreambuf_iterator<char> file_begin(f);
  std::istreambuf_iterator<char> file_end;
  hash256(file_begin, file_end, first, last);
}
}// namespace picosha2
#endif // PICOSHA2_H
...@@ -29,12 +29,12 @@ pls::internal::scheduling::parallel_result<int> for_each(const RandomIt first, ...@@ -29,12 +29,12 @@ pls::internal::scheduling::parallel_result<int> for_each(const RandomIt first,
// Cut in half recursively // Cut in half recursively
const long middle_index = num_elements / 2; const long middle_index = num_elements / 2;
return scheduler::par([first, middle_index, last, &function, min_elements] { return scheduler::par([first, middle_index, last, function, min_elements] {
return internal::for_each(first, return internal::for_each(first,
first + middle_index, first + middle_index,
function, function,
min_elements); min_elements);
}, [first, middle_index, last, &function, min_elements] { }, [first, middle_index, last, function, min_elements] {
return internal::for_each(first + middle_index, return internal::for_each(first + middle_index,
last, last,
function, function,
......
...@@ -112,7 +112,7 @@ struct basic_range { ...@@ -112,7 +112,7 @@ struct basic_range {
: r(rhs.r), index(rhs.index) {} : r(rhs.r), index(rhs.index) {}
const_iterator_impl(basic_range<IntegerType> const *p_range, size_type p_index) const_iterator_impl(basic_range<IntegerType> const *p_range, size_type p_index)
: r(p_range), index(p_index) {} : r(*p_range), index(p_index) {}
const_iterator_impl &operator=(const const_iterator_impl &rhs) { const_iterator_impl &operator=(const const_iterator_impl &rhs) {
r = rhs.r; r = rhs.r;
...@@ -121,7 +121,7 @@ struct basic_range { ...@@ -121,7 +121,7 @@ struct basic_range {
} }
bool operator==(const const_iterator_impl &rhs) const { bool operator==(const const_iterator_impl &rhs) const {
return *r == *(rhs.r) && index == rhs.index; return r == rhs.r && index == rhs.index;
} }
bool operator!=(const const_iterator_impl &rhs) const { bool operator!=(const const_iterator_impl &rhs) const {
...@@ -145,7 +145,7 @@ struct basic_range { ...@@ -145,7 +145,7 @@ struct basic_range {
} }
value_type operator*() const { value_type operator*() const {
return r->m_first_element + r->m_step * index; return r.m_first_element + r.m_step * index;
} }
// operator-> // operator->
...@@ -212,11 +212,11 @@ struct basic_range { ...@@ -212,11 +212,11 @@ struct basic_range {
value_type operator[](difference_type offset) const { value_type operator[](difference_type offset) const {
size_type new_index = index + offset; size_type new_index = index + offset;
return r->m_first_element + r->m_step * new_index; return r.m_first_element + r.m_step * new_index;
} }
private: private:
basic_range<IntegerType> const *r; basic_range<IntegerType> r;
size_type index; size_type index;
}; };
...@@ -236,7 +236,7 @@ struct basic_range { ...@@ -236,7 +236,7 @@ struct basic_range {
: r(rhs.r), index(rhs.index) {} : r(rhs.r), index(rhs.index) {}
const_reverse_iterator_impl(basic_range<IntegerType> const *p_range, size_type p_index) const_reverse_iterator_impl(basic_range<IntegerType> const *p_range, size_type p_index)
: r(p_range), index(p_index) {} : r(*p_range), index(p_index) {}
const_reverse_iterator_impl &operator=(const const_reverse_iterator_impl &rhs) { const_reverse_iterator_impl &operator=(const const_reverse_iterator_impl &rhs) {
r = rhs.r; r = rhs.r;
...@@ -245,7 +245,7 @@ struct basic_range { ...@@ -245,7 +245,7 @@ struct basic_range {
} }
bool operator==(const const_reverse_iterator_impl &rhs) const { bool operator==(const const_reverse_iterator_impl &rhs) const {
return *r == *(rhs.r) && index == rhs.index; return r == rhs.r && index == rhs.index;
} }
bool operator!=(const const_reverse_iterator_impl &rhs) const { bool operator!=(const const_reverse_iterator_impl &rhs) const {
...@@ -270,8 +270,8 @@ struct basic_range { ...@@ -270,8 +270,8 @@ struct basic_range {
value_type operator*() const { value_type operator*() const {
size_type reverse_index size_type reverse_index
= (r->m_element_count - 1) - index; = (r.m_element_count - 1) - index;
return r->m_first_element + r->m_step * reverse_index; return r.m_first_element + r.m_step * reverse_index;
} }
// operator-> // operator->
...@@ -338,12 +338,12 @@ struct basic_range { ...@@ -338,12 +338,12 @@ struct basic_range {
value_type operator[](difference_type offset) const { value_type operator[](difference_type offset) const {
size_type new_reverse_index size_type new_reverse_index
= (r->m_element_count - 1) - (index + offset); = (r.m_element_count - 1) - (index + offset);
return r->m_first_element + r->m_step * new_reverse_index; return r.m_first_element + r.m_step * new_reverse_index;
} }
private: private:
basic_range<IntegerType> const *r; basic_range<IntegerType> r;
size_type index; size_type index;
}; };
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment