Commit 310c33d2 by Florian Fritz

Merge branch 'random_stealing' into 'master'

Merge: Randomized Work Stealing

See merge request !6
parents 9c12addf 52fcb51f
Pipeline #1142 passed with stages
in 3 minutes 33 seconds
...@@ -5,3 +5,6 @@ ...@@ -5,3 +5,6 @@
# Build Output # Build Output
/cmake-build-*/ /cmake-build-*/
# Benchmark Output
.prof
...@@ -4,6 +4,14 @@ A collection of stuff that we noticed during development. ...@@ -4,6 +4,14 @@ A collection of stuff that we noticed during development.
Useful later on to write a project report and to go back Useful later on to write a project report and to go back
in time to find out why certain decisions were made. in time to find out why certain decisions were made.
## 08.04.2019 - Random Numbers
We decided to go for a simple linear congruential random number generator,
[std::minstd_rand](http://www.cplusplus.com/reference/random/minstd_rand/),
as it requires less memory and is faster than more elaborate generators. The reduced
quality of the random numbers is probably OK (TODO: check whether there is literature on this),
as work stealing does not rely on a mathematically perfect distribution.
## 02.04.2019 - CMake Export ## 02.04.2019 - CMake Export
We built our project using CMake to make it portable and easy to setup. We built our project using CMake to make it portable and easy to setup.
......
# Notes on performance measures during development
#### Commit 9c12addf
| | | | | | | | | | |
| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
old | 1659.01 us| 967.19 us| 830.08 us| 682.69 us| 737.71 us| 747.92 us| 749.37 us| 829.75 us| 7203.73 us
new | 1676.06 us| 981.56 us| 814.71 us| 698.72 us| 680.87 us| 737.68 us| 756.91 us| 764.71 us| 7111.22 us
change | 101.03 %| 101.49 %| 98.15 %| 102.35 %| 92.30 %| 98.63 %| 101.01 %| 92.16 %| 98.72 %
| | | | | | | | | | |
| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
old | 1648.65 us| 973.33 us| 820.18 us| 678.80 us| 746.21 us| 767.63 us| 747.17 us| 1025.35 us| 7407.32 us
new | 1655.09 us| 964.99 us| 807.57 us| 731.34 us| 747.47 us| 714.71 us| 794.35 us| 760.28 us| 7175.80 us
change | 100.39 %| 99.14 %| 98.46 %| 107.74 %| 100.17 %| 93.11 %| 106.31 %| 74.15 %| 96.87 %
| | | | | | | | | | |
| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
old | 1654.26 us| 969.12 us| 832.13 us| 680.69 us| 718.70 us| 750.80 us| 744.12 us| 775.24 us| 7125.07 us
new | 1637.04 us| 978.09 us| 799.93 us| 709.33 us| 746.42 us| 684.87 us| 822.30 us| 787.61 us| 7165.59 us
change | 98.96 %| 100.93 %| 96.13 %| 104.21 %| 103.86 %| 91.22 %| 110.51 %| 101.60 %| 100.57 %
...@@ -9,19 +9,25 @@ if len(sys.argv) < 2: ...@@ -9,19 +9,25 @@ if len(sys.argv) < 2:
target = sys.argv[1] target = sys.argv[1]
print('Comparing current modifications for benchmark target ' + target) print('Comparing current modifications for benchmark target ' + target)
print('Executing current version...')
print(os.popen('cd cmake-build-release; make ' + target).read())
current = os.popen('chrt -rr 99 ./cmake-build-release/bin/' + target).read()
print('Executing old version...') print('Executing old version...')
print(os.popen('git stash push').read()) git_stash_result = os.popen('git stash push').read()
if 'No local changes' in git_stash_result:
print('No local changes, nothing to compare, exiting!')
exit(1)
print(git_stash_result)
print(os.popen('cd cmake-build-release; make ' + target).read()) print(os.popen('cd cmake-build-release; make ' + target).read())
before = os.popen('chrt -rr 99 ./cmake-build-release/bin/' + target).read() before = os.popen('nice -20 ./cmake-build-release/bin/' + target).read()
print(os.popen('git stash pop').read()) print(os.popen('git stash pop').read())
print('Executing current version...')
print(os.popen('cd cmake-build-release; make ' + target).read())
current = os.popen('nice -20 ./cmake-build-release/bin/' + target).read()
print('=======================================================') print('=======================================================')
current = [float(value) for value in current.split(',')] current = [float(value) for value in current.split(',')]
before = [float(value) for value in before.split(',')] before = [float(value) for value in before.split(',')]
current.append(sum(current))
before.append(sum(before))
def formate_change(change): def formate_change(change):
...@@ -32,13 +38,12 @@ def formate_change(change): ...@@ -32,13 +38,12 @@ def formate_change(change):
else: else:
color = '30' color = '30'
return '\033[1;' + color + ';40m %8.2f' % (change * 100) + ' %' return '\033[1;' + color + ';40m %9.2f' % (change * 100) + ' %'
format_string = ' '.join(['%10.2f us'] * len(current)) format_string = '|'.join(['%10.2f us'] * len(current))
print('old: ' + format_string % tuple(before)) print('old | ' + format_string % tuple(before))
print('new: ' + format_string % tuple(current)) print('new | ' + format_string % tuple(current))
print('=' * 55)
change = [c / b for b, c in zip(before, current)] change = [c / b for b, c in zip(before, current)]
formated_change = ''.join(list(map(formate_change, change))) formated_change = '|'.join(list(map(formate_change, change)))
print(formated_change) print('change |' + formated_change)
...@@ -2,7 +2,10 @@ ...@@ -2,7 +2,10 @@
#ifndef PLS_THREAD_STATE_H #ifndef PLS_THREAD_STATE_H
#define PLS_THREAD_STATE_H #define PLS_THREAD_STATE_H
#include <random>
#include "abstract_task.h" #include "abstract_task.h"
#include "pls/internal/base/aligned_stack.h" #include "pls/internal/base/aligned_stack.h"
namespace pls { namespace pls {
...@@ -16,22 +19,25 @@ namespace pls { ...@@ -16,22 +19,25 @@ namespace pls {
abstract_task* root_task_; abstract_task* root_task_;
abstract_task* current_task_; abstract_task* current_task_;
base::aligned_stack* task_stack_; base::aligned_stack* task_stack_;
unsigned int id_; size_t id_;
base::spin_lock lock_; base::spin_lock lock_;
std::minstd_rand random_;
thread_state(): thread_state():
scheduler_{nullptr}, scheduler_{nullptr},
root_task_{nullptr}, root_task_{nullptr},
current_task_{nullptr}, current_task_{nullptr},
task_stack_{nullptr}, task_stack_{nullptr},
id_{0} {}; id_{0},
random_{id_} {};
thread_state(scheduler* scheduler, base::aligned_stack* task_stack, unsigned int id): thread_state(scheduler* scheduler, base::aligned_stack* task_stack, unsigned int id):
scheduler_{scheduler}, scheduler_{scheduler},
root_task_{nullptr}, root_task_{nullptr},
current_task_{nullptr}, current_task_{nullptr},
task_stack_{task_stack}, task_stack_{task_stack},
id_{id} {} id_{id},
random_{id_} {}
}; };
} }
} }
......
#include "pls/internal/helpers/profiler.h"
#include "pls/internal/base/spin_lock.h" #include "pls/internal/base/spin_lock.h"
namespace pls { namespace pls {
...@@ -7,6 +8,7 @@ namespace pls { ...@@ -7,6 +8,7 @@ namespace pls {
// For now we simply try to be safe by forcing this lock to // For now we simply try to be safe by forcing this lock to
// also act as a strict memory fence. // also act as a strict memory fence.
void spin_lock::lock() { void spin_lock::lock() {
PROFILE_LOCK("Acquire Lock")
int tries = 0; int tries = 0;
while (flag_.test_and_set(std::memory_order_seq_cst)) { while (flag_.test_and_set(std::memory_order_seq_cst)) {
tries++; tries++;
......
...@@ -9,18 +9,21 @@ namespace pls { ...@@ -9,18 +9,21 @@ namespace pls {
namespace scheduling { namespace scheduling {
bool abstract_task::steal_work() { bool abstract_task::steal_work() {
PROFILE_STEALING("abstract_task::steal_work") PROFILE_STEALING("abstract_task::steal_work")
auto my_state = base::this_thread::state<thread_state>(); const auto my_state = base::this_thread::state<thread_state>();
auto my_scheduler = my_state->scheduler_; const auto my_scheduler = my_state->scheduler_;
int my_id = my_state->id_; const size_t my_id = my_state->id_;
for (size_t i = 1; i < my_scheduler->num_threads(); i++) { const size_t offset = my_state->random_() % my_scheduler->num_threads();
size_t target = (my_id + i) % my_scheduler->num_threads(); const size_t max_tries = 1; // my_scheduler->num_threads(); TODO: Tune this value
for (size_t i = 0; i < max_tries; i++) {
size_t target = (offset + i) % my_scheduler->num_threads();
if (target == my_id) {
continue;
}
auto target_state = my_scheduler->thread_state_for(target); auto target_state = my_scheduler->thread_state_for(target);
// TODO: Cleaner Locking Using std::lock_guard // TODO: Cleaner Locking Using std::lock_guard
PROFILE_LOCK("Acquire Thread Lock")
target_state->lock_.lock(); target_state->lock_.lock();
PROFILE_END_BLOCK
// Dig down to our level // Dig down to our level
PROFILE_STEALING("Go to our level") PROFILE_STEALING("Go to our level")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment