diff --git a/.gitignore b/.gitignore index 8dcbce9..5df32e7 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,6 @@ # Build Output /cmake-build-*/ +# Benchmark Output +.prof + diff --git a/NOTES.md b/NOTES.md index e771ad0..a495bab 100644 --- a/NOTES.md +++ b/NOTES.md @@ -4,6 +4,14 @@ A collection of stuff that we noticed during development. Useful later on two write a project report and to go back in time to find out why certain decisions where made. +## 08.04.2019 - Random Numbers + +We decided to go for a simple linear congruential random number generator +such as [std::minstd_rand](http://www.cplusplus.com/reference/random/minstd_rand/), +as this requires less memory and is faster. The decreased quality +in random numbers is probably ok (read up if there is literature on this), +as work stealing does not rely on a mathematically perfect distribution. + ## 02.04.2019 - CMake Export We built our project using CMake to make it portable and easy to setup. diff --git a/PERFORMANCE.md b/PERFORMANCE.md new file mode 100644 index 0000000..2ec5a0d --- /dev/null +++ b/PERFORMANCE.md @@ -0,0 +1,21 @@ +# Notes on performance measurements during development + +#### Commit 9c12addf + +| | | | | | | | | | | +| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | +old | 1659.01 us| 967.19 us| 830.08 us| 682.69 us| 737.71 us| 747.92 us| 749.37 us| 829.75 us| 7203.73 us +new | 1676.06 us| 981.56 us| 814.71 us| 698.72 us| 680.87 us| 737.68 us| 756.91 us| 764.71 us| 7111.22 us +change | 101.03 %| 101.49 %| 98.15 %| 102.35 %| 92.30 %| 98.63 %| 101.01 %| 92.16 %| 98.72 % + +| | | | | | | | | | | +| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | +old | 1648.65 us| 973.33 us| 820.18 us| 678.80 us| 746.21 us| 767.63 us| 747.17 us| 1025.35 us| 7407.32 us +new | 1655.09 us| 964.99 us| 807.57 us| 731.34 us| 747.47 us| 714.71 us| 794.35 us| 760.28 us| 7175.80 us +change | 100.39 %| 99.14 %| 98.46 %| 107.74 %| 100.17 %| 93.11 %| 106.31 %| 74.15 %| 96.87 % + +| | | | | | | | | | | +| --- | 
--- | --- | --- | --- | --- | --- | --- | --- | --- | +old | 1654.26 us| 969.12 us| 832.13 us| 680.69 us| 718.70 us| 750.80 us| 744.12 us| 775.24 us| 7125.07 us +new | 1637.04 us| 978.09 us| 799.93 us| 709.33 us| 746.42 us| 684.87 us| 822.30 us| 787.61 us| 7165.59 us +change | 98.96 %| 100.93 %| 96.13 %| 104.21 %| 103.86 %| 91.22 %| 110.51 %| 101.60 %| 100.57 % diff --git a/compare_benchmarks.py b/compare_benchmarks.py index 56ebddb..9a6c9e6 100755 --- a/compare_benchmarks.py +++ b/compare_benchmarks.py @@ -9,19 +9,25 @@ if len(sys.argv) < 2: target = sys.argv[1] print('Comparing current modifications for benchmark target ' + target) -print('Executing current version...') -print(os.popen('cd cmake-build-release; make ' + target).read()) -current = os.popen('chrt -rr 99 ./cmake-build-release/bin/' + target).read() - print('Executing old version...') -print(os.popen('git stash push').read()) +git_stash_result = os.popen('git stash push').read() +if 'No local changes' in git_stash_result: + print('No local changes, nothing to compare, exiting!') + sys.exit(1) +print(git_stash_result) print(os.popen('cd cmake-build-release; make ' + target).read()) -before = os.popen('chrt -rr 99 ./cmake-build-release/bin/' + target).read() +before = os.popen('nice -n -20 ./cmake-build-release/bin/' + target).read() print(os.popen('git stash pop').read()) +print('Executing current version...') +print(os.popen('cd cmake-build-release; make ' + target).read()) +current = os.popen('nice -n -20 ./cmake-build-release/bin/' + target).read() + print('=======================================================') current = [float(value) for value in current.split(',')] before = [float(value) for value in before.split(',')] +current.append(sum(current)) +before.append(sum(before)) def formate_change(change): @@ -32,13 +38,12 @@ def formate_change(change): else: color = '30' - return '\033[1;' + color + ';40m %8.2f' % (change * 100) + ' %' + return '\033[1;' + color + ';40m %9.2f' % (change * 100) + ' %' 
-format_string = ' '.join(['%10.2f us'] * len(current)) -print('old: ' + format_string % tuple(before)) -print('new: ' + format_string % tuple(current)) -print('=' * 55) +format_string = '|'.join(['%10.2f us'] * len(current)) +print('old | ' + format_string % tuple(before)) +print('new | ' + format_string % tuple(current)) change = [c / b for b, c in zip(before, current)] -formated_change = ''.join(list(map(formate_change, change))) -print(formated_change) +formated_change = '|'.join(list(map(formate_change, change))) +print('change |' + formated_change) diff --git a/lib/pls/include/pls/internal/scheduling/thread_state.h b/lib/pls/include/pls/internal/scheduling/thread_state.h index 042c8f8..2f9cda9 100644 --- a/lib/pls/include/pls/internal/scheduling/thread_state.h +++ b/lib/pls/include/pls/internal/scheduling/thread_state.h @@ -2,7 +2,10 @@ #ifndef PLS_THREAD_STATE_H #define PLS_THREAD_STATE_H +#include <random> + #include "abstract_task.h" + #include "pls/internal/base/aligned_stack.h" namespace pls { @@ -16,22 +19,25 @@ namespace pls { abstract_task* root_task_; abstract_task* current_task_; base::aligned_stack* task_stack_; - unsigned int id_; + size_t id_; base::spin_lock lock_; + std::minstd_rand random_; thread_state(): scheduler_{nullptr}, root_task_{nullptr}, current_task_{nullptr}, task_stack_{nullptr}, - id_{0} {}; + id_{0}, + random_{static_cast<std::minstd_rand::result_type>(id_ + 1)} {}; thread_state(scheduler* scheduler, base::aligned_stack* task_stack, unsigned int id): scheduler_{scheduler}, root_task_{nullptr}, current_task_{nullptr}, task_stack_{task_stack}, - id_{id} {} + id_{id}, + random_{static_cast<std::minstd_rand::result_type>(id_ + 1)} {} }; } } diff --git a/lib/pls/src/internal/base/spin_lock.cpp b/lib/pls/src/internal/base/spin_lock.cpp index c99019b..aff28f4 100644 --- a/lib/pls/src/internal/base/spin_lock.cpp +++ b/lib/pls/src/internal/base/spin_lock.cpp @@ -1,3 +1,4 @@ +#include "pls/internal/helpers/profiler.h" #include "pls/internal/base/spin_lock.h" namespace pls { @@ -7,6 +8,7 @@ namespace pls { // For now we simply try to be safe 
by forcing this lock to // also act as a strict memory fence. void spin_lock::lock() { + PROFILE_LOCK("Acquire Lock") int tries = 0; while (flag_.test_and_set(std::memory_order_seq_cst)) { tries++; diff --git a/lib/pls/src/internal/scheduling/abstract_task.cpp b/lib/pls/src/internal/scheduling/abstract_task.cpp index 3b75bd0..bfca31f 100644 --- a/lib/pls/src/internal/scheduling/abstract_task.cpp +++ b/lib/pls/src/internal/scheduling/abstract_task.cpp @@ -9,18 +9,21 @@ namespace pls { namespace scheduling { bool abstract_task::steal_work() { PROFILE_STEALING("abstract_task::steal_work") - auto my_state = base::this_thread::state(); - auto my_scheduler = my_state->scheduler_; + const auto my_state = base::this_thread::state(); + const auto my_scheduler = my_state->scheduler_; - int my_id = my_state->id_; - for (size_t i = 1; i < my_scheduler->num_threads(); i++) { - size_t target = (my_id + i) % my_scheduler->num_threads(); + const size_t my_id = my_state->id_; + const size_t offset = my_state->random_() % my_scheduler->num_threads(); + const size_t max_tries = 1; // my_scheduler->num_threads(); TODO: Tune this value + for (size_t i = 0; i < max_tries; i++) { + size_t target = (offset + i) % my_scheduler->num_threads(); + if (target == my_id) { + continue; + } auto target_state = my_scheduler->thread_state_for(target); // TODO: Cleaner Locking Using std::guarded_lock - PROFILE_LOCK("Acquire Thread Lock") target_state->lock_.lock(); - PROFILE_END_BLOCK // Dig down to our level PROFILE_STEALING("Go to our level")