diff --git a/app/benchmark_fft/main.cpp b/app/benchmark_fft/main.cpp index ff960cb..f4f2b1b 100644 --- a/app/benchmark_fft/main.cpp +++ b/app/benchmark_fft/main.cpp @@ -1,6 +1,6 @@ -#include "pls/internal/scheduling/scheduler.h" +#include "pls/pls.h" -using namespace pls::internal::scheduling; +using namespace pls; #include "benchmark_runner.h" #include "benchmark_base/fft.h" @@ -20,20 +20,20 @@ void pls_conquer(fft::complex_vector::iterator data, fft::complex_vector::iterat fft::conquer(data, swap_array, n / 2); fft::conquer(data + n / 2, swap_array + n / 2, n / 2); } else { - scheduler::spawn([data, n, swap_array]() { + spawn([data, n, swap_array]() { pls_conquer(data, swap_array, n / 2); }); - scheduler::spawn([data, n, swap_array]() { + spawn([data, n, swap_array]() { pls_conquer(data + n / 2, swap_array + n / 2, n / 2); }); - scheduler::sync(); + sync(); } fft::combine(data, n); } -constexpr int MAX_NUM_TASKS = 32; -constexpr int MAX_STACK_SIZE = 1024 * 4; +constexpr int MAX_NUM_TASKS = 16; +constexpr int MAX_STACK_SIZE = 4096 * 1; int main(int argc, char **argv) { int num_threads; diff --git a/app/benchmark_fib/main.cpp b/app/benchmark_fib/main.cpp index b304bb6..fdfe815 100644 --- a/app/benchmark_fib/main.cpp +++ b/app/benchmark_fib/main.cpp @@ -1,6 +1,6 @@ -#include "pls/internal/scheduling/scheduler.h" +#include "pls/pls.h" -using namespace pls::internal::scheduling; +using namespace pls; #include @@ -18,19 +18,19 @@ int pls_fib(int n) { } int a, b; - scheduler::spawn([n, &a]() { + spawn([n, &a]() { a = pls_fib(n - 1); }); - scheduler::spawn([n, &b]() { + spawn([n, &b]() { b = pls_fib(n - 2); }); - scheduler::sync(); + sync(); return a + b; } constexpr int MAX_NUM_TASKS = 32; -constexpr int MAX_STACK_SIZE = 1024 * 32; +constexpr int MAX_STACK_SIZE = 4096 * 1; int main(int argc, char **argv) { int num_threads; diff --git a/app/benchmark_matrix/main.cpp b/app/benchmark_matrix/main.cpp index 5d69b36..9ba1612 100644 --- a/app/benchmark_matrix/main.cpp +++ 
b/app/benchmark_matrix/main.cpp @@ -1,7 +1,6 @@ -#include "pls/internal/scheduling/scheduler.h" -#include "pls/algorithms/for_each.h" +#include "pls/pls.h" -using namespace pls::internal::scheduling; +using namespace pls; #include "benchmark_runner.h" #include "benchmark_base/matrix.h" diff --git a/lib/pls/CMakeLists.txt b/lib/pls/CMakeLists.txt index 5bffc2e..fcdb2ac 100644 --- a/lib/pls/CMakeLists.txt +++ b/lib/pls/CMakeLists.txt @@ -25,7 +25,7 @@ add_library(pls STATIC include/pls/internal/data_structures/optional.h include/pls/internal/helpers/prohibit_new.h - include/pls/internal/helpers/profiler.h + include/pls/internal/helpers/easy_profiler.h include/pls/internal/helpers/unique_id.h include/pls/internal/helpers/range.h include/pls/internal/helpers/seqence.h diff --git a/lib/pls/include/pls/dataflow/internal/function_node_impl.h b/lib/pls/include/pls/dataflow/internal/function_node_impl.h index e2e80d8..35e17df 100644 --- a/lib/pls/include/pls/dataflow/internal/function_node_impl.h +++ b/lib/pls/include/pls/dataflow/internal/function_node_impl.h @@ -3,7 +3,7 @@ #define PLS_DATAFLOW_INTERNAL_FUNCTION_NODE_IMPL_H_ #include "graph.h" -#include "pls/internal/helpers/profiler.h" +#include "pls/internal/helpers/easy_profiler.h" namespace pls { namespace dataflow { diff --git a/lib/pls/include/pls/dataflow/internal/graph_impl.h b/lib/pls/include/pls/dataflow/internal/graph_impl.h index f135982..1a9e2a2 100644 --- a/lib/pls/include/pls/dataflow/internal/graph_impl.h +++ b/lib/pls/include/pls/dataflow/internal/graph_impl.h @@ -2,7 +2,7 @@ #ifndef PLS_DATAFLOW_INTERNAL_GRAPH_IMPL_H_ #define PLS_DATAFLOW_INTERNAL_GRAPH_IMPL_H_ -#include "pls/internal/helpers/profiler.h" +#include "pls/internal/helpers/easy_profiler.h" namespace pls { namespace dataflow { diff --git a/lib/pls/include/pls/internal/base/backoff.h b/lib/pls/include/pls/internal/base/backoff.h index f0625b2..f847a36 100644 --- a/lib/pls/include/pls/internal/base/backoff.h +++ 
b/lib/pls/include/pls/internal/base/backoff.h @@ -3,7 +3,6 @@ #define PLS_BACKOFF_H_ #include "pls/internal/base/system_details.h" -#include "pls/internal/helpers/profiler.h" #include #include @@ -30,11 +29,9 @@ class backoff { backoff() : current_{INITIAL_SPIN_ITERS}, random_{std::random_device{}()} {} void do_backoff() { - PROFILE_LOCK("Backoff") spin(random_() % std::min(current_, MAX_SPIN_ITERS)); if (current_ >= YELD_ITERS) { - PROFILE_LOCK("Yield") using namespace std::chrono_literals; std::this_thread::sleep_for(5us); } diff --git a/lib/pls/include/pls/internal/base/swmr_spin_lock.h b/lib/pls/include/pls/internal/base/swmr_spin_lock.h index bfe9284..2fa4257 100644 --- a/lib/pls/include/pls/internal/base/swmr_spin_lock.h +++ b/lib/pls/include/pls/internal/base/swmr_spin_lock.h @@ -4,8 +4,6 @@ #include -#include "pls/internal/helpers/profiler.h" - namespace pls { namespace internal { namespace base { diff --git a/lib/pls/include/pls/internal/helpers/profiler.h b/lib/pls/include/pls/internal/helpers/easy_profiler.h similarity index 58% rename from lib/pls/include/pls/internal/helpers/profiler.h rename to lib/pls/include/pls/internal/helpers/easy_profiler.h index e0fe3ad..dd5e4fc 100644 --- a/lib/pls/include/pls/internal/helpers/profiler.h +++ b/lib/pls/include/pls/internal/helpers/easy_profiler.h @@ -6,33 +6,41 @@ #include #include -#define PROFILE_TASK(msg) EASY_BLOCK(msg, profiler::colors::LightBlue) -#define PROFILE_CONTINUATION(msg) EASY_BLOCK(msg, profiler::colors::LightBlue) -#define PROFILE_FAST_PATH(msg) EASY_BLOCK(msg, profiler::colors::Green) -#define PROFILE_STEALING(msg) EASY_BLOCK(msg, profiler::colors::Orange) -#define PROFILE_LOCK(msg) EASY_BLOCK(msg, profiler::colors::Red) +// 'bad' events that eat time +#define PROFILE_SYSCALL(msg) EASY_BLOCK(msg, profiler::colors::Red) +#define PROFILE_STEAL(msg) EASY_BLOCK(msg, profiler::colors::Orange) +#define PROFILE_SCHED(msg) EASY_BLOCK(msg, profiler::colors::Yellow) +// 'good' events that actually do 
work +#define PROFILE_WORK(msg) EASY_BLOCK(msg, profiler::colors::Green) + +// helpers for saving/initializing #define PROFILE_END_BLOCK EASY_END_BLOCK #define PROFILE_SAVE(filename) profiler::dumpBlocksToFile(filename); #define PROFILE_ENABLE EASY_PROFILER_ENABLE #define PROFILE_MAIN_THREAD EASY_MAIN_THREAD +#define PROFILE_WORKER_THREAD EASY_THREAD("Worker") #define PROFILE_VALUE(name, value) EASY_VALUE(name, value) #else //ENABLE_EASY_PROFILER -#define PROFILE_TASK(msg) -#define PROFILE_CONTINUATION(msg) -#define PROFILE_FAST_PATH(msg) -#define PROFILE_STEALING(msg) -#define PROFILE_LOCK(msg) +// 'bad' events that eat time +#define PROFILE_SYSCALL(msg) +#define PROFILE_STEAL(msg) +#define PROFILE_SCHED(msg) + +// 'good' events that actually do work +#define PROFILE_WORK(msg) +// helpers for saving/initializing #define PROFILE_END_BLOCK #define PROFILE_SAVE(filename) #define PROFILE_ENABLE #define PROFILE_MAIN_THREAD +#define PROFILE_WORKER_THREAD #define PROFILE_VALUE(name, value) diff --git a/lib/pls/include/pls/internal/scheduling/lock_free/external_trading_deque.h b/lib/pls/include/pls/internal/scheduling/lock_free/external_trading_deque.h index 5ac2943..c032de0 100644 --- a/lib/pls/include/pls/internal/scheduling/lock_free/external_trading_deque.h +++ b/lib/pls/include/pls/internal/scheduling/lock_free/external_trading_deque.h @@ -86,7 +86,6 @@ class external_trading_deque { private: void reset_bot_and_top(); - void decrease_bot(); // info on this deque unsigned thread_id_; diff --git a/lib/pls/include/pls/internal/scheduling/scheduler.h b/lib/pls/include/pls/internal/scheduling/scheduler.h index 0162b82..3588643 100644 --- a/lib/pls/include/pls/internal/scheduling/scheduler.h +++ b/lib/pls/include/pls/internal/scheduling/scheduler.h @@ -7,8 +7,6 @@ #include #include -#include "pls/internal/helpers/profiler.h" - #include "pls/internal/base/barrier.h" #include "pls/internal/base/stack_allocator.h" diff --git 
a/lib/pls/include/pls/internal/scheduling/scheduler_impl.h b/lib/pls/include/pls/internal/scheduling/scheduler_impl.h index e70aacd..aceda9b 100644 --- a/lib/pls/include/pls/internal/scheduling/scheduler_impl.h +++ b/lib/pls/include/pls/internal/scheduling/scheduler_impl.h @@ -7,8 +7,6 @@ #include "context_switcher/context_switcher.h" #include "context_switcher/continuation.h" -#include "pls/internal/helpers/profiler.h" - #include "pls/internal/scheduling/task_manager.h" #include "pls/internal/scheduling/base_task.h" #include "base_task.h" @@ -65,7 +63,7 @@ class scheduler::init_function_impl : public init_function { explicit init_function_impl(F &function) : function_{function} {} void run() override { base_task *root_task = thread_state::get().get_active_task(); - root_task->run_as_task([root_task, this](::context_switcher::continuation cont) { + root_task->run_as_task([root_task, this](auto cont) { root_task->is_synchronized_ = true; thread_state::get().main_continuation() = std::move(cont); function_(); diff --git a/lib/pls/src/internal/base/swmr_spin_lock.cpp b/lib/pls/src/internal/base/swmr_spin_lock.cpp index ed26ac0..2f7a514 100644 --- a/lib/pls/src/internal/base/swmr_spin_lock.cpp +++ b/lib/pls/src/internal/base/swmr_spin_lock.cpp @@ -6,7 +6,6 @@ namespace internal { namespace base { bool swmr_spin_lock::reader_try_lock() { - PROFILE_LOCK("Try Acquire Read Lock") if (write_request_.load(std::memory_order_acquire) == 1) { return false; } @@ -22,12 +21,10 @@ bool swmr_spin_lock::reader_try_lock() { } void swmr_spin_lock::reader_unlock() { - PROFILE_LOCK("Release Read Lock") readers_--; } void swmr_spin_lock::writer_lock() { - PROFILE_LOCK("Acquire Write Lock") // Tell the readers that we would like to write write_request_ = 1; @@ -37,7 +34,6 @@ void swmr_spin_lock::writer_lock() { } void swmr_spin_lock::writer_unlock() { - PROFILE_LOCK("Release Write Lock") write_request_ = 0; } diff --git a/lib/pls/src/internal/base/tas_spin_lock.cpp 
b/lib/pls/src/internal/base/tas_spin_lock.cpp index ed233bf..db8129c 100644 --- a/lib/pls/src/internal/base/tas_spin_lock.cpp +++ b/lib/pls/src/internal/base/tas_spin_lock.cpp @@ -1,4 +1,3 @@ -#include "pls/internal/helpers/profiler.h" #include "pls/internal/base/tas_spin_lock.h" #include "pls/internal/base/backoff.h" @@ -7,7 +6,6 @@ namespace internal { namespace base { void tas_spin_lock::lock() { - PROFILE_LOCK("Acquire Lock") backoff backoff_strategy; while (true) { @@ -19,7 +17,6 @@ void tas_spin_lock::lock() { } bool tas_spin_lock::try_lock(unsigned int num_tries) { - PROFILE_LOCK("Try Acquire Lock") backoff backoff_strategy; while (true) { @@ -37,7 +34,6 @@ bool tas_spin_lock::try_lock(unsigned int num_tries) { } void tas_spin_lock::unlock() { - PROFILE_LOCK("Unlock") flag_.clear(std::memory_order_release); } diff --git a/lib/pls/src/internal/base/ttas_spin_lock.cpp b/lib/pls/src/internal/base/ttas_spin_lock.cpp index 7573d07..5821364 100644 --- a/lib/pls/src/internal/base/ttas_spin_lock.cpp +++ b/lib/pls/src/internal/base/ttas_spin_lock.cpp @@ -1,4 +1,3 @@ -#include "pls/internal/helpers/profiler.h" #include "pls/internal/base/ttas_spin_lock.h" #include "pls/internal/base/backoff.h" @@ -7,7 +6,6 @@ namespace internal { namespace base { void ttas_spin_lock::lock() { - PROFILE_LOCK("Acquire Lock") int expected = 0; backoff backoff_; @@ -24,7 +22,6 @@ void ttas_spin_lock::lock() { } bool ttas_spin_lock::try_lock(unsigned int num_tries) { - PROFILE_LOCK("Try Acquire Lock") int expected = 0; backoff backoff_; @@ -50,7 +47,6 @@ bool ttas_spin_lock::try_lock(unsigned int num_tries) { } void ttas_spin_lock::unlock() { - PROFILE_LOCK("Unlock") flag_.store(0, std::memory_order_release); } diff --git a/lib/pls/src/internal/scheduling/lock_free/external_trading_deque.cpp b/lib/pls/src/internal/scheduling/lock_free/external_trading_deque.cpp index e3421c3..c17ee23 100644 --- a/lib/pls/src/internal/scheduling/lock_free/external_trading_deque.cpp +++ 
b/lib/pls/src/internal/scheduling/lock_free/external_trading_deque.cpp @@ -3,7 +3,7 @@ namespace pls::internal::scheduling::lock_free { -task * external_trading_deque::peek_traded_object(task *target_task) { +task *external_trading_deque::peek_traded_object(task *target_task) { traded_cas_field current_cas = target_task->external_trading_deque_cas_.load(); if (current_cas.is_filled_with_object()) { return current_cas.get_trade_object(); @@ -12,7 +12,7 @@ task * external_trading_deque::peek_traded_object(task *target_task) { } } -task * external_trading_deque::get_trade_object(task *target_task) { +task *external_trading_deque::get_trade_object(task *target_task) { traded_cas_field current_cas = target_task->external_trading_deque_cas_.load(); if (current_cas.is_filled_with_object()) { task *result = current_cas.get_trade_object(); @@ -53,35 +53,29 @@ void external_trading_deque::reset_bot_and_top() { top_.store({bot_internal_.stamp, 0}); } -void external_trading_deque::decrease_bot() { - bot_internal_.value--; - bot_.store(bot_internal_.value, std::memory_order_relaxed); -} - -task * external_trading_deque::pop_bot() { - if (bot_internal_.value == 0) { - reset_bot_and_top(); - return nullptr; - } - decrease_bot(); +task *external_trading_deque::pop_bot() { + if (bot_internal_.value > 0) { + bot_internal_.value--; + bot_.store(bot_internal_.value, std::memory_order_relaxed); - auto &current_entry = entries_[bot_internal_.value]; - auto *popped_task = current_entry.traded_task_.load(std::memory_order_relaxed); - auto expected_stamp = current_entry.forwarding_stamp_.load(std::memory_order_relaxed); + auto &current_entry = entries_[bot_internal_.value]; + auto *popped_task = current_entry.traded_task_.load(std::memory_order_relaxed); + auto expected_stamp = current_entry.forwarding_stamp_.load(std::memory_order_relaxed); - // We know what value must be in the cas field if no other thread stole it. 
- traded_cas_field expected_sync_cas_field; - expected_sync_cas_field.fill_with_stamp(expected_stamp, thread_id_); - traded_cas_field empty_cas_field; + // We know what value must be in the cas field if no other thread stole it. + traded_cas_field expected_sync_cas_field; + expected_sync_cas_field.fill_with_stamp(expected_stamp, thread_id_); + traded_cas_field empty_cas_field; - if (popped_task->external_trading_deque_cas_.compare_exchange_strong(expected_sync_cas_field, - empty_cas_field, - std::memory_order_acq_rel)) { - return popped_task; - } else { - reset_bot_and_top(); - return nullptr; + if (popped_task->external_trading_deque_cas_.compare_exchange_strong(expected_sync_cas_field, + empty_cas_field, + std::memory_order_acq_rel)) { + return popped_task; + } } + + reset_bot_and_top(); + return nullptr; } external_trading_deque::peek_result external_trading_deque::peek_top() { @@ -95,7 +89,7 @@ external_trading_deque::peek_result external_trading_deque::peek_top() { } } -task * external_trading_deque::pop_top(task *offered_task, peek_result peek_result) { +task *external_trading_deque::pop_top(task *offered_task, peek_result peek_result) { stamped_integer expected_top = peek_result.top_pointer_; auto local_bot = bot_.load(); if (expected_top.value >= local_bot) { diff --git a/lib/pls/src/internal/scheduling/scheduler.cpp b/lib/pls/src/internal/scheduling/scheduler.cpp index 34c75da..2ab7454 100644 --- a/lib/pls/src/internal/scheduling/scheduler.cpp +++ b/lib/pls/src/internal/scheduling/scheduler.cpp @@ -95,7 +95,6 @@ void scheduler::sync() { spawned_task->run_as_task([active_task, spawned_task, &syncing_state](context_switcher::continuation cont) { active_task->continuation_ = std::move(cont); syncing_state.set_active_task(spawned_task); - return slow_return(syncing_state); });