Sketch minimal serial calling sequence.

The current state shows the minimum set of actions needed to execute a parallel call: get the thread-local state, find the active frame, execute on the next frame, and return to the active frame.

Sketch minimal serial calling sequence.
The current state shows the minimum set of actions needed to execute a parallel call: get the thread-local state, find the active frame, execute on the next frame, and return to the active frame.
625836aa · FritzFlorian · 83c6e622 · 625836aa · 625836aa · 625836aa
Commit 625836aa authored Jan 24, 2020 by FritzFlorian
6 changed files
--- a/app/benchmark_fib/main.cpp
+++ b/app/benchmark_fib/main.cpp
@@ -17,15 +17,20 @@ int pls_fib(int n) {
    return 1;
  }

-  int a = pls_fib(n - 1);
-  int b = pls_fib(n - 2);
+  int a, b;
+  scheduler::spawn([n, &a]() {
+    a = pls_fib(n - 1);
+  });
+  scheduler::spawn([n, &b]() {
+    b = pls_fib(n - 2);
+  });

  return a + b;
 }

 constexpr int MAX_NUM_THREADS = 1;
 constexpr int MAX_NUM_TASKS = 64;
-constexpr int MAX_STACK_SIZE = 128;
+constexpr int MAX_STACK_SIZE = 256;

 int main(int argc, char **argv) {
  int num_threads;

--- a/app/playground/main.cpp
+++ b/app/playground/main.cpp
 #include <utility>
 #include <cstdio>
+#include <chrono>

 #include "context_switcher/context_switcher.h"
+using namespace context_switcher;
+using namespace std;
+
+const size_t NUM_RUNS = 1000;

 // Memory for custom stack and continuation semantics
-const size_t STACK_SIZE = 512 * 8;
-char custom_stack_1[STACK_SIZE];
+const size_t STACK_SIZE = 512 * 1;
+const size_t NUM_STACKS = 64;
+char custom_stacks[NUM_STACKS][STACK_SIZE];

-// Force disable optimization
-volatile int value = 0;
+int fib(int n) {
+  if (n <= 1) {
+    return 1;
+  }

-int main() {
-  using namespace context_switcher;
-
-  printf("Main 1!\n");
-  auto cont_2 = enter_context(custom_stack_1, STACK_SIZE, [](continuation &&cont_main) {
-    printf("Stack 1!\n");
-    cont_main = switch_context(std::move(cont_main));
-    printf("Stack 2!\n");
-    return std::move(cont_main);
+  int a, b;
+  enter_context(custom_stacks[n], STACK_SIZE, [n, &a](continuation &&cont) {
+    a = fib(n - 1);
+    return std::move(cont);
+  });
+  enter_context(custom_stacks[n], STACK_SIZE, [n, &b](continuation &&cont) {
+    b = fib(n - 2);
+    return std::move(cont);
  });
-  printf("Main 2!\n");
-  cont_2 = switch_context(std::move(cont_2));
-  printf("Main 3!\n");
+
+  return a + b;
+}
+
+volatile int result;
+// Benchmark driver: runs fib(18) NUM_RUNS times and prints the mean wall-clock time per run in microseconds.
+int main() {
+  auto start_time = chrono::steady_clock::now();
+  for (size_t i = 0; i < NUM_RUNS; i++) {
+    // result is volatile, so the compiler has to keep every one of these stores.
+    result = fib(18);
+  }
+  auto end_time = chrono::steady_clock::now();
+  auto elapsed_us = chrono::duration_cast<chrono::microseconds>(end_time - start_time).count();
+
+  // Divide in double: printf's %f consumes a double anyway, and a float would round large microsecond totals.
+  printf("%f", static_cast<double>(elapsed_us) / NUM_RUNS);
+
   return 0;
 }
--- a/lib/context_switcher/include/context_switcher/lambda_capture.h
+++ b/lib/context_switcher/include/context_switcher/lambda_capture.h
@@ -19,8 +19,8 @@ namespace context_switcher {

 template<typename F>
 struct lambda_capture {
-  // TODO: Check if we need an extra template here to perform the move
-  explicit lambda_capture(F &&lambda) : lambda_{std::forward<F>(lambda)} {}
+  template<typename FARG>
+  explicit lambda_capture(FARG &&lambda) : lambda_{std::forward<FARG>(lambda)} {}

  assembly_bindings::continuation_t operator()(assembly_bindings::continuation_t continuation_pointer) {
    continuation cont = lambda_(continuation{continuation_pointer});
@@ -44,8 +44,8 @@ assembly_bindings::continuation_t lambda_capture_callback(assembly_bindings::con
 }

 template<typename F>
-static lambda_capture<F> *place_lambda_capture(F &&lambda, char *memory) {
-  return new(memory) lambda_capture<F>(std::forward<F>(lambda));
+static lambda_capture<typename std::remove_reference<F>::type> *place_lambda_capture(F &&lambda, char *memory) {
+  return new(memory) lambda_capture<typename std::remove_reference<F>::type>(std::forward<F>(lambda));
 }

 }

--- a/lib/pls/include/pls/internal/scheduling/scheduler.h
+++ b/lib/pls/include/pls/internal/scheduling/scheduler.h
@@ -54,12 +54,8 @@ class scheduler {
  void perform_work(Function work_section);

  template<typename Function>
-  void spawn(Function &&lambda) {
-    // TODO: place function on next active
-    // TODO: capture continuation in current active
-    // TODO: advance current active
-    // TODO: after finish, return to last active (if not stolen)
-    // TODO: revert current active
+  static void spawn(Function &&lambda) {
+    thread_state::get().get_task_manager().spawn_child(std::forward<Function>(lambda));
  }

  /**

--- a/lib/pls/include/pls/internal/scheduling/task.h
+++ b/lib/pls/include/pls/internal/scheduling/task.h
@@ -2,6 +2,7 @@
 #define PLS_TASK_H

 #include <utility>
+#include <atomic>

 #include "context_switcher/continuation.h"
 #include "context_switcher/context_switcher.h"
@@ -61,11 +62,22 @@ struct alignas(base::system_details::CACHE_LINE_SIZE) task {
    parent_task_ = parent_task;
  }

+  context_switcher::continuation get_continuation() {  // Hands the continuation stored in this task to the caller.
+    return std::move(continuation_);  // Moves out of the member, so continuation_ is left in a moved-from state.
+  }
+  void set_continuation(context_switcher::continuation &&continuation) {  // Stores the given continuation in this task.
+    continuation_ = std::move(continuation);  // Move-assigns into the member; the argument is left moved-from.
+  }
+
   // Calls context_switcher::enter_context with this task's stack memory and stack size,
   // forwarding the given lambda, and returns the continuation that call yields.
   template<typename F>
   context_switcher::continuation run_as_task(F &&lambda) {
     return context_switcher::enter_context(stack_memory_, stack_size_, std::forward<F>(lambda));
   }

+  // TODO: Remove and add proper version
+  // Simulate 'fast' synchronization
+  std::atomic<int> flag_{0};
+
 private:
  // Stack/Continuation Management
  char *stack_memory_;

--- a/lib/pls/include/pls/internal/scheduling/task_manager.h
+++ b/lib/pls/include/pls/internal/scheduling/task_manager.h
@@ -6,6 +6,8 @@
 #include <utility>
 #include <array>

+#include "context_switcher/continuation.h"
+
 #include "pls/internal/scheduling/task.h"

 #include "pls/internal/data_structures/bounded_trading_deque.h"
@@ -54,6 +56,31 @@ class task_manager {
    return *active_task_;
  }

+  /**
+   * Runs the given lambda as a child task on the task that follows the currently active one.
+   *
+   * The closure handed to run_as_task() stores the continuation it receives in the
+   * previously active task, makes the next task the active one, invokes the lambda,
+   * restores the previously active task and returns the continuation stored in it.
+   *
+   * @param lambda Callable invoked without arguments; it is copied into the closure.
+   */
+  template<typename F>
+  void spawn_child(F &&lambda) {
+    // TODO: Here is some potential for optimization. We could try placing everything manually on the stack.
+    active_task_->get_next()->run_as_task([lambda, this](context_switcher::continuation cont) {
+      auto *last_task = active_task_;
+      auto *this_task = active_task_->get_next();
+
+      // Park the received continuation in the task we are leaving before switching the active task.
+      last_task->set_continuation(std::move(cont));
+      active_task_ = this_task;
+
+      // TODO: Publish last task on deque (do this properly, but this simulates the fastest possible impl)
+//      last_task->flag_.store(1, std::memory_order_seq_cst);
+
+      lambda();
+
+      // TODO: Check if task was stolen from deque (do this properly, but this simulates the fastest possible impl)
+//      if (last_task->flag_.exchange(0, std::memory_order_seq_cst) == 1) {
+      active_task_ = last_task;
+      // get_continuation() already returns by value; wrapping the call in std::move would only block copy elision.
+      return last_task->get_continuation();
+//      } else {
+//        return context_switcher::continuation{nullptr};
+//      }
+    });
+  }
+
 private:
  size_t num_tasks_;