From 0228aa92eb3196fdb66c8e66eee865be6a31f5bb Mon Sep 17 00:00:00 2001
From: FritzFlorian <flo.fritz@t-online.de>
Date: Fri, 7 Jun 2019 10:46:28 +0200
Subject: [PATCH] Allow re-using the main thread as a worker.

---
 lib/pls/include/pls/internal/scheduling/scheduler.h      |  3 ++-
 lib/pls/include/pls/internal/scheduling/scheduler_impl.h | 29 ++++++++++++++++++++---------
 lib/pls/include/pls/internal/scheduling/task.h           |  3 ++-
 lib/pls/src/internal/scheduling/scheduler.cpp            | 10 ++++++++--
 4 files changed, 32 insertions(+), 13 deletions(-)
diff --git a/lib/pls/include/pls/internal/scheduling/scheduler.h b/lib/pls/include/pls/internal/scheduling/scheduler.h
index 54075fc..fb9eb20 100644
--- a/lib/pls/include/pls/internal/scheduling/scheduler.h
+++ b/lib/pls/include/pls/internal/scheduling/scheduler.h
@@ -32,6 +32,7 @@ using scheduler_thread = base::thread<decltype(&worker_routine), thread_state>;
 class scheduler {
   friend class task;
   const unsigned int num_threads_;
+  const bool reuse_thread_;
   scheduler_memory *memory_;
 
   base::barrier sync_barrier_;
@@ -49,7 +50,7 @@ class scheduler {
    * @param memory All memory is allocated statically, thus the user is required to provide the memory instance.
    * @param num_threads The number of worker threads to be created.
    */
-  explicit scheduler(scheduler_memory *memory, unsigned int num_threads);
+  explicit scheduler(scheduler_memory *memory, unsigned int num_threads, bool reuse_thread = true);
 
   /**
    * The scheduler is implicitly terminated as soon as it leaves the scope.
diff --git a/lib/pls/include/pls/internal/scheduling/scheduler_impl.h b/lib/pls/include/pls/internal/scheduling/scheduler_impl.h
index 98156dc..50f7b04 100644
--- a/lib/pls/include/pls/internal/scheduling/scheduler_impl.h
+++ b/lib/pls/include/pls/internal/scheduling/scheduler_impl.h
@@ -14,19 +14,30 @@ template<typename Function>
 void scheduler::perform_work(Function work_section) {
   PROFILE_WORK_BLOCK("scheduler::perform_work")
 
-//  if (execute_main_thread) {
-//    work_section();
-//
-//    sync_barrier_.wait(); // Trigger threads to wake up
-//    sync_barrier_.wait(); // Wait for threads to finish
-//  } else {
+  // Prepare main root task
   lambda_task_by_reference<Function> root_task{work_section};
   main_thread_root_task_ = &root_task;
   work_section_done_ = false;
 
-  sync_barrier_.wait(); // Trigger threads to wake up
-  sync_barrier_.wait(); // Wait for threads to finish
-//  }
+  if (reuse_thread_) {
+    // TODO: Decide whether thread states should be reworked so that installing ours does not override the calling thread's current state
+    auto my_state = memory_->thread_state_for(0);
+    base::this_thread::set_state(my_state); // Make THIS THREAD become the main worker
+
+    sync_barrier_.wait(); // Trigger threads to wake up
+
+    // Execute the root task on this thread (TODO: see if we can remove this duplicated code)
+    root_task.parent_ = nullptr;
+    root_task.deque_state_ = my_state->deque_.save_state();
+    root_task.execute();
+    work_section_done_ = true;
+
+    sync_barrier_.wait(); // Wait for threads to finish
+  } else {
+    // Simply trigger the other threads to do the work; this thread will sleep/wait for the time being
+    sync_barrier_.wait(); // Trigger threads to wake up
+    sync_barrier_.wait(); // Wait for threads to finish
+  }
 }
 
 template<typename T>
diff --git a/lib/pls/include/pls/internal/scheduling/task.h b/lib/pls/include/pls/internal/scheduling/task.h
index cdeb9f2..7bb4a4b 100644
--- a/lib/pls/include/pls/internal/scheduling/task.h
+++ b/lib/pls/include/pls/internal/scheduling/task.h
@@ -62,12 +62,13 @@ void task::spawn_child(T &&sub_task) {
 
 template<typename T>
 void task::spawn_child_and_wait(T &&sub_task) {
-  PROFILE_FORK_JOIN_STEALING("spawn_child")
+  PROFILE_FORK_JOIN_STEALING("spawn_child_wait")
   static_assert(std::is_base_of<task, typename std::remove_reference<T>::type>::value, "Only pass task subclasses!");
 
   // Assign forced values (for stack and parent management)
   sub_task.parent_ = nullptr;
   sub_task.deque_state_ = thread_state::get()->deque_.save_state();
+  PROFILE_END_BLOCK
   sub_task.execute();
 
   wait_for_all();
diff --git a/lib/pls/src/internal/scheduling/scheduler.cpp b/lib/pls/src/internal/scheduling/scheduler.cpp
index 4a36132..3b91f0e 100644
--- a/lib/pls/src/internal/scheduling/scheduler.cpp
+++ b/lib/pls/src/internal/scheduling/scheduler.cpp
@@ -8,10 +8,11 @@ namespace pls {
 namespace internal {
 namespace scheduling {
 
-scheduler::scheduler(scheduler_memory *memory, const unsigned int num_threads) :
+scheduler::scheduler(scheduler_memory *memory, const unsigned int num_threads, bool reuse_thread) :
     num_threads_{num_threads},
+    reuse_thread_{reuse_thread},
     memory_{memory},
-    sync_barrier_{num_threads + 1},
+    sync_barrier_{num_threads + 1 - reuse_thread},
     terminated_{false} {
   if (num_threads_ > memory_->max_threads()) {
     PLS_ERROR("Tried to create scheduler with more OS threads than pre-allocated memory.");
@@ -20,8 +21,13 @@ scheduler::scheduler(scheduler_memory *memory, const unsigned int num_threads) :
   for (unsigned int i = 0; i < num_threads_; i++) {
     // Placement new is required, as the memory of `memory_` is not required to be initialized.
     new((void *) memory_->thread_state_for(i)) thread_state{this, memory_->task_stack_for(i), i};
+
+    if (reuse_thread && i == 0) {
+      continue; // Skip creating a thread for the first worker when re-using the user's thread, as the user's thread takes its place.
+    }
     new((void *) memory_->thread_for(i))base::thread<void (*)(), thread_state>(&scheduler::worker_routine,
                                                                                memory_->thread_state_for(i));
+
   }
 }
 
--
libgit2 0.26.0