Commit 4e135d7e by FritzFlorian

Rework scheduler_memory to remove virtual functions.

This was highlighted as an issue in the steal_work routine, as frequent calls to virtual functions are expensive.
parent 3bdaba42
...@@ -14,11 +14,36 @@ void worker_routine(); ...@@ -14,11 +14,36 @@ void worker_routine();
using scheduler_thread = base::thread<decltype(&worker_routine), thread_state>; using scheduler_thread = base::thread<decltype(&worker_routine), thread_state>;
class scheduler_memory { class scheduler_memory {
private:
size_t max_threads_;
thread_state **thread_states_;
scheduler_thread **threads_;
data_structures::aligned_stack **task_stacks_;
protected:
void init(size_t max_therads,
thread_state **thread_states,
scheduler_thread **threads,
data_structures::aligned_stack **task_stacks) {
max_threads_ = max_therads;
thread_states_ = thread_states;
threads_ = threads;
task_stacks_ = task_stacks;
}
public: public:
virtual size_t max_threads() const = 0; size_t max_threads() const {
virtual thread_state *thread_state_for(size_t id) = 0; return max_threads_;
virtual scheduler_thread *thread_for(size_t id) = 0; }
virtual data_structures::aligned_stack *task_stack_for(size_t id) = 0; thread_state *thread_state_for(size_t id) const {
return thread_states_[id];
}
scheduler_thread *thread_for(size_t id) const {
return threads_[id];
}
data_structures::aligned_stack *task_stack_for(size_t id) const {
return task_stacks_[id];
}
}; };
template<size_t MAX_THREADS, size_t TASK_STACK_SIZE> template<size_t MAX_THREADS, size_t TASK_STACK_SIZE>
...@@ -31,23 +56,30 @@ class static_scheduler_memory : public scheduler_memory { ...@@ -31,23 +56,30 @@ class static_scheduler_memory : public scheduler_memory {
using aligned_thread_stack = base::alignment::aligned_wrapper<std::array<char, TASK_STACK_SIZE>>; using aligned_thread_stack = base::alignment::aligned_wrapper<std::array<char, TASK_STACK_SIZE>>;
using aligned_aligned_stack = base::alignment::aligned_wrapper<data_structures::aligned_stack>; using aligned_aligned_stack = base::alignment::aligned_wrapper<data_structures::aligned_stack>;
// Actual Memory
std::array<aligned_thread, MAX_THREADS> threads_; std::array<aligned_thread, MAX_THREADS> threads_;
std::array<aligned_thread_state, MAX_THREADS> thread_states_; std::array<aligned_thread_state, MAX_THREADS> thread_states_;
std::array<aligned_thread_stack, MAX_THREADS> task_stacks_memory_; std::array<aligned_thread_stack, MAX_THREADS> task_stacks_memory_;
std::array<aligned_aligned_stack, MAX_THREADS> task_stacks_; std::array<aligned_aligned_stack, MAX_THREADS> task_stacks_;
// References for parent
std::array<scheduler_thread *, MAX_THREADS> thread_refs_;
std::array<thread_state *, MAX_THREADS> thread_state_refs_;
std::array<data_structures::aligned_stack *, MAX_THREADS> task_stack_refs_;
public: public:
static_scheduler_memory() { static_scheduler_memory() : scheduler_memory() {
for (size_t i = 0; i < MAX_THREADS; i++) { for (size_t i = 0; i < MAX_THREADS; i++) {
new((void *) task_stacks_[i].pointer()) data_structures::aligned_stack(task_stacks_memory_[i].pointer()->data(), new((void *) task_stacks_[i].pointer()) data_structures::aligned_stack(task_stacks_memory_[i].pointer()->data(),
TASK_STACK_SIZE); TASK_STACK_SIZE);
thread_refs_[i] = threads_[i].pointer();
thread_state_refs_[i] = thread_states_[i].pointer();
task_stack_refs_[i] = task_stacks_[i].pointer();
} }
}
size_t max_threads() const override { return MAX_THREADS; } init(MAX_THREADS, thread_state_refs_.data(), thread_refs_.data(), task_stack_refs_.data());
thread_state *thread_state_for(size_t id) override { return thread_states_[id].pointer(); } }
scheduler_thread *thread_for(size_t id) override { return threads_[id].pointer(); }
data_structures::aligned_stack *task_stack_for(size_t id) override { return task_stacks_[id].pointer(); }
}; };
class malloc_scheduler_memory : public scheduler_memory { class malloc_scheduler_memory : public scheduler_memory {
...@@ -60,18 +92,20 @@ class malloc_scheduler_memory : public scheduler_memory { ...@@ -60,18 +92,20 @@ class malloc_scheduler_memory : public scheduler_memory {
const size_t num_threads_; const size_t num_threads_;
// Actual Memory
aligned_thread *threads_; aligned_thread *threads_;
aligned_thread_state *thread_states_; aligned_thread_state *thread_states_;
char **task_stacks_memory_; char **task_stacks_memory_;
aligned_aligned_stack *task_stacks_; aligned_aligned_stack *task_stacks_;
// References for parent
scheduler_thread **thread_refs_;
thread_state **thread_state_refs_;
data_structures::aligned_stack **task_stack_refs_;
public: public:
explicit malloc_scheduler_memory(size_t num_threads, size_t memory_per_stack = 2 << 16); explicit malloc_scheduler_memory(size_t num_threads, size_t memory_per_stack = 2 << 16);
~malloc_scheduler_memory(); ~malloc_scheduler_memory();
size_t max_threads() const override { return num_threads_; }
thread_state *thread_state_for(size_t id) override { return thread_states_[id].pointer(); }
scheduler_thread *thread_for(size_t id) override { return threads_[id].pointer(); }
data_structures::aligned_stack *task_stack_for(size_t id) override { return task_stacks_[id].pointer(); }
}; };
} }
......
...@@ -10,14 +10,25 @@ malloc_scheduler_memory::malloc_scheduler_memory(const size_t num_threads, const ...@@ -10,14 +10,25 @@ malloc_scheduler_memory::malloc_scheduler_memory(const size_t num_threads, const
reinterpret_cast<aligned_thread *>(base::alignment::allocate_aligned(num_threads * sizeof(aligned_thread))); reinterpret_cast<aligned_thread *>(base::alignment::allocate_aligned(num_threads * sizeof(aligned_thread)));
thread_states_ = reinterpret_cast<aligned_thread_state *>(base::alignment::allocate_aligned( thread_states_ = reinterpret_cast<aligned_thread_state *>(base::alignment::allocate_aligned(
num_threads * sizeof(aligned_thread_state))); num_threads * sizeof(aligned_thread_state)));
task_stacks_ = reinterpret_cast<aligned_aligned_stack *>(base::alignment::allocate_aligned( task_stacks_ = reinterpret_cast<aligned_aligned_stack *>(base::alignment::allocate_aligned(
num_threads * sizeof(aligned_aligned_stack))); num_threads * sizeof(aligned_aligned_stack)));
task_stacks_memory_ = reinterpret_cast<char **>(base::alignment::allocate_aligned(num_threads * sizeof(char *))); task_stacks_memory_ = reinterpret_cast<char **>(base::alignment::allocate_aligned(num_threads * sizeof(char *)));
thread_refs_ = static_cast<scheduler_thread **>(malloc(num_threads * sizeof(scheduler_thread *)));
thread_state_refs_ = static_cast<thread_state **>(malloc(num_threads * sizeof(thread_state *)));
task_stack_refs_ =
static_cast<data_structures::aligned_stack **>(malloc(num_threads * sizeof(data_structures::aligned_stack *)));
for (size_t i = 0; i < num_threads_; i++) { for (size_t i = 0; i < num_threads_; i++) {
task_stacks_memory_[i] = reinterpret_cast<char *>(base::alignment::allocate_aligned(memory_per_stack)); task_stacks_memory_[i] = reinterpret_cast<char *>(base::alignment::allocate_aligned(memory_per_stack));
new((void *) task_stacks_[i].pointer()) data_structures::aligned_stack(task_stacks_memory_[i], memory_per_stack); new((void *) task_stacks_[i].pointer()) data_structures::aligned_stack(task_stacks_memory_[i], memory_per_stack);
thread_refs_[i] = threads_[i].pointer();
thread_state_refs_[i] = thread_states_[i].pointer();
task_stack_refs_[i] = task_stacks_[i].pointer();
} }
init(num_threads, thread_state_refs_, thread_refs_, task_stack_refs_);
} }
malloc_scheduler_memory::~malloc_scheduler_memory() { malloc_scheduler_memory::~malloc_scheduler_memory() {
...@@ -29,6 +40,10 @@ malloc_scheduler_memory::~malloc_scheduler_memory() { ...@@ -29,6 +40,10 @@ malloc_scheduler_memory::~malloc_scheduler_memory() {
} }
free(task_stacks_); free(task_stacks_);
free(task_stacks_memory_); free(task_stacks_memory_);
free(thread_refs_);
free(thread_state_refs_);
free(task_stack_refs_);
} }
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment