las3_pub / predictable_parallel_patterns
Commit 10ca31dc authored Jun 05, 2019 by FritzFlorian

First working version of pure fork-join based scheduler.

parent 374153ce
Pipeline #1244 failed with stages in 29 seconds
Showing 9 changed files with 153 additions and 91 deletions:
app/benchmark_unbalanced/main.cpp (+24 -27)
app/invoke_parallel/main.cpp (+1 -1)
lib/pls/include/pls/internal/scheduling/lambda_task.h (+41 -0)
lib/pls/include/pls/internal/scheduling/scheduler.h (+14 -0)
lib/pls/include/pls/internal/scheduling/scheduler_impl.h (+16 -27)
lib/pls/include/pls/internal/scheduling/task.h (+1 -3)
lib/pls/include/pls/internal/scheduling/thread_state.h (+0 -3)
lib/pls/src/internal/scheduling/scheduler.cpp (+52 -7)
lib/pls/src/internal/scheduling/task.cpp (+4 -23)
app/benchmark_unbalanced/main.cpp
@@ -19,15 +19,14 @@ int count_child_nodes(uts::node &node) {
     return child_count;
   }
 
-  auto current_task = pls::task::current();
   std::vector<int> results(children.size());
   for (size_t i = 0; i < children.size(); i++) {
     size_t index = i;
     auto lambda = [&, index] { results[index] = count_child_nodes(children[index]); };
     pls::lambda_task_by_value<typeof(lambda)> sub_task(lambda);
-    current_task->spawn_child(sub_task);
+    pls::scheduler::spawn_child(sub_task);
   }
-  current_task->wait_for_all();
+  pls::scheduler::wait_for_all();
   for (auto result : results) {
     child_count += result;
   }
@@ -36,43 +35,41 @@ int count_child_nodes(uts::node &node) {
 }
 
 int unbalanced_tree_search(int seed, int root_children, double q, int normal_children) {
   static auto id = pls::unique_id::create(42);
 
   int result;
   auto lambda = [&] {
     uts::node root(seed, root_children, q, normal_children);
     result = count_child_nodes(root);
   };
 
-  pls::lambda_task_by_reference<typeof(lambda)> task(lambda);
-  pls::task root_task{&sub_task, id};
-  pls::scheduler::execute_task(root_task);
+  pls::lambda_task_by_reference<typeof(lambda)> sub_task(lambda);
+  pls::scheduler::spawn_child(sub_task);
+  pls::scheduler::wait_for_all();
 
   return result;
 }
 
 //
 //int main() {
 //  PROFILE_ENABLE
 //  pls::internal::helpers::run_mini_benchmark([&] {
 //    unbalanced_tree_search(SEED, ROOT_CHILDREN, Q, NORMAL_CHILDREN);
 //  }, 8, 4000);
 //
 //  PROFILE_SAVE("test_profile.prof")
 //}
 
 int main() {
   PROFILE_ENABLE
-  pls::malloc_scheduler_memory my_scheduler_memory{8, 2u << 18};
-  pls::scheduler scheduler{&my_scheduler_memory, 8};
-
-  scheduler.perform_work([&] {
-    PROFILE_MAIN_THREAD
-    for (int i = 0; i < 50; i++) {
-      PROFILE_WORK_BLOCK("Top Level")
-      int result = unbalanced_tree_search(SEED, ROOT_CHILDREN, Q, NORMAL_CHILDREN);
-      std::cout << result << std::endl;
-    }
-  });
+  pls::internal::helpers::run_mini_benchmark([&] {
+    unbalanced_tree_search(SEED, ROOT_CHILDREN, Q, NORMAL_CHILDREN);
+  }, 8, 2000);
 
   PROFILE_SAVE("test_profile.prof")
 }
 
 //int main() {
 //  PROFILE_ENABLE
 //  pls::malloc_scheduler_memory my_scheduler_memory{8, 2u << 18};
 //  pls::scheduler scheduler{&my_scheduler_memory, 8};
 //
 //  scheduler.perform_work([&] {
 //    PROFILE_MAIN_THREAD
 //    for (int i = 0; i < 50; i++) {
 //      PROFILE_WORK_BLOCK("Top Level")
 //      int result = unbalanced_tree_search(SEED, ROOT_CHILDREN, Q, NORMAL_CHILDREN);
 //      std::cout << result << std::endl;
 //    }
 //  });
 //
 //  PROFILE_SAVE("test_profile.prof")
 //}
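Taken together, this file shows the whole user-facing pattern of the commit: wrap work in a lambda, wrap the lambda in a lambda_task, fork with pls::scheduler::spawn_child() and join with pls::scheduler::wait_for_all(), all from inside a perform_work() section. Below is a minimal sketch of the same shape for a two-way parallel sum; the pls calls are taken from this diff, while the umbrella include and the sum_range() helper are assumptions for illustration only.

#include "pls/pls.h"  // assumed facade header, not confirmed by this diff
#include <cstddef>
#include <numeric>
#include <vector>

long sum_range(const std::vector<long> &v, size_t begin, size_t end) {
  return std::accumulate(v.begin() + begin, v.begin() + end, 0L);
}

// Must be called from inside scheduler.perform_work([...] { ... });
long parallel_sum(const std::vector<long> &v) {
  long left = 0;
  auto lambda = [&] { left = sum_range(v, 0, v.size() / 2); };
  pls::lambda_task_by_reference<typeof(lambda)> sub_task(lambda);

  pls::scheduler::spawn_child(sub_task);               // fork: child may run on another worker
  long right = sum_range(v, v.size() / 2, v.size());   // parent keeps working meanwhile
  pls::scheduler::wait_for_all();                      // join: all spawned children are done
  return left + right;
}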
app/invoke_parallel/main.cpp
@@ -91,7 +91,7 @@ int main() {
     PROFILE_MAIN_THREAD
     // Call looks just the same, only requirement is
     // the enclosure in the perform_work lambda.
-    for (int i = 0; i < 1000; i++) {
+    for (int i = 0; i < 10; i++) {
       PROFILE_WORK_BLOCK("Top Level FFT")
       complex_vector input = initial_input;
       fft(input.begin(), input.size());
lib/pls/include/pls/internal/scheduling/lambda_task.h
new file (mode 100644)
#ifndef PLS_LAMBDA_TASK_H_
#define PLS_LAMBDA_TASK_H_

#include "pls/internal/scheduling/task.h"

namespace pls {
namespace internal {
namespace scheduling {

template<typename Function>
class lambda_task_by_reference : public task {
  const Function &function_;

 public:
  explicit lambda_task_by_reference(const Function &function) : task{}, function_{function} {};

 protected:
  void execute_internal() override {
    function_();
  }
};

template<typename Function>
class lambda_task_by_value : public task {
  const Function function_;

 public:
  explicit lambda_task_by_value(const Function &function) : task{}, function_{function} {};

 protected:
  void execute_internal() override {
    function_();
  }
};

}
}
}

#endif //PLS_LAMBDA_TASK_H_
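The two wrappers differ only in how they hold the callable: lambda_task_by_reference stores a const Function &, so the lambda must stay alive until the task has executed, while lambda_task_by_value copies the callable into the task — which is why the loop in app/benchmark_unbalanced/main.cpp, whose lambda dies at the end of each iteration, uses the by-value form. A standalone sketch of that lifetime rule, with simplified stand-ins rather than the real pls types:

#include <cstdio>

// Simplified stand-ins for the two holding strategies above.
template<typename F> struct task_by_reference { const F &f_; void execute() { f_(); } };
template<typename F> struct task_by_value     { const F  f_; void execute() { f_(); } };

template<typename Task>
void run(Task t) { t.execute(); }  // takes its own copy of the task, then runs it

int main() {
  int x = 42;
  auto lambda = [&x] { std::printf("%d\n", x); };

  task_by_value<decltype(lambda)> safe{lambda};
  run(safe);     // fine even if `lambda` had already gone out of scope:
                 // the task owns a copy of the callable

  task_by_reference<decltype(lambda)> fragile{lambda};
  run(fragile);  // fine only because `lambda` is still alive here; a
                 // reference-holding task spawned from a dead scope would dangle
}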
lib/pls/include/pls/internal/scheduling/scheduler.h
@@ -22,12 +22,23 @@ namespace scheduling {
 using scheduler_thread = base::thread<decltype(&worker_routine), thread_state>;
 
+/**
+ * The scheduler is the central part of the dispatching-framework.
+ * It manages a pool of worker threads (creates, sleeps/wakes up, destroys)
+ * and allows the execution of parallel sections.
+ *
+ * It works in close relation with the 'task' class for scheduling.
+ */
 class scheduler {
   friend class task;
 
   const unsigned int num_threads_;
   scheduler_memory *memory_;
   base::barrier sync_barrier_;
 
+  task *main_thread_root_task_;
+  bool work_section_done_;
+
   bool terminated_;
 
  public:
  /**
@@ -85,6 +96,9 @@ class scheduler {
   task *get_local_task();
   task *steal_task();
 
+  bool try_execute_local();
+  bool try_execute_stolen();
 };
 }
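Putting the pieces together, the intended lifecycle is: reserve per-thread memory, construct the scheduler (which brings up the worker pool), then hand it a parallel section. A minimal sketch using only calls visible elsewhere in this commit; the include path is an assumption, and the sizes are the ones the benchmarks use:

#include "pls/pls.h"  // assumed facade header, not confirmed by this diff

int main() {
  // 8 worker threads with 2^19 bytes of task-stack memory each.
  pls::malloc_scheduler_memory my_scheduler_memory{8, 2u << 18};
  pls::scheduler scheduler{&my_scheduler_memory, 8};

  // Everything parallel must run inside the perform_work section;
  // spawn_child()/wait_for_all() calls are only valid in here.
  scheduler.perform_work([&] {
    // fork-join work goes here
  });
}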
lib/pls/include/pls/internal/scheduling/scheduler_impl.h
@@ -2,35 +2,30 @@
 #ifndef PLS_SCHEDULER_IMPL_H
 #define PLS_SCHEDULER_IMPL_H
 
 #include "pls/internal/scheduling/lambda_task.h"
 
 namespace pls {
 namespace internal {
 namespace scheduling {
 
 // TODO: generally look into the performance implications of using many thread_state::get() calls
 
 template<typename Function>
 void scheduler::perform_work(Function work_section) {
   PROFILE_WORK_BLOCK("scheduler::perform_work")
 //  root_task<Function> master{work_section};
 //
 //  // Push root task on stacks
 //  auto new_master = memory_->task_stack_for(0)->push(master);
 //  memory_->thread_state_for(0)->root_task_ = new_master;
 //  memory_->thread_state_for(0)->current_task_ = new_master;
 //  for (unsigned int i = 1; i < num_threads_; i++) {
 //    root_worker_task<Function> worker{new_master};
 //    auto new_worker = memory_->task_stack_for(0)->push(worker);
 //    memory_->thread_state_for(i)->root_task_ = new_worker;
 //    memory_->thread_state_for(i)->current_task_ = new_worker;
 //  }
 //
 //  // Perform and wait for work
 //  sync_barrier_.wait(); // Trigger threads to wake up
 //  sync_barrier_.wait(); // Wait for threads to finish
 //  if (execute_main_thread) {
 //    work_section();
 //
 //    // Clean up stack
 //    memory_->task_stack_for(0)->pop<typeof(master)>();
 //    for (unsigned int i = 1; i < num_threads_; i++) {
 //      root_worker_task<Function> worker{new_master};
 //      memory_->task_stack_for(0)->pop<typeof(worker)>();
 //  sync_barrier_.wait(); // Trigger threads to wake up
 //  sync_barrier_.wait(); // Wait for threads to finish
 //  } else {
+  lambda_task_by_reference<Function> root_task{work_section};
+  main_thread_root_task_ = &root_task;
+  work_section_done_ = false;
+
+  sync_barrier_.wait(); // Trigger threads to wake up
+  sync_barrier_.wait(); // Wait for threads to finish
 //  }
 }
@@ -39,12 +34,6 @@ void scheduler::spawn_child(T &sub_task) {
   thread_state::get()->current_task_->spawn_child(sub_task);
 }
 
-void scheduler::wait_for_all() {
-  thread_state::get()->current_task_->wait_for_all();
-}
-
-thread_state *scheduler::thread_state_for(size_t id) {
-  return memory_->thread_state_for(id);
-}
 
 }
 }
 }
lib/pls/include/pls/internal/scheduling/task.h
@@ -39,14 +39,12 @@ class task {
  private:
   void execute();
-  bool try_execute_local();
-  bool try_execute_stolen();
 };
 
 template<typename T>
 void task::spawn_child(T &sub_task) {
   PROFILE_FORK_JOIN_STEALING("spawn_child")
-  static_assert(std::is_base_of<T, task>::value, "Only pass task subclasses!");
+  static_assert(std::is_base_of<task, T>::value, "Only pass task subclasses!");
 
   // Keep our refcount up to date
   ref_count_++;
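The corrected assertion fixes a real bug: std::is_base_of takes its arguments in <Base, Derived> order, so the old check asked whether task derives from T and would have rejected every legitimate task subclass. A quick standalone check (my_task is a hypothetical type for illustration):

#include <type_traits>

struct task {};
struct my_task : task {};

// Fixed order, <Base, Derived>: accepts subclasses of task.
static_assert(std::is_base_of<task, my_task>::value, "my_task derives from task");
// The old, reversed order asked whether task derives from my_task, which is false.
static_assert(!std::is_base_of<my_task, task>::value, "reversed arguments reject valid subclasses");

int main() {}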
lib/pls/include/pls/internal/scheduling/thread_state.h
@@ -19,7 +19,6 @@ class task;
 struct thread_state {
   alignas(base::system_details::CACHE_LINE_SIZE) scheduler *scheduler_;
   alignas(base::system_details::CACHE_LINE_SIZE) task *root_task_;
   alignas(base::system_details::CACHE_LINE_SIZE) task *current_task_;
   alignas(base::system_details::CACHE_LINE_SIZE) data_structures::aligned_stack *task_stack_;
   alignas(base::system_details::CACHE_LINE_SIZE) data_structures::work_stealing_deque<task> deque_;

@@ -28,7 +27,6 @@ struct thread_state {
   thread_state() :
       scheduler_{nullptr},
       root_task_{nullptr},
       current_task_{nullptr},
       task_stack_{nullptr},
       deque_{task_stack_},

@@ -37,7 +35,6 @@ struct thread_state {
   thread_state(scheduler *scheduler, data_structures::aligned_stack *task_stack, unsigned int id) :
       scheduler_{scheduler},
       root_task_{nullptr},
       current_task_{nullptr},
       task_stack_{task_stack},
       deque_{task_stack_},
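Every member above carries alignas(CACHE_LINE_SIZE) so that each field occupies its own cache line: a worker constantly writing its deque_ then cannot invalidate the line a thief is reading scheduler_ or current_task_ from (false sharing). The idiom in isolation; the 64-byte line size is an assumption here, pls takes the real value from base::system_details:

#include <atomic>
#include <cstddef>

constexpr std::size_t CACHE_LINE_SIZE = 64;  // assumed; pls reads this from system_details

struct per_thread_data {
  // Without the alignas, a_ and b_ could share one cache line and two
  // threads writing them would ping-pong that line between cores.
  alignas(CACHE_LINE_SIZE) std::atomic<long> a_{0};
  alignas(CACHE_LINE_SIZE) std::atomic<long> b_{0};
};

// Each aligned member starts a fresh line, so the struct spans at least two.
static_assert(sizeof(per_thread_data) >= 2 * CACHE_LINE_SIZE, "members on separate cache lines");

int main() {}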
lib/pls/src/internal/scheduling/scheduler.cpp
@@ -30,19 +30,37 @@ scheduler::~scheduler() {
 }
 
 void scheduler::worker_routine() {
-  auto my_state = base::this_thread::state<thread_state>();
+  auto my_state = thread_state::get();
+  auto scheduler = my_state->scheduler_;
 
   while (true) {
-    my_state->scheduler_->sync_barrier_.wait();
-    if (my_state->scheduler_->terminated_) {
+    // Wait to be triggered
+    scheduler->sync_barrier_.wait();
+
+    // Check for shutdown
+    if (scheduler->terminated_) {
       return;
     }
 
-    // The root task must only return when all work is done,
-    // because of this a simple call is enough to ensure the
-    // fork-join-section is done (logically joined back into our main thread).
-    my_state->root_task_->execute();
+    // Execute work
+    if (my_state->id_ == 0) {
+      // Main Thread
+      auto root_task = scheduler->main_thread_root_task_;
+      root_task->parent_ = nullptr;
+      root_task->deque_state_ = my_state->deque_.save_state();
+      root_task->execute();
+      scheduler->work_section_done_ = true;
+    } else {
+      // Worker Threads
+      while (!scheduler->work_section_done_) {
+        if (!scheduler->try_execute_local()) {
+          scheduler->try_execute_stolen();
+        }
+      }
+    }
+
+    // Sync back with main thread
+    my_state->scheduler_->sync_barrier_.wait();
   }
 }
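The control flow above is a two-barrier handshake with perform_work() in scheduler_impl.h: the main thread publishes main_thread_root_task_ and releases the first barrier; worker 0 executes the root task and raises work_section_done_ while the remaining workers help through local execution and stealing until the flag flips; the second barrier then closes the parallel section. A self-contained sketch of just this handshake, using C++20 std::barrier and std::jthread as stand-ins for the pls base primitives, with the actual work elided:

#include <atomic>
#include <barrier>
#include <cstdio>
#include <thread>
#include <vector>

int main() {
  constexpr int kThreads = 4;  // this thread plays the role of worker 0
  std::barrier sync_barrier(kThreads);
  std::atomic<bool> work_section_done{false};
  std::atomic<bool> terminated{false};

  std::vector<std::jthread> workers;
  for (int id = 1; id < kThreads; id++) {
    workers.emplace_back([&] {
      while (true) {
        sync_barrier.arrive_and_wait();  // wait to be triggered
        if (terminated) return;          // check for shutdown
        while (!work_section_done) {
          // try_execute_local() / try_execute_stolen() would run here
        }
        sync_barrier.arrive_and_wait();  // sync back with the main thread
      }
    });
  }

  // perform_work(): publish the root task, then bracket the section.
  work_section_done = false;
  sync_barrier.arrive_and_wait();                // trigger threads to wake up
  std::puts("worker 0 executes the root task");  // root task runs to completion
  work_section_done = true;
  sync_barrier.arrive_and_wait();                // wait for threads to finish

  terminated = true;  // shutdown round, mirrors scheduler termination
  sync_barrier.arrive_and_wait();
}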
@@ -100,6 +118,33 @@ task *scheduler::steal_task() {
   return nullptr;
 }
 
+bool scheduler::try_execute_local() {
+  task *local_task = get_local_task();
+  if (local_task != nullptr) {
+    local_task->execute();
+    return true;
+  } else {
+    return false;
+  }
+}
+
+bool scheduler::try_execute_stolen() {
+  task *stolen_task = steal_task();
+  if (stolen_task != nullptr) {
+    stolen_task->deque_state_ = thread_state::get()->deque_.save_state();
+    stolen_task->execute();
+    return true;
+  }
+  return false;
+}
+
+void scheduler::wait_for_all() {
+  thread_state::get()->current_task_->wait_for_all();
+}
+
+thread_state *scheduler::thread_state_for(size_t id) {
+  return memory_->thread_state_for(id);
+}
 
 }
 }
 }
lib/pls/src/internal/scheduling/task.cpp
@@ -36,31 +36,12 @@ void task::execute() {
   }
 }
 
-bool task::try_execute_local() {
-  task *local_task = thread_state::get()->scheduler_->get_local_task();
-  if (local_task != nullptr) {
-    local_task->execute();
-    return true;
-  } else {
-    return false;
-  }
-}
-
-bool task::try_execute_stolen() {
-  task *stolen_task = thread_state::get()->scheduler_->steal_task();
-  if (stolen_task != nullptr) {
-    stolen_task->deque_state_ = thread_state::get()->deque_.save_state();
-    stolen_task->execute();
-    return true;
-  }
-  return false;
-}
-
 void task::wait_for_all() {
+  auto scheduler = thread_state::get()->scheduler_;
   while (ref_count_ > 0) {
-    if (!try_execute_local()) {
-      try_execute_stolen();
+    if (!scheduler->try_execute_local()) {
+      scheduler->try_execute_stolen();
     }
   }
   thread_state::get()->deque_.release_memory_until(deque_state_);
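The loop above is the commit's join discipline: wait_for_all() never blocks the thread. While children are outstanding (ref_count_ > 0) it first drains the local deque and only then tries to steal, so a joining worker stays productive instead of idling. The same discipline in isolation, with hypothetical function-object parameters rather than the pls signatures:

#include <atomic>
#include <functional>

// Sketch of "help while joining": keep executing other tasks until all
// children have finished and dropped the reference count to zero.
void join_by_helping(const std::atomic<int> &ref_count,
                     const std::function<bool()> &try_execute_local,
                     const std::function<bool()> &try_execute_stolen) {
  while (ref_count.load() > 0) {
    if (!try_execute_local()) {  // prefer the own deque: cache-hot, newest task first
      try_execute_stolen();      // otherwise try to steal from another worker
    }
  }
}

int main() {
  std::atomic<int> refs{0};  // no outstanding children: the join returns immediately
  join_by_helping(refs, [] { return false; }, [] { return false; });
}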