las3_pub / predictable_parallel_patterns
Commit 10ca31dc, authored 5 years ago by FritzFlorian
First working version of pure fork-join based scheduler.
Parent: 374153ce
Pipeline #1244 failed with stages in 29 seconds.
Showing 9 changed files with 151 additions and 89 deletions.
Changed files:
- app/benchmark_unbalanced/main.cpp (+24 -27)
- app/invoke_parallel/main.cpp (+1 -1)
- lib/pls/include/pls/internal/scheduling/lambda_task.h (+41 -0, new file)
- lib/pls/include/pls/internal/scheduling/scheduler.h (+14 -0)
- lib/pls/include/pls/internal/scheduling/scheduler_impl.h (+14 -25)
- lib/pls/include/pls/internal/scheduling/task.h (+1 -3)
- lib/pls/include/pls/internal/scheduling/thread_state.h (+0 -3)
- lib/pls/src/internal/scheduling/scheduler.cpp (+52 -7)
- lib/pls/src/internal/scheduling/task.cpp (+4 -23)
app/benchmark_unbalanced/main.cpp
```diff
@@ -19,15 +19,14 @@ int count_child_nodes(uts::node &node) {
     return child_count;
   }
 
-  auto current_task = pls::task::current();
   std::vector<int> results(children.size());
   for (size_t i = 0; i < children.size(); i++) {
     size_t index = i;
     auto lambda = [&, index] { results[index] = count_child_nodes(children[index]); };
     pls::lambda_task_by_value<typeof(lambda)> sub_task(lambda);
-    current_task->spawn_child(sub_task);
+    pls::scheduler::spawn_child(sub_task);
   }
-  current_task->wait_for_all();
+  pls::scheduler::wait_for_all();
   for (auto result : results) {
     child_count += result;
   }
```
```diff
@@ -36,43 +35,41 @@ int count_child_nodes(uts::node &node) {
 }
 
 int unbalanced_tree_search(int seed, int root_children, double q, int normal_children) {
-  static auto id = pls::unique_id::create(42);
   int result;
 
   auto lambda = [&] {
     uts::node root(seed, root_children, q, normal_children);
     result = count_child_nodes(root);
   };
-  pls::lambda_task_by_reference<typeof(lambda)> task(lambda);
-  pls::task root_task{&sub_task, id};
-  pls::scheduler::execute_task(root_task);
+  pls::lambda_task_by_reference<typeof(lambda)> sub_task(lambda);
+  pls::scheduler::spawn_child(sub_task);
+  pls::scheduler::wait_for_all();
 
   return result;
 }
 
 int main() {
   PROFILE_ENABLE
-  pls::malloc_scheduler_memory my_scheduler_memory{8, 2u << 18};
-  pls::scheduler scheduler{&my_scheduler_memory, 8};
-
-  scheduler.perform_work([&] {
-    PROFILE_MAIN_THREAD
-    for (int i = 0; i < 50; i++) {
-      PROFILE_WORK_BLOCK("Top Level")
-      int result = unbalanced_tree_search(SEED, ROOT_CHILDREN, Q, NORMAL_CHILDREN);
-      std::cout << result << std::endl;
-    }
-  });
+  pls::internal::helpers::run_mini_benchmark([&] {
+    unbalanced_tree_search(SEED, ROOT_CHILDREN, Q, NORMAL_CHILDREN);
+  }, 8, 2000);
 
   PROFILE_SAVE("test_profile.prof")
 }
+
+//int main() {
+//  PROFILE_ENABLE
+//  pls::malloc_scheduler_memory my_scheduler_memory{8, 2u << 18};
+//  pls::scheduler scheduler{&my_scheduler_memory, 8};
+//
+//  scheduler.perform_work([&] {
+//    PROFILE_MAIN_THREAD
+//    for (int i = 0; i < 50; i++) {
+//      PROFILE_WORK_BLOCK("Top Level")
+//      int result = unbalanced_tree_search(SEED, ROOT_CHILDREN, Q, NORMAL_CHILDREN);
+//      std::cout << result << std::endl;
+//    }
+//  });
+//
+//  PROFILE_SAVE("test_profile.prof")
+//}
```
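Taken together, the benchmark now drives the scheduler exclusively through the static `pls::scheduler::spawn_child` / `pls::scheduler::wait_for_all` entry points instead of fetching an explicit `pls::task::current()` handle. A minimal sketch of a recursive fork-join computation in this style, assuming only the API visible in this diff (`fib` and its base case are illustrative, not part of the commit; `typeof` is the GCC extension the codebase itself uses):

```cpp
// Includes as in app/benchmark_unbalanced/main.cpp (pls scheduler headers).
long fib(long n) {
  if (n <= 1) {
    return n;
  }

  long left, right;
  auto lambda_left = [&] { left = fib(n - 1); };
  auto lambda_right = [&] { right = fib(n - 2); };

  // By-reference tasks are safe here: we block in wait_for_all()
  // before the lambdas (and the result variables) leave scope.
  pls::lambda_task_by_reference<typeof(lambda_left)> left_task(lambda_left);
  pls::lambda_task_by_reference<typeof(lambda_right)> right_task(lambda_right);

  pls::scheduler::spawn_child(left_task);
  pls::scheduler::spawn_child(right_task);
  pls::scheduler::wait_for_all();  // join: help execute/steal until children finish

  return left + right;
}
```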
app/invoke_parallel/main.cpp
```diff
@@ -91,7 +91,7 @@ int main() {
     PROFILE_MAIN_THREAD
     // Call looks just the same, only requirement is
     // the enclosure in the perform_work lambda.
-    for (int i = 0; i < 1000; i++) {
+    for (int i = 0; i < 10; i++) {
       PROFILE_WORK_BLOCK("Top Level FFT")
       complex_vector input = initial_input;
       fft(input.begin(), input.size());
```
lib/pls/include/pls/internal/scheduling/lambda_task.h (new file, 0 → 100644)
```cpp
#ifndef PLS_LAMBDA_TASK_H_
#define PLS_LAMBDA_TASK_H_

#include "pls/internal/scheduling/task.h"

namespace pls {
namespace internal {
namespace scheduling {

template<typename Function>
class lambda_task_by_reference : public task {
  const Function &function_;

 public:
  explicit lambda_task_by_reference(const Function &function) : task{}, function_{function} {};

 protected:
  void execute_internal() override {
    function_();
  }
};

template<typename Function>
class lambda_task_by_value : public task {
  const Function function_;

 public:
  explicit lambda_task_by_value(const Function &function) : task{}, function_{function} {};

 protected:
  void execute_internal() override {
    function_();
  }
};

}
}
}

#endif //PLS_LAMBDA_TASK_H_
```
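The two templates differ only in how they hold the callable: `lambda_task_by_reference` stores a `const Function &` (cheap, but the functor must outlive the task, which holds whenever the spawner blocks in `wait_for_all()`), while `lambda_task_by_value` copies the functor and therefore survives the capturing scope. For comparison, this is roughly the hand-written subclass the templates generate for you — a sketch against the `task` interface shown in this commit; `square_task` is hypothetical:

```cpp
#include "pls/internal/scheduling/task.h"

// Hypothetical hand-written equivalent of one lambda_task_by_value instance.
class square_task : public pls::internal::scheduling::task {
  int input_;
  int *output_;

 public:
  square_task(int input, int *output) : task{}, input_{input}, output_{output} {}

 protected:
  void execute_internal() override {
    *output_ = input_ * input_;  // the "lambda body"
  }
};
```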
lib/pls/include/pls/internal/scheduling/scheduler.h
```diff
@@ -22,12 +22,23 @@ namespace scheduling {
 using scheduler_thread = base::thread<decltype(&worker_routine), thread_state>;
 
+/**
+ * The scheduler is the central part of the dispatching-framework.
+ * It manages a pool of worker threads (creates, sleeps/wakes up, destroys)
+ * and allows executing parallel sections.
+ *
+ * It works in close relation with the 'task' class for scheduling.
+ */
 class scheduler {
+  friend class task;
+
   const unsigned int num_threads_;
   scheduler_memory *memory_;
   base::barrier sync_barrier_;
 
+  task *main_thread_root_task_;
+  bool work_section_done_;
   bool terminated_;
 
  public:
```
```diff
@@ -85,6 +96,9 @@ class scheduler {
   task *get_local_task();
   task *steal_task();
 
+  bool try_execute_local();
+  bool try_execute_stolen();
+
 };
 }
```
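The two new members carry the hand-off for a work section: `perform_work` publishes the wrapped lambda in `main_thread_root_task_`, and the pool spins on `work_section_done_` (see scheduler.cpp below). From user code the lifecycle looks like the benchmark's `main`; a condensed sketch, using the same values as the benchmarks (8 threads, `2u << 18` = 512 KiB of task stack per thread — illustrative, not requirements):

```cpp
#include <iostream>

int main() {
  // Static memory for 8 worker threads, 512 KiB of task stack each.
  pls::malloc_scheduler_memory my_scheduler_memory{8, 2u << 18};
  pls::scheduler scheduler{&my_scheduler_memory, 8};

  // Wraps the lambda in a lambda_task_by_reference, publishes it as
  // main_thread_root_task_ and releases the pool via the sync barrier.
  scheduler.perform_work([&] {
    std::cout << "inside the fork-join section" << std::endl;
  });

  // The destructor sets terminated_ and joins the worker pool.
}
```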
lib/pls/include/pls/internal/scheduling/scheduler_impl.h
```diff
@@ -2,35 +2,30 @@
 #ifndef PLS_SCHEDULER_IMPL_H
 #define PLS_SCHEDULER_IMPL_H
 
+#include "pls/internal/scheduling/lambda_task.h"
 
 namespace pls {
 namespace internal {
 namespace scheduling {
 
 // TODO: generally look into the performance implications of using many thread_state::get() calls
 
 template<typename Function>
 void scheduler::perform_work(Function work_section) {
   PROFILE_WORK_BLOCK("scheduler::perform_work")
 
 //  root_task<Function> master{work_section};
 //
 //  // Push root task on stacks
 //  auto new_master = memory_->task_stack_for(0)->push(master);
 //  memory_->thread_state_for(0)->root_task_ = new_master;
 //  memory_->thread_state_for(0)->current_task_ = new_master;
 //  for (unsigned int i = 1; i < num_threads_; i++) {
 //    root_worker_task<Function> worker{new_master};
 //    auto new_worker = memory_->task_stack_for(0)->push(worker);
 //    memory_->thread_state_for(i)->root_task_ = new_worker;
 //    memory_->thread_state_for(i)->current_task_ = new_worker;
 //  }
 //  if (execute_main_thread) {
 //    work_section();
 //
 //    // Perform and wait for work
 //    sync_barrier_.wait(); // Trigger threads to wake up
 //    sync_barrier_.wait(); // Wait for threads to finish
 //
 //    // Clean up stack
 //    memory_->task_stack_for(0)->pop<typeof(master)>();
 //    for (unsigned int i = 1; i < num_threads_; i++) {
 //      root_worker_task<Function> worker{new_master};
 //      memory_->task_stack_for(0)->pop<typeof(worker)>();
 //  } else {
+  lambda_task_by_reference<Function> root_task{work_section};
+  main_thread_root_task_ = &root_task;
+  work_section_done_ = false;
+
+  sync_barrier_.wait();  // Trigger threads to wake up
+  sync_barrier_.wait();  // Wait for threads to finish
 //  }
 }
```
```diff
@@ -39,12 +34,6 @@ void scheduler::spawn_child(T &sub_task) {
   thread_state::get()->current_task_->spawn_child(sub_task);
 }
 
-void scheduler::wait_for_all() {
-  thread_state::get()->current_task_->wait_for_all();
-}
-
-thread_state *scheduler::thread_state_for(size_t id) {
-  return memory_->thread_state_for(id);
-}
 
 }
 }
 }
```
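The pair of `sync_barrier_.wait()` calls in `perform_work` is a classic double-barrier handshake: the first barrier releases the sleeping pool into the work section, the second blocks until the pool has drained it. A standalone C++20 model of the same control flow — hypothetical, for illustration only; PLS uses its own `base::barrier` and thread wrapper:

```cpp
#include <atomic>
#include <barrier>
#include <iostream>
#include <thread>
#include <vector>

constexpr unsigned kWorkers = 4;
std::barrier sync_barrier(kWorkers + 1);  // workers + the coordinating thread
std::atomic<bool> terminated{false};

void worker_routine(unsigned id) {
  while (true) {
    sync_barrier.arrive_and_wait();  // wait to be triggered
    if (terminated.load()) return;   // check for shutdown
    std::cout << "worker " << id << " runs the section\n";
    sync_barrier.arrive_and_wait();  // sync back with main thread
  }
}

int main() {
  std::vector<std::thread> pool;
  for (unsigned i = 0; i < kWorkers; i++) pool.emplace_back(worker_routine, i);

  // perform_work(): publish the root task, then the two barrier waits.
  sync_barrier.arrive_and_wait();  // trigger threads to wake up
  sync_barrier.arrive_and_wait();  // wait for threads to finish

  terminated.store(true);          // ~scheduler(): release workers one last time
  sync_barrier.arrive_and_wait();
  for (auto &t : pool) t.join();
}
```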
lib/pls/include/pls/internal/scheduling/task.h
```diff
@@ -39,14 +39,12 @@ class task {
  private:
   void execute();
-  bool try_execute_local();
-  bool try_execute_stolen();
 };
 
 template<typename T>
 void task::spawn_child(T &sub_task) {
   PROFILE_FORK_JOIN_STEALING("spawn_child")
-  static_assert(std::is_base_of<T, task>::value, "Only pass task subclasses!");
+  static_assert(std::is_base_of<task, T>::value, "Only pass task subclasses!");
 
   // Keep our refcount up to date
   ref_count_++;
```
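The `static_assert` swap is a real bug fix: `std::is_base_of<Base, Derived>` expects the base class first, so the old `is_base_of<T, task>` asked whether `task` derives from `T` and would fire for every legitimate task subclass. A minimal demonstration (the `base_task`/`derived_task` names are illustrative):

```cpp
#include <type_traits>

struct base_task {};
struct derived_task : base_task {};

// Correct order: Base first, Derived second.
static_assert(std::is_base_of<base_task, derived_task>::value,
              "accepts real subclasses");

// The old, swapped order is false for real subclasses, so the
// assert would have rejected exactly the types it should allow.
static_assert(!std::is_base_of<derived_task, base_task>::value,
              "swapped order rejects subclasses");
```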
lib/pls/include/pls/internal/scheduling/thread_state.h
```diff
@@ -19,7 +19,6 @@ class task;
 struct thread_state {
   alignas(base::system_details::CACHE_LINE_SIZE) scheduler *scheduler_;
-  alignas(base::system_details::CACHE_LINE_SIZE) task *root_task_;
   alignas(base::system_details::CACHE_LINE_SIZE) task *current_task_;
   alignas(base::system_details::CACHE_LINE_SIZE) data_structures::aligned_stack *task_stack_;
   alignas(base::system_details::CACHE_LINE_SIZE) data_structures::work_stealing_deque<task> deque_;
```

```diff
@@ -28,7 +27,6 @@ struct thread_state {
   thread_state() :
       scheduler_{nullptr},
-      root_task_{nullptr},
       current_task_{nullptr},
       task_stack_{nullptr},
       deque_{task_stack_},
```

```diff
@@ -37,7 +35,6 @@ struct thread_state {
   thread_state(scheduler *scheduler, data_structures::aligned_stack *task_stack, unsigned int id) :
       scheduler_{scheduler},
-      root_task_{nullptr},
       current_task_{nullptr},
       task_stack_{task_stack},
       deque_{task_stack_},
```
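Note that every remaining member keeps its own `alignas(CACHE_LINE_SIZE)` padding: fields like `deque_` are read by stealing threads, and per-line placement prevents those reads from false-sharing with the owner's hot fields. A tiny self-contained illustration of the idiom (`kCacheLineSize` is a stand-in for `base::system_details::CACHE_LINE_SIZE`):

```cpp
#include <cstddef>

constexpr std::size_t kCacheLineSize = 64;

// Without alignas, counter_a and counter_b would share one cache line and
// two threads incrementing them would invalidate each other's caches.
struct padded_counters {
  alignas(kCacheLineSize) long counter_a;
  alignas(kCacheLineSize) long counter_b;
};

static_assert(sizeof(padded_counters) >= 2 * kCacheLineSize,
              "each member occupies its own cache line");
```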
lib/pls/src/internal/scheduling/scheduler.cpp
```diff
@@ -30,19 +30,37 @@ scheduler::~scheduler() {
 }
 
 void scheduler::worker_routine() {
-  auto my_state = base::this_thread::state<thread_state>();
+  auto my_state = thread_state::get();
+  auto scheduler = my_state->scheduler_;
 
   while (true) {
-    my_state->scheduler_->sync_barrier_.wait();
-    if (my_state->scheduler_->terminated_) {
+    // Wait to be triggered
+    scheduler->sync_barrier_.wait();
+
+    // Check for shutdown
+    if (scheduler->terminated_) {
       return;
     }
 
-    // The root task must only return when all work is done,
-    // because of this a simple call is enough to ensure the
-    // fork-join-section is done (logically joined back into our main thread).
-    my_state->root_task_->execute();
+    // Execute work
+    if (my_state->id_ == 0) {
+      // Main Thread
+      auto root_task = scheduler->main_thread_root_task_;
+      root_task->parent_ = nullptr;
+      root_task->deque_state_ = my_state->deque_.save_state();
+      root_task->execute();
+      scheduler->work_section_done_ = true;
+    } else {
+      // Worker Threads
+      while (!scheduler->work_section_done_) {
+        if (!scheduler->try_execute_local()) {
+          scheduler->try_execute_stolen();
+        }
+      }
+    }
+
+    // Sync back with main thread
     my_state->scheduler_->sync_barrier_.wait();
   }
 }
```
```diff
@@ -100,6 +118,33 @@ task *scheduler::steal_task() {
   return nullptr;
 }
 
+bool scheduler::try_execute_local() {
+  task *local_task = get_local_task();
+  if (local_task != nullptr) {
+    local_task->execute();
+    return true;
+  } else {
+    return false;
+  }
+}
+
+bool scheduler::try_execute_stolen() {
+  task *stolen_task = steal_task();
+  if (stolen_task != nullptr) {
+    stolen_task->deque_state_ = thread_state::get()->deque_.save_state();
+    stolen_task->execute();
+    return true;
+  }
+  return false;
+}
+
+void scheduler::wait_for_all() {
+  thread_state::get()->current_task_->wait_for_all();
+}
+
+thread_state *scheduler::thread_state_for(size_t id) {
+  return memory_->thread_state_for(id);
+}
+
 }
 }
 }
```
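`try_execute_stolen()` records the thief's `deque_.save_state()` on the stolen task before running it; together with `release_memory_until(deque_state_)` in `task::wait_for_all()` (next file), this points to task memory living on a per-thread linear allocator that is rolled back wholesale at the join. A hypothetical miniature of that idiom:

```cpp
#include <cassert>
#include <cstddef>

// Miniature of the save_state()/release_memory_until() pattern: task memory
// sits on a per-thread linear stack, a task records the stack top before
// running children, and the join rolls back to that mark in one step.
class linear_stack {
  std::byte buffer_[1024];
  std::size_t head_ = 0;

 public:
  using state = std::size_t;

  void *push(std::size_t bytes) {  // allocate a child task's frame
    void *mem = &buffer_[head_];
    head_ += bytes;
    return mem;
  }

  state save_state() const { return head_; }
  void release_memory_until(state s) { head_ = s; }  // bulk-free the subtree
};

int main() {
  linear_stack task_stack;
  auto mark = task_stack.save_state();   // like task::deque_state_
  task_stack.push(128);                  // child tasks allocated while working
  task_stack.push(256);
  task_stack.release_memory_until(mark); // join: all child memory reclaimed
  assert(task_stack.save_state() == mark);
}
```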
lib/pls/src/internal/scheduling/task.cpp
```diff
@@ -36,31 +36,12 @@ void task::execute() {
   }
 }
 
-bool task::try_execute_local() {
-  task *local_task = thread_state::get()->scheduler_->get_local_task();
-  if (local_task != nullptr) {
-    local_task->execute();
-    return true;
-  } else {
-    return false;
-  }
-}
-
-bool task::try_execute_stolen() {
-  task *stolen_task = thread_state::get()->scheduler_->steal_task();
-  if (stolen_task != nullptr) {
-    stolen_task->deque_state_ = thread_state::get()->deque_.save_state();
-    stolen_task->execute();
-    return true;
-  }
-  return false;
-}
-
 void task::wait_for_all() {
+  auto scheduler = thread_state::get()->scheduler_;
+
   while (ref_count_ > 0) {
-    if (!try_execute_local()) {
-      try_execute_stolen();
+    if (!scheduler->try_execute_local()) {
+      scheduler->try_execute_stolen();
     }
   }
   thread_state::get()->deque_.release_memory_until(deque_state_);
```
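`wait_for_all()` is a helping join: the parent never blocks on a condition variable, it keeps executing local work (and, failing that, stolen work) until `ref_count_` says all children have completed. A stripped-down single-threaded model of that loop (`mini_task` is hypothetical and stealing is omitted):

```cpp
#include <atomic>
#include <functional>
#include <queue>

struct mini_task {
  std::atomic<int> ref_count_{0};

  void spawn_child(std::queue<std::function<void()>> &local_work,
                   std::function<void()> child) {
    ref_count_++;  // mirrors spawn_child(): keep our refcount up to date
    mini_task *self = this;
    local_work.push([self, child] {
      child();
      self->ref_count_--;  // child done: logically joined back
    });
  }

  void wait_for_all(std::queue<std::function<void()>> &local_work) {
    while (ref_count_ > 0) {      // children still outstanding
      if (!local_work.empty()) {  // try_execute_local(); stealing omitted
        auto work = local_work.front();
        local_work.pop();
        work();
      }
    }
  }
};

int main() {
  std::queue<std::function<void()>> local_work;
  mini_task parent;
  parent.spawn_child(local_work, [] { /* child work */ });
  parent.spawn_child(local_work, [] { /* child work */ });
  parent.wait_for_all(local_work);  // returns once both children ran
}
```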