Skip to content
Toggle navigation
P
Projects
G
Groups
S
Snippets
Help
las3_pub
/
predictable_parallel_patterns
This project
Loading...
Sign in
Toggle navigation
Go to a project
Project
Repository
Issues
0
Merge Requests
0
Pipelines
Wiki
Members
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Commit
3c60e8d7
authored
5 years ago
by
FritzFlorian
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
First working version on both ARM and x86.
parent
731b47c5
master
Pipeline
#1403
failed with stages
in 39 seconds
Changes
19
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
19 changed files
with
88 additions
and
55 deletions
+88
-55
app/benchmark_fft/CMakeLists.txt
+3
-3
app/benchmark_fft/main.cpp
+2
-2
app/benchmark_fib/CMakeLists.txt
+3
-3
app/benchmark_fib/main.cpp
+1
-1
app/benchmark_matrix/CMakeLists.txt
+3
-3
app/benchmark_matrix/main.cpp
+2
-2
cmake/SetupOptimizationLevel.cmake
+1
-1
lib/context_switcher/include/context_switcher/context_switcher.h
+3
-0
lib/context_switcher/include/context_switcher/continuation.h
+0
-4
lib/pls/CMakeLists.txt
+1
-1
lib/pls/include/pls/internal/base/error_handling.h
+1
-1
lib/pls/include/pls/internal/base/system_details.h
+18
-0
lib/pls/include/pls/internal/scheduling/external_trading_deque.h
+12
-10
lib/pls/include/pls/internal/scheduling/scheduler_impl.h
+3
-2
lib/pls/include/pls/internal/scheduling/task_manager.h
+1
-1
lib/pls/include/pls/internal/scheduling/task_manager_impl.h
+1
-1
lib/pls/include/pls/internal/scheduling/thread_state.h
+7
-6
lib/pls/src/internal/scheduling/task_manager.cpp
+14
-14
lib/pls/src/internal/scheduling/thread_state.cpp
+12
-0
No files found.
app/benchmark_fft/CMakeLists.txt
View file @
3c60e8d7
add_executable
(
benchmark_fft_pls_v
2
main.cpp
)
target_link_libraries
(
benchmark_fft_pls_v
2
pls benchmark_runner benchmark_base
)
add_executable
(
benchmark_fft_pls_v
3
main.cpp
)
target_link_libraries
(
benchmark_fft_pls_v
3
pls benchmark_runner benchmark_base
)
if
(
EASY_PROFILER
)
target_link_libraries
(
benchmark_fft_pls_v
2
easy_profiler
)
target_link_libraries
(
benchmark_fft_pls_v
3
easy_profiler
)
endif
()
This diff is collapsed.
Click to expand it.
app/benchmark_fft/main.cpp
View file @
3c60e8d7
...
...
@@ -37,7 +37,7 @@ void conquer(fft::complex_vector::iterator data, int n) {
constexpr
int
MAX_NUM_THREADS
=
8
;
constexpr
int
MAX_NUM_TASKS
=
32
;
constexpr
int
MAX_STACK_SIZE
=
1024
*
4
;
constexpr
int
MAX_STACK_SIZE
=
1024
*
6
4
;
static_scheduler_memory
<
MAX_NUM_THREADS
,
MAX_NUM_TASKS
,
...
...
@@ -49,7 +49,7 @@ int main(int argc, char **argv) {
benchmark_runner
::
read_args
(
argc
,
argv
,
num_threads
,
directory
);
string
test_name
=
to_string
(
num_threads
)
+
".csv"
;
string
full_directory
=
directory
+
"/PLS_v
2
/"
;
string
full_directory
=
directory
+
"/PLS_v
3
/"
;
benchmark_runner
runner
{
full_directory
,
test_name
};
fft
::
complex_vector
data
=
fft
::
generate_input
();
...
...
This diff is collapsed.
Click to expand it.
app/benchmark_fib/CMakeLists.txt
View file @
3c60e8d7
add_executable
(
benchmark_fib_pls_v
2
main.cpp
)
target_link_libraries
(
benchmark_fib_pls_v
2
pls benchmark_runner benchmark_base
)
add_executable
(
benchmark_fib_pls_v
3
main.cpp
)
target_link_libraries
(
benchmark_fib_pls_v
3
pls benchmark_runner benchmark_base
)
if
(
EASY_PROFILER
)
target_link_libraries
(
benchmark_fib_pls_v
2
easy_profiler
)
target_link_libraries
(
benchmark_fib_pls_v
3
easy_profiler
)
endif
()
This diff is collapsed.
Click to expand it.
app/benchmark_fib/main.cpp
View file @
3c60e8d7
...
...
@@ -33,7 +33,7 @@ int pls_fib(int n) {
constexpr
int
MAX_NUM_THREADS
=
8
;
constexpr
int
MAX_NUM_TASKS
=
32
;
constexpr
int
MAX_STACK_SIZE
=
1024
*
1
;
constexpr
int
MAX_STACK_SIZE
=
1024
*
4
;
static_scheduler_memory
<
MAX_NUM_THREADS
,
MAX_NUM_TASKS
,
...
...
This diff is collapsed.
Click to expand it.
app/benchmark_matrix/CMakeLists.txt
View file @
3c60e8d7
add_executable
(
benchmark_matrix_pls_v
2
main.cpp
)
target_link_libraries
(
benchmark_matrix_pls_v
2
pls benchmark_runner benchmark_base
)
add_executable
(
benchmark_matrix_pls_v
3
main.cpp
)
target_link_libraries
(
benchmark_matrix_pls_v
3
pls benchmark_runner benchmark_base
)
if
(
EASY_PROFILER
)
target_link_libraries
(
benchmark_matrix_pls_v
2
easy_profiler
)
target_link_libraries
(
benchmark_matrix_pls_v
3
easy_profiler
)
endif
()
This diff is collapsed.
Click to expand it.
app/benchmark_matrix/main.cpp
View file @
3c60e8d7
...
...
@@ -23,7 +23,7 @@ class pls_matrix : public matrix::matrix<T, SIZE> {
constexpr
int
MAX_NUM_THREADS
=
8
;
constexpr
int
MAX_NUM_TASKS
=
32
;
constexpr
int
MAX_STACK_SIZE
=
1024
*
1
;
constexpr
int
MAX_STACK_SIZE
=
1024
*
4
;
static_scheduler_memory
<
MAX_NUM_THREADS
,
MAX_NUM_TASKS
,
...
...
@@ -35,7 +35,7 @@ int main(int argc, char **argv) {
benchmark_runner
::
read_args
(
argc
,
argv
,
num_threads
,
directory
);
string
test_name
=
to_string
(
num_threads
)
+
".csv"
;
string
full_directory
=
directory
+
"/PLS_v
2
/"
;
string
full_directory
=
directory
+
"/PLS_v
3
/"
;
benchmark_runner
runner
{
full_directory
,
test_name
};
pls_matrix
<
double
,
matrix
::
MATRIX_SIZE
>
a
;
...
...
This diff is collapsed.
Click to expand it.
cmake/SetupOptimizationLevel.cmake
View file @
3c60e8d7
...
...
@@ -18,7 +18,7 @@ if (CMAKE_BUILD_TYPE STREQUAL "Release")
# but inlining functions and SIMD/Vectorization is
# only enabled by -O3, thus it's way faster in some
# array calculations.
set
(
CMAKE_CXX_FLAGS_RELEASE
"
${
CMAKE_CXX_FLAGS_RELEASE
}
-O
3
-march=native"
)
set
(
CMAKE_CXX_FLAGS_RELEASE
"
${
CMAKE_CXX_FLAGS_RELEASE
}
-O
2
-march=native"
)
set
(
CMAKE_INTERPROCEDURAL_OPTIMIZATION TRUE
)
else
()
set
(
CMAKE_CXX_FLAGS_DEBUG
"-g -O0"
)
...
...
This diff is collapsed.
Click to expand it.
lib/context_switcher/include/context_switcher/context_switcher.h
View file @
3c60e8d7
...
...
@@ -111,6 +111,9 @@ void lambda_capture_callback(fcontext::transfer_t transfer) {
lambda_capture
->~
T
();
continuation_t
cont_pointer
=
cont
.
consume
();
if
(
cont_pointer
==
nullptr
)
{
printf
(
"Error!!!
\n
"
);
}
fcontext
::
jump_fcontext
(
cont_pointer
,
(
void
*
)
0
);
}
...
...
This diff is collapsed.
Click to expand it.
lib/context_switcher/include/context_switcher/continuation.h
View file @
3c60e8d7
...
...
@@ -50,10 +50,6 @@ struct continuation {
}
continuation_t
consume
()
{
if
(
cont_pointer_
==
nullptr
)
{
printf
(
"Error!
\n
"
);
}
auto
tmp
=
cont_pointer_
;
cont_pointer_
=
nullptr
;
return
tmp
;
...
...
This diff is collapsed.
Click to expand it.
lib/pls/CMakeLists.txt
View file @
3c60e8d7
...
...
@@ -40,7 +40,7 @@ add_library(pls STATIC
include/pls/internal/scheduling/task_manager_impl.h
include/pls/internal/scheduling/static_scheduler_memory.h
include/pls/internal/scheduling/heap_scheduler_memory.h
src/internal/scheduling/task_manager.cpp
)
src/internal/scheduling/task_manager.cpp
src/internal/scheduling/thread_state.cpp
)
# Dependencies for pls
target_link_libraries
(
pls Threads::Threads
)
...
...
This diff is collapsed.
Click to expand it.
lib/pls/include/pls/internal/base/error_handling.h
View file @
3c60e8d7
...
...
@@ -16,6 +16,6 @@
void
pls_error
(
const
char
*
msg
);
// TODO: Distinguish between debug/internal asserts and production asserts.
#define PLS_ASSERT(cond, msg)
//
if (!(cond)) { pls_error(msg); }
#define PLS_ASSERT(cond, msg) if (!(cond)) { pls_error(msg); }
#endif //PLS_ERROR_HANDLING_H
This diff is collapsed.
Click to expand it.
lib/pls/include/pls/internal/base/system_details.h
View file @
3c60e8d7
...
...
@@ -67,6 +67,24 @@ inline void relax_cpu() {
#endif
}
/**
* Prevent inlining of functions. This is a compiler specific setting and
* it is seen as an error to not properly declare this.
* (Some functions in the codebase MUST be re-evaluated after fiber switches,
* by preventing inlining them we prevent the compiler caching their results)
*/
#if defined(_MSC_VER)
#define PLS_NOINLINE __declspec(noinline)
#elif defined(__GNUC__) && __GNUC__ > 3
#if defined(__CUDACC__)
#define PLS_NOINLINE __attribute__ ((noinline))
#else
#define PLS_NOINLINE __attribute__ ((__noinline__))
#endif
#else
#error "PLS requires inline prevention for certain functions."
#endif
}
}
}
...
...
This diff is collapsed.
Click to expand it.
lib/pls/include/pls/internal/scheduling/external_trading_deque.h
View file @
3c60e8d7
...
...
@@ -78,20 +78,20 @@ class external_trading_deque {
auto
expected_stamp
=
bot_internal_
.
stamp
;
auto
&
current_entry
=
entries_
[
bot_internal_
.
value
];
// Publish the prepared task in the deque.
current_entry
.
forwarding_stamp_
.
store
(
expected_stamp
,
std
::
memory_order_relaxed
);
current_entry
.
traded_task_
.
store
(
published_task
,
std
::
memory_order_relaxed
);
// Field that all threads synchronize on.
// This happens not in the deque itself, but in the published task.
traded_cas_field
sync_cas_field
;
sync_cas_field
.
fill_with_stamp
(
expected_stamp
,
thread_id_
);
published_task
->
external_trading_deque_cas_
.
store
(
sync_cas_field
);
// Publish the prepared task in the deque.
current_entry
.
forwarding_stamp_
.
store
(
expected_stamp
);
current_entry
.
traded_task_
.
store
(
published_task
);
published_task
->
external_trading_deque_cas_
.
store
(
sync_cas_field
,
std
::
memory_order_release
);
// Advance the bot pointer. Linearization point for making the task public.
bot_internal_
.
stamp
++
;
bot_internal_
.
value
++
;
bot_
.
store
(
bot_internal_
.
value
);
bot_
.
store
(
bot_internal_
.
value
,
std
::
memory_order_release
);
}
void
reset_bot_and_top
()
{
...
...
@@ -104,7 +104,7 @@ class external_trading_deque {
void
decrease_bot
()
{
bot_internal_
.
value
--
;
bot_
.
store
(
bot_internal_
.
value
);
bot_
.
store
(
bot_internal_
.
value
,
std
::
memory_order_relaxed
);
}
/**
...
...
@@ -120,15 +120,17 @@ class external_trading_deque {
decrease_bot
();
auto
&
current_entry
=
entries_
[
bot_internal_
.
value
];
auto
*
popped_task
=
current_entry
.
traded_task_
.
load
();
auto
expected_stamp
=
current_entry
.
forwarding_stamp_
.
load
();
auto
*
popped_task
=
current_entry
.
traded_task_
.
load
(
std
::
memory_order_relaxed
);
auto
expected_stamp
=
current_entry
.
forwarding_stamp_
.
load
(
std
::
memory_order_relaxed
);
// We know what value must be in the cas field if no other thread stole it.
traded_cas_field
expected_sync_cas_field
;
expected_sync_cas_field
.
fill_with_stamp
(
expected_stamp
,
thread_id_
);
traded_cas_field
empty_cas_field
;
if
(
popped_task
->
external_trading_deque_cas_
.
compare_exchange_strong
(
expected_sync_cas_field
,
empty_cas_field
))
{
if
(
popped_task
->
external_trading_deque_cas_
.
compare_exchange_strong
(
expected_sync_cas_field
,
empty_cas_field
,
std
::
memory_order_acq_rel
))
{
return
optional
<
task
*>
{
popped_task
};
}
else
{
reset_bot_and_top
();
...
...
This diff is collapsed.
Click to expand it.
lib/pls/include/pls/internal/scheduling/scheduler_impl.h
View file @
3c60e8d7
...
...
@@ -27,10 +27,11 @@ class scheduler::init_function_impl : public init_function {
void
run
()
override
{
auto
&
root_task
=
thread_state
::
get
().
get_task_manager
().
get_active_task
();
root_task
.
run_as_task
([
&
](
context_switcher
::
continuation
cont
)
{
thread_state
::
get
().
set_main_continuation
(
std
::
move
(
cont
)
);
thread_state
::
get
().
main_continuation
()
=
std
::
move
(
cont
);
function_
();
thread_state
::
get
().
get_scheduler
().
work_section_done_
.
store
(
true
);
return
std
::
move
(
thread_state
::
get
().
get_main_continuation
());
PLS_ASSERT
(
thread_state
::
get
().
main_continuation
().
valid
(),
"Must return valid continuation from main task."
);
return
std
::
move
(
thread_state
::
get
().
main_continuation
());
});
}
...
...
This diff is collapsed.
Click to expand it.
lib/pls/include/pls/internal/scheduling/task_manager.h
View file @
3c60e8d7
...
...
@@ -52,7 +52,7 @@ class task_manager {
void
spawn_child
(
F
&&
lambda
);
void
sync
();
task
*
steal_task
(
task_manager
&
stealing_task_manager
);
task
*
steal_task
(
task_manager
&
stealing_task_manager
);
bool
try_clean_return
(
context_switcher
::
continuation
&
result_cont
);
...
...
This diff is collapsed.
Click to expand it.
lib/pls/include/pls/internal/scheduling/task_manager_impl.h
View file @
3c60e8d7
...
...
@@ -71,7 +71,7 @@ void task_manager::spawn_child(F &&lambda) {
if
(
continuation
.
valid
())
{
// We jumped in here from the main loop, keep track!
thread_state
::
get
().
set_main_continuation
(
std
::
move
(
continuation
)
);
thread_state
::
get
().
main_continuation
()
=
std
::
move
(
continuation
);
}
}
...
...
This diff is collapsed.
Click to expand it.
lib/pls/include/pls/internal/scheduling/thread_state.h
View file @
3c60e8d7
...
...
@@ -6,6 +6,8 @@
#include <chrono>
#include <utility>
#include "pls/internal/base/system_details.h"
#include "context_switcher/continuation.h"
namespace
pls
{
...
...
@@ -37,9 +39,11 @@ struct alignas(base::system_details::CACHE_LINE_SIZE) thread_state {
* Must only be called on threads that are associated with a thread_state,
* this will most likely be threads created by the scheduler.
*
* Each call is guaranteed to be a new lookup, i.e. it is not cached after fiber context switches.
*
* @return The thread_state of this thread.
*/
static
thread_state
&
get
()
{
return
*
base
::
this_thread
::
state
<
thread_state
>
();
}
static
thread_state
&
PLS_NOINLINE
get
();
unsigned
get_id
()
{
return
id_
;
}
void
set_id
(
unsigned
id
)
{
...
...
@@ -54,11 +58,8 @@ struct alignas(base::system_details::CACHE_LINE_SIZE) thread_state {
return
random_
();
}
void
set_main_continuation
(
context_switcher
::
continuation
&&
continuation
)
{
main_loop_continuation_
=
std
::
move
(
continuation
);
}
context_switcher
::
continuation
get_main_continuation
()
{
return
std
::
move
(
main_loop_continuation_
);
context_switcher
::
continuation
&
main_continuation
()
{
return
main_loop_continuation_
;
}
// Do not allow move/copy operations.
...
...
This diff is collapsed.
Click to expand it.
lib/pls/src/internal/scheduling/task_manager.cpp
View file @
3c60e8d7
#include <tuple>
#include "pls/internal/scheduling/task_manager.h"
#include "pls/internal/scheduling/task.h"
...
...
@@ -76,7 +74,6 @@ task *task_manager::steal_task(task_manager &stealing_task_manager) {
}
void
task_manager
::
push_resource_on_task
(
task
*
target_task
,
task
*
spare_task_chain
)
{
PLS_ASSERT
(
check_task_chain_backward
(
spare_task_chain
),
"Must only push proper task chains."
);
PLS_ASSERT
(
target_task
->
thread_id_
!=
spare_task_chain
->
thread_id_
,
"Makes no sense to push task onto itself, as it is not clean by definition."
);
PLS_ASSERT
(
target_task
->
depth_
==
spare_task_chain
->
depth_
,
"Must only push tasks with correct depth."
);
...
...
@@ -90,11 +87,11 @@ void task_manager::push_resource_on_task(task *target_task, task *spare_task_cha
if
(
current_root
.
value
==
0
)
{
// Empty, simply push in with no successor
spare_task_chain
->
resource_stack_next_
.
store
(
nullptr
,
std
::
memory_order_relaxed
);
spare_task_chain
->
resource_stack_next_
.
store
(
nullptr
);
}
else
{
// Already an entry. Find its corresponding task and set it as our successor.
auto
*
current_root_task
=
find_task
(
current_root
.
value
-
1
,
target_task
->
depth_
);
spare_task_chain
->
resource_stack_next_
.
store
(
current_root_task
,
std
::
memory_order_relaxed
);
spare_task_chain
->
resource_stack_next_
.
store
(
current_root_task
);
}
}
while
(
!
target_task
->
resource_stack_root_
.
compare_exchange_strong
(
current_root
,
target_root
));
...
...
@@ -112,7 +109,7 @@ task *task_manager::pop_resource_from_task(task *target_task) {
}
else
{
// Found something, try to pop it
auto
*
current_root_task
=
find_task
(
current_root
.
value
-
1
,
target_task
->
depth_
);
auto
*
next_stack_task
=
current_root_task
->
resource_stack_next_
.
load
(
std
::
memory_order_relaxed
);
auto
*
next_stack_task
=
current_root_task
->
resource_stack_next_
.
load
();
target_root
.
stamp
=
current_root
.
stamp
+
1
;
target_root
.
value
=
next_stack_task
!=
nullptr
?
next_stack_task
->
thread_id_
+
1
:
0
;
...
...
@@ -122,7 +119,7 @@ task *task_manager::pop_resource_from_task(task *target_task) {
}
while
(
!
target_task
->
resource_stack_root_
.
compare_exchange_strong
(
current_root
,
target_root
));
PLS_ASSERT
(
check_task_chain_backward
(
output_task
),
"Must only pop proper task chains."
);
output_task
->
resource_stack_next_
.
store
(
nullptr
,
std
::
memory_order_relaxed
);
output_task
->
resource_stack_next_
.
store
(
nullptr
);
return
output_task
;
}
...
...
@@ -187,7 +184,8 @@ bool task_manager::try_clean_return(context_switcher::continuation &result_cont)
}
// jump back to the continuation in main scheduling loop, time to steal some work
result_cont
=
thread_state
::
get
().
get_main_continuation
();
result_cont
=
std
::
move
(
thread_state
::
get
().
main_continuation
());
PLS_ASSERT
(
result_cont
.
valid
(),
"Must return a valid continuation."
);
return
true
;
}
else
{
// Make sure that we are the owner of this full continuation/task chain.
...
...
@@ -198,13 +196,16 @@ bool task_manager::try_clean_return(context_switcher::continuation &result_cont)
active_task_
=
last_task
;
result_cont
=
std
::
move
(
last_task
->
continuation_
);
PLS_ASSERT
(
result_cont
.
valid
(),
"Must return a valid continuation."
);
return
false
;
}
}
bool
task_manager
::
check_task_chain_forward
(
task
*
start_task
)
{
while
(
start_task
->
next_
!=
nullptr
)
{
PLS_ASSERT
(
start_task
->
next_
->
prev_
==
start_task
,
"Chain must have correct prev/next fields for linked list!"
);
if
(
start_task
->
next_
->
prev_
!=
start_task
)
{
return
false
;
}
start_task
=
start_task
->
next_
;
}
return
true
;
...
...
@@ -212,17 +213,16 @@ bool task_manager::check_task_chain_forward(task *start_task) {
bool
task_manager
::
check_task_chain_backward
(
task
*
start_task
)
{
while
(
start_task
->
prev_
!=
nullptr
)
{
PLS_ASSERT
(
start_task
->
prev_
->
next_
==
start_task
,
"Chain must have correct prev/next fields for linked list!"
);
if
(
start_task
->
prev_
->
next_
!=
start_task
)
{
return
false
;
}
start_task
=
start_task
->
prev_
;
}
return
true
;
}
bool
task_manager
::
check_task_chain
()
{
check_task_chain_backward
(
active_task_
);
check_task_chain_forward
(
active_task_
);
return
true
;
return
check_task_chain_backward
(
active_task_
)
&&
check_task_chain_forward
(
active_task_
);
}
}
...
...
This diff is collapsed.
Click to expand it.
lib/pls/src/internal/scheduling/thread_state.cpp
0 → 100644
View file @
3c60e8d7
#include "pls/internal/scheduling/thread_state.h"
#include "pls/internal/base/thread.h"
namespace
pls
{
namespace
internal
{
namespace
scheduling
{
thread_state
&
thread_state
::
get
()
{
return
*
base
::
this_thread
::
state
<
thread_state
>
();
}
}
}
}
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment