Skip to content
Toggle navigation
P
Projects
G
Groups
S
Snippets
Help
las3_pub
/
predictable_parallel_patterns
This project
Loading...
Sign in
Toggle navigation
Go to a project
Project
Repository
Issues
0
Merge Requests
0
Pipelines
Wiki
Members
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Commit
3c60e8d7
authored
Feb 09, 2020
by
FritzFlorian
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
First working version on both ARM and x86.
parent
731b47c5
Pipeline
#1403
failed with stages
in 39 seconds
Changes
19
Pipelines
1
Show whitespace changes
Inline
Side-by-side
Showing
19 changed files
with
88 additions
and
55 deletions
+88
-55
app/benchmark_fft/CMakeLists.txt
+3
-3
app/benchmark_fft/main.cpp
+2
-2
app/benchmark_fib/CMakeLists.txt
+3
-3
app/benchmark_fib/main.cpp
+1
-1
app/benchmark_matrix/CMakeLists.txt
+3
-3
app/benchmark_matrix/main.cpp
+2
-2
cmake/SetupOptimizationLevel.cmake
+1
-1
lib/context_switcher/include/context_switcher/context_switcher.h
+3
-0
lib/context_switcher/include/context_switcher/continuation.h
+0
-4
lib/pls/CMakeLists.txt
+1
-1
lib/pls/include/pls/internal/base/error_handling.h
+1
-1
lib/pls/include/pls/internal/base/system_details.h
+18
-0
lib/pls/include/pls/internal/scheduling/external_trading_deque.h
+12
-10
lib/pls/include/pls/internal/scheduling/scheduler_impl.h
+3
-2
lib/pls/include/pls/internal/scheduling/task_manager.h
+1
-1
lib/pls/include/pls/internal/scheduling/task_manager_impl.h
+1
-1
lib/pls/include/pls/internal/scheduling/thread_state.h
+7
-6
lib/pls/src/internal/scheduling/task_manager.cpp
+14
-14
lib/pls/src/internal/scheduling/thread_state.cpp
+12
-0
No files found.
app/benchmark_fft/CMakeLists.txt
View file @
3c60e8d7
add_executable
(
benchmark_fft_pls_v
2
main.cpp
)
target_link_libraries
(
benchmark_fft_pls_v
2
pls benchmark_runner benchmark_base
)
add_executable
(
benchmark_fft_pls_v
3
main.cpp
)
target_link_libraries
(
benchmark_fft_pls_v
3
pls benchmark_runner benchmark_base
)
if
(
EASY_PROFILER
)
target_link_libraries
(
benchmark_fft_pls_v
2
easy_profiler
)
target_link_libraries
(
benchmark_fft_pls_v
3
easy_profiler
)
endif
()
app/benchmark_fft/main.cpp
View file @
3c60e8d7
...
...
@@ -37,7 +37,7 @@ void conquer(fft::complex_vector::iterator data, int n) {
constexpr
int
MAX_NUM_THREADS
=
8
;
constexpr
int
MAX_NUM_TASKS
=
32
;
constexpr
int
MAX_STACK_SIZE
=
1024
*
4
;
constexpr
int
MAX_STACK_SIZE
=
1024
*
6
4
;
static_scheduler_memory
<
MAX_NUM_THREADS
,
MAX_NUM_TASKS
,
...
...
@@ -49,7 +49,7 @@ int main(int argc, char **argv) {
benchmark_runner
::
read_args
(
argc
,
argv
,
num_threads
,
directory
);
string
test_name
=
to_string
(
num_threads
)
+
".csv"
;
string
full_directory
=
directory
+
"/PLS_v
2
/"
;
string
full_directory
=
directory
+
"/PLS_v
3
/"
;
benchmark_runner
runner
{
full_directory
,
test_name
};
fft
::
complex_vector
data
=
fft
::
generate_input
();
...
...
app/benchmark_fib/CMakeLists.txt
View file @
3c60e8d7
add_executable
(
benchmark_fib_pls_v
2
main.cpp
)
target_link_libraries
(
benchmark_fib_pls_v
2
pls benchmark_runner benchmark_base
)
add_executable
(
benchmark_fib_pls_v
3
main.cpp
)
target_link_libraries
(
benchmark_fib_pls_v
3
pls benchmark_runner benchmark_base
)
if
(
EASY_PROFILER
)
target_link_libraries
(
benchmark_fib_pls_v
2
easy_profiler
)
target_link_libraries
(
benchmark_fib_pls_v
3
easy_profiler
)
endif
()
app/benchmark_fib/main.cpp
View file @
3c60e8d7
...
...
@@ -33,7 +33,7 @@ int pls_fib(int n) {
constexpr
int
MAX_NUM_THREADS
=
8
;
constexpr
int
MAX_NUM_TASKS
=
32
;
constexpr
int
MAX_STACK_SIZE
=
1024
*
1
;
constexpr
int
MAX_STACK_SIZE
=
1024
*
4
;
static_scheduler_memory
<
MAX_NUM_THREADS
,
MAX_NUM_TASKS
,
...
...
app/benchmark_matrix/CMakeLists.txt
View file @
3c60e8d7
add_executable
(
benchmark_matrix_pls_v
2
main.cpp
)
target_link_libraries
(
benchmark_matrix_pls_v
2
pls benchmark_runner benchmark_base
)
add_executable
(
benchmark_matrix_pls_v
3
main.cpp
)
target_link_libraries
(
benchmark_matrix_pls_v
3
pls benchmark_runner benchmark_base
)
if
(
EASY_PROFILER
)
target_link_libraries
(
benchmark_matrix_pls_v
2
easy_profiler
)
target_link_libraries
(
benchmark_matrix_pls_v
3
easy_profiler
)
endif
()
app/benchmark_matrix/main.cpp
View file @
3c60e8d7
...
...
@@ -23,7 +23,7 @@ class pls_matrix : public matrix::matrix<T, SIZE> {
constexpr
int
MAX_NUM_THREADS
=
8
;
constexpr
int
MAX_NUM_TASKS
=
32
;
constexpr
int
MAX_STACK_SIZE
=
1024
*
1
;
constexpr
int
MAX_STACK_SIZE
=
1024
*
4
;
static_scheduler_memory
<
MAX_NUM_THREADS
,
MAX_NUM_TASKS
,
...
...
@@ -35,7 +35,7 @@ int main(int argc, char **argv) {
benchmark_runner
::
read_args
(
argc
,
argv
,
num_threads
,
directory
);
string
test_name
=
to_string
(
num_threads
)
+
".csv"
;
string
full_directory
=
directory
+
"/PLS_v
2
/"
;
string
full_directory
=
directory
+
"/PLS_v
3
/"
;
benchmark_runner
runner
{
full_directory
,
test_name
};
pls_matrix
<
double
,
matrix
::
MATRIX_SIZE
>
a
;
...
...
cmake/SetupOptimizationLevel.cmake
View file @
3c60e8d7
...
...
@@ -18,7 +18,7 @@ if (CMAKE_BUILD_TYPE STREQUAL "Release")
# but inlining functions and SIMD/Vectorization is
# only enabled by -O3, thus it's way faster in some
# array calculations.
set
(
CMAKE_CXX_FLAGS_RELEASE
"
${
CMAKE_CXX_FLAGS_RELEASE
}
-O
3
-march=native"
)
set
(
CMAKE_CXX_FLAGS_RELEASE
"
${
CMAKE_CXX_FLAGS_RELEASE
}
-O
2
-march=native"
)
set
(
CMAKE_INTERPROCEDURAL_OPTIMIZATION TRUE
)
else
()
set
(
CMAKE_CXX_FLAGS_DEBUG
"-g -O0"
)
...
...
lib/context_switcher/include/context_switcher/context_switcher.h
View file @
3c60e8d7
...
...
@@ -111,6 +111,9 @@ void lambda_capture_callback(fcontext::transfer_t transfer) {
lambda_capture
->~
T
();
continuation_t
cont_pointer
=
cont
.
consume
();
if
(
cont_pointer
==
nullptr
)
{
printf
(
"Error!!!
\n
"
);
}
fcontext
::
jump_fcontext
(
cont_pointer
,
(
void
*
)
0
);
}
...
...
lib/context_switcher/include/context_switcher/continuation.h
View file @
3c60e8d7
...
...
@@ -50,10 +50,6 @@ struct continuation {
}
continuation_t
consume
()
{
if
(
cont_pointer_
==
nullptr
)
{
printf
(
"Error!
\n
"
);
}
auto
tmp
=
cont_pointer_
;
cont_pointer_
=
nullptr
;
return
tmp
;
...
...
lib/pls/CMakeLists.txt
View file @
3c60e8d7
...
...
@@ -40,7 +40,7 @@ add_library(pls STATIC
include/pls/internal/scheduling/task_manager_impl.h
include/pls/internal/scheduling/static_scheduler_memory.h
include/pls/internal/scheduling/heap_scheduler_memory.h
src/internal/scheduling/task_manager.cpp
)
src/internal/scheduling/task_manager.cpp
src/internal/scheduling/thread_state.cpp
)
# Dependencies for pls
target_link_libraries
(
pls Threads::Threads
)
...
...
lib/pls/include/pls/internal/base/error_handling.h
View file @
3c60e8d7
...
...
@@ -16,6 +16,6 @@
void
pls_error
(
const
char
*
msg
);
// TODO: Distinguish between debug/internal asserts and production asserts.
#define PLS_ASSERT(cond, msg)
//
if (!(cond)) { pls_error(msg); }
#define PLS_ASSERT(cond, msg) if (!(cond)) { pls_error(msg); }
#endif //PLS_ERROR_HANDLING_H
lib/pls/include/pls/internal/base/system_details.h
View file @
3c60e8d7
...
...
@@ -67,6 +67,24 @@ inline void relax_cpu() {
#endif
}
/**
* Prevent inlining of functions. This is a compiler specific setting and
* it is seen as an error to not properly declare this.
* (Some functions in the codebase MUST be re-evaluated after fiber switches,
* by preventing inlining them we prevent the compiler caching their results)
*
* Usage: place PLS_NOINLINE in the declaration of a function that must not be inlined.
* Being a macro, it is not scoped by the surrounding namespaces.
*/
// Microsoft compiler: use the __declspec spelling.
#if defined(_MSC_VER)
#define PLS_NOINLINE __declspec(noinline)
// Compilers that report themselves as GCC 4 or newer: use the attribute spelling.
#elif defined(__GNUC__) && __GNUC__ > 3
// NOTE(review): the un-underscored spelling is used when compiling with nvcc, presumably
// because CUDA headers define __noinline__ as a macro of their own - confirm.
#if defined(__CUDACC__)
#define PLS_NOINLINE __attribute__ ((noinline))
#else
#define PLS_NOINLINE __attribute__ ((__noinline__))
#endif
// Any other compiler: fail the build instead of silently allowing inlining.
#else
#error "PLS requires inline prevention for certain functions."
#endif
}
}
}
...
...
lib/pls/include/pls/internal/scheduling/external_trading_deque.h
View file @
3c60e8d7
...
...
@@ -78,20 +78,20 @@ class external_trading_deque {
auto
expected_stamp
=
bot_internal_
.
stamp
;
auto
&
current_entry
=
entries_
[
bot_internal_
.
value
];
// Publish the prepared task in the deque.
current_entry
.
forwarding_stamp_
.
store
(
expected_stamp
,
std
::
memory_order_relaxed
);
current_entry
.
traded_task_
.
store
(
published_task
,
std
::
memory_order_relaxed
);
// Field that all threads synchronize on.
// This happens not in the deque itself, but in the published task.
traded_cas_field
sync_cas_field
;
sync_cas_field
.
fill_with_stamp
(
expected_stamp
,
thread_id_
);
published_task
->
external_trading_deque_cas_
.
store
(
sync_cas_field
);
// Publish the prepared task in the deque.
current_entry
.
forwarding_stamp_
.
store
(
expected_stamp
);
current_entry
.
traded_task_
.
store
(
published_task
);
published_task
->
external_trading_deque_cas_
.
store
(
sync_cas_field
,
std
::
memory_order_release
);
// Advance the bot pointer. Linearization point for making the task public.
bot_internal_
.
stamp
++
;
bot_internal_
.
value
++
;
bot_
.
store
(
bot_internal_
.
value
);
bot_
.
store
(
bot_internal_
.
value
,
std
::
memory_order_release
);
}
void
reset_bot_and_top
()
{
...
...
@@ -104,7 +104,7 @@ class external_trading_deque {
void
decrease_bot
()
{
bot_internal_
.
value
--
;
bot_
.
store
(
bot_internal_
.
value
);
bot_
.
store
(
bot_internal_
.
value
,
std
::
memory_order_relaxed
);
}
/**
...
...
@@ -120,15 +120,17 @@ class external_trading_deque {
decrease_bot
();
auto
&
current_entry
=
entries_
[
bot_internal_
.
value
];
auto
*
popped_task
=
current_entry
.
traded_task_
.
load
();
auto
expected_stamp
=
current_entry
.
forwarding_stamp_
.
load
();
auto
*
popped_task
=
current_entry
.
traded_task_
.
load
(
std
::
memory_order_relaxed
);
auto
expected_stamp
=
current_entry
.
forwarding_stamp_
.
load
(
std
::
memory_order_relaxed
);
// We know what value must be in the cas field if no other thread stole it.
traded_cas_field
expected_sync_cas_field
;
expected_sync_cas_field
.
fill_with_stamp
(
expected_stamp
,
thread_id_
);
traded_cas_field
empty_cas_field
;
if
(
popped_task
->
external_trading_deque_cas_
.
compare_exchange_strong
(
expected_sync_cas_field
,
empty_cas_field
))
{
if
(
popped_task
->
external_trading_deque_cas_
.
compare_exchange_strong
(
expected_sync_cas_field
,
empty_cas_field
,
std
::
memory_order_acq_rel
))
{
return
optional
<
task
*>
{
popped_task
};
}
else
{
reset_bot_and_top
();
...
...
lib/pls/include/pls/internal/scheduling/scheduler_impl.h
View file @
3c60e8d7
...
...
@@ -27,10 +27,11 @@ class scheduler::init_function_impl : public init_function {
void
run
()
override
{
auto
&
root_task
=
thread_state
::
get
().
get_task_manager
().
get_active_task
();
root_task
.
run_as_task
([
&
](
context_switcher
::
continuation
cont
)
{
thread_state
::
get
().
set_main_continuation
(
std
::
move
(
cont
)
);
thread_state
::
get
().
main_continuation
()
=
std
::
move
(
cont
);
function_
();
thread_state
::
get
().
get_scheduler
().
work_section_done_
.
store
(
true
);
return
std
::
move
(
thread_state
::
get
().
get_main_continuation
());
PLS_ASSERT
(
thread_state
::
get
().
main_continuation
().
valid
(),
"Must return valid continuation from main task."
);
return
std
::
move
(
thread_state
::
get
().
main_continuation
());
});
}
...
...
lib/pls/include/pls/internal/scheduling/task_manager.h
View file @
3c60e8d7
...
...
@@ -52,7 +52,7 @@ class task_manager {
void
spawn_child
(
F
&&
lambda
);
void
sync
();
task
*
steal_task
(
task_manager
&
stealing_task_manager
);
task
*
steal_task
(
task_manager
&
stealing_task_manager
);
bool
try_clean_return
(
context_switcher
::
continuation
&
result_cont
);
...
...
lib/pls/include/pls/internal/scheduling/task_manager_impl.h
View file @
3c60e8d7
...
...
@@ -71,7 +71,7 @@ void task_manager::spawn_child(F &&lambda) {
if
(
continuation
.
valid
())
{
// We jumped in here from the main loop, keep track!
thread_state
::
get
().
set_main_continuation
(
std
::
move
(
continuation
)
);
thread_state
::
get
().
main_continuation
()
=
std
::
move
(
continuation
);
}
}
...
...
lib/pls/include/pls/internal/scheduling/thread_state.h
View file @
3c60e8d7
...
...
@@ -6,6 +6,8 @@
#include <chrono>
#include <utility>
#include "pls/internal/base/system_details.h"
#include "context_switcher/continuation.h"
namespace
pls
{
...
...
@@ -37,9 +39,11 @@ struct alignas(base::system_details::CACHE_LINE_SIZE) thread_state {
* Must only be called on threads that are associated with a thread_state,
* this will most likely be threads created by the scheduler.
*
* Each call is guaranteed to be a new lookup, i.e. it is not cached after fiber context switches.
*
* @return The thread_state of this thread.
*/
static
thread_state
&
get
()
{
return
*
base
::
this_thread
::
state
<
thread_state
>
();
}
static
thread_state
&
PLS_NOINLINE
get
();
unsigned
get_id
()
{
return
id_
;
}
void
set_id
(
unsigned
id
)
{
...
...
@@ -54,11 +58,8 @@ struct alignas(base::system_details::CACHE_LINE_SIZE) thread_state {
return
random_
();
}
void
set_main_continuation
(
context_switcher
::
continuation
&&
continuation
)
{
main_loop_continuation_
=
std
::
move
(
continuation
);
}
context_switcher
::
continuation
get_main_continuation
()
{
return
std
::
move
(
main_loop_continuation_
);
context_switcher
::
continuation
&
main_continuation
()
{
return
main_loop_continuation_
;
}
// Do not allow move/copy operations.
...
...
lib/pls/src/internal/scheduling/task_manager.cpp
View file @
3c60e8d7
#include <tuple>
#include "pls/internal/scheduling/task_manager.h"
#include "pls/internal/scheduling/task.h"
...
...
@@ -76,7 +74,6 @@ task *task_manager::steal_task(task_manager &stealing_task_manager) {
}
void
task_manager
::
push_resource_on_task
(
task
*
target_task
,
task
*
spare_task_chain
)
{
PLS_ASSERT
(
check_task_chain_backward
(
spare_task_chain
),
"Must only push proper task chains."
);
PLS_ASSERT
(
target_task
->
thread_id_
!=
spare_task_chain
->
thread_id_
,
"Makes no sense to push task onto itself, as it is not clean by definition."
);
PLS_ASSERT
(
target_task
->
depth_
==
spare_task_chain
->
depth_
,
"Must only push tasks with correct depth."
);
...
...
@@ -90,11 +87,11 @@ void task_manager::push_resource_on_task(task *target_task, task *spare_task_cha
if
(
current_root
.
value
==
0
)
{
// Empty, simply push in with no successor
spare_task_chain
->
resource_stack_next_
.
store
(
nullptr
,
std
::
memory_order_relaxed
);
spare_task_chain
->
resource_stack_next_
.
store
(
nullptr
);
}
else
{
// Already an entry. Find its corresponding task and set it as our successor.
auto
*
current_root_task
=
find_task
(
current_root
.
value
-
1
,
target_task
->
depth_
);
spare_task_chain
->
resource_stack_next_
.
store
(
current_root_task
,
std
::
memory_order_relaxed
);
spare_task_chain
->
resource_stack_next_
.
store
(
current_root_task
);
}
}
while
(
!
target_task
->
resource_stack_root_
.
compare_exchange_strong
(
current_root
,
target_root
));
...
...
@@ -112,7 +109,7 @@ task *task_manager::pop_resource_from_task(task *target_task) {
}
else
{
// Found something, try to pop it
auto
*
current_root_task
=
find_task
(
current_root
.
value
-
1
,
target_task
->
depth_
);
auto
*
next_stack_task
=
current_root_task
->
resource_stack_next_
.
load
(
std
::
memory_order_relaxed
);
auto
*
next_stack_task
=
current_root_task
->
resource_stack_next_
.
load
();
target_root
.
stamp
=
current_root
.
stamp
+
1
;
target_root
.
value
=
next_stack_task
!=
nullptr
?
next_stack_task
->
thread_id_
+
1
:
0
;
...
...
@@ -122,7 +119,7 @@ task *task_manager::pop_resource_from_task(task *target_task) {
}
while
(
!
target_task
->
resource_stack_root_
.
compare_exchange_strong
(
current_root
,
target_root
));
PLS_ASSERT
(
check_task_chain_backward
(
output_task
),
"Must only pop proper task chains."
);
output_task
->
resource_stack_next_
.
store
(
nullptr
,
std
::
memory_order_relaxed
);
output_task
->
resource_stack_next_
.
store
(
nullptr
);
return
output_task
;
}
...
...
@@ -187,7 +184,8 @@ bool task_manager::try_clean_return(context_switcher::continuation &result_cont)
}
// jump back to the continuation in main scheduling loop, time to steal some work
result_cont
=
thread_state
::
get
().
get_main_continuation
();
result_cont
=
std
::
move
(
thread_state
::
get
().
main_continuation
());
PLS_ASSERT
(
result_cont
.
valid
(),
"Must return a valid continuation."
);
return
true
;
}
else
{
// Make sure that we are the owner of this full continuation/task chain.
...
...
@@ -198,13 +196,16 @@ bool task_manager::try_clean_return(context_switcher::continuation &result_cont)
active_task_
=
last_task
;
result_cont
=
std
::
move
(
last_task
->
continuation_
);
PLS_ASSERT
(
result_cont
.
valid
(),
"Must return a valid continuation."
);
return
false
;
}
}
bool
task_manager
::
check_task_chain_forward
(
task
*
start_task
)
{
while
(
start_task
->
next_
!=
nullptr
)
{
PLS_ASSERT
(
start_task
->
next_
->
prev_
==
start_task
,
"Chain must have correct prev/next fields for linked list!"
);
if
(
start_task
->
next_
->
prev_
!=
start_task
)
{
return
false
;
}
start_task
=
start_task
->
next_
;
}
return
true
;
...
...
@@ -212,17 +213,16 @@ bool task_manager::check_task_chain_forward(task *start_task) {
bool
task_manager
::
check_task_chain_backward
(
task
*
start_task
)
{
while
(
start_task
->
prev_
!=
nullptr
)
{
PLS_ASSERT
(
start_task
->
prev_
->
next_
==
start_task
,
"Chain must have correct prev/next fields for linked list!"
);
if
(
start_task
->
prev_
->
next_
!=
start_task
)
{
return
false
;
}
start_task
=
start_task
->
prev_
;
}
return
true
;
}
bool
task_manager
::
check_task_chain
()
{
check_task_chain_backward
(
active_task_
);
check_task_chain_forward
(
active_task_
);
return
true
;
return
check_task_chain_backward
(
active_task_
)
&&
check_task_chain_forward
(
active_task_
);
}
}
...
...
lib/pls/src/internal/scheduling/thread_state.cpp
0 → 100644
View file @
3c60e8d7
#include "pls/internal/scheduling/thread_state.h"
#include "pls/internal/base/thread.h"
namespace pls {
namespace internal {
namespace scheduling {

/**
 * Out-of-line definition of the thread_state accessor.
 *
 * Fetches the thread_state pointer via base::this_thread::state<thread_state>()
 * and hands it back as a reference.
 *
 * @return The thread_state of this thread.
 */
thread_state &thread_state::get() {
  thread_state *current_state = base::this_thread::state<thread_state>();
  return *current_state;
}

}
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment