Skip to content
Toggle navigation
P
Projects
G
Groups
S
Snippets
Help
las3_pub
/
predictable_parallel_patterns
This project
Loading...
Sign in
Toggle navigation
Go to a project
Project
Repository
Issues
0
Merge Requests
0
Pipelines
Wiki
Members
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Commit
89b6e3cb
authored
5 years ago
by
FritzFlorian
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Add cscontext ARMv7 assembly and fast path optimization.
parent
3c60e8d7
Pipeline
#1404
failed with stages
in 37 seconds
Changes
12
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
12 changed files
with
167 additions
and
64 deletions
+167
-64
app/benchmark_fft/main.cpp
+6
-14
app/benchmark_fib/main.cpp
+4
-11
app/benchmark_matrix/main.cpp
+8
-16
cmake/SetupOptimizationLevel.cmake
+1
-1
lib/context_switcher/asm/cscontext/SelectAssemblyFiles.cmake
+5
-0
lib/context_switcher/asm/cscontext/enter_context_arm32_sysv_elf.s
+63
-0
lib/context_switcher/asm/cscontext/switch_context_arm32_sysv_elf.s
+48
-0
lib/pls/include/pls/algorithms/for_each_impl.h
+1
-4
lib/pls/include/pls/internal/scheduling/task.h
+5
-1
lib/pls/include/pls/internal/scheduling/task_manager_impl.h
+1
-0
lib/pls/src/internal/scheduling/scheduler.cpp
+1
-0
lib/pls/src/internal/scheduling/task_manager.cpp
+24
-17
No files found.
app/benchmark_fft/main.cpp
View file @
89b6e3cb
...
...
@@ -37,7 +37,7 @@ void conquer(fft::complex_vector::iterator data, int n) {
constexpr
int
MAX_NUM_THREADS
=
8
;
constexpr
int
MAX_NUM_TASKS
=
32
;
constexpr
int
MAX_STACK_SIZE
=
1024
*
64
;
constexpr
int
MAX_STACK_SIZE
=
1024
*
32
;
static_scheduler_memory
<
MAX_NUM_THREADS
,
MAX_NUM_TASKS
,
...
...
@@ -56,19 +56,11 @@ int main(int argc, char **argv) {
scheduler
scheduler
{
global_scheduler_memory
,
(
unsigned
)
num_threads
};
scheduler
.
perform_work
([
&
]()
{
for
(
int
i
=
0
;
i
<
fft
::
NUM_WARMUP_ITERATIONS
;
i
++
)
{
conquer
(
data
.
begin
(),
fft
::
SIZE
);
}
});
scheduler
.
perform_work
([
&
]()
{
for
(
int
i
=
0
;
i
<
fft
::
NUM_ITERATIONS
;
i
++
)
{
runner
.
start_iteration
();
conquer
(
data
.
begin
(),
fft
::
SIZE
);
runner
.
end_iteration
();
}
});
runner
.
run_iterations
(
fft
::
NUM_ITERATIONS
,
[
&
]()
{
scheduler
.
perform_work
([
&
]()
{
conquer
(
data
.
begin
(),
fft
::
SIZE
);;
});
},
fft
::
NUM_WARMUP_ITERATIONS
);
runner
.
commit_results
(
true
);
return
0
;
...
...
This diff is collapsed.
Click to expand it.
app/benchmark_fib/main.cpp
View file @
89b6e3cb
...
...
@@ -51,19 +51,12 @@ int main(int argc, char **argv) {
scheduler
scheduler
{
global_scheduler_memory
,
(
unsigned
)
num_threads
};
volatile
int
res
;
scheduler
.
perform_work
([
&
]()
{
for
(
int
i
=
0
;
i
<
fib
::
NUM_WARMUP_ITERATIONS
;
i
++
)
{
res
=
pls_fib
(
fib
::
INPUT_N
);
}
});
scheduler
.
perform_work
([
&
]()
{
for
(
int
i
=
0
;
i
<
fib
::
NUM_ITERATIONS
;
i
++
)
{
runner
.
start_iteration
();
runner
.
run_iterations
(
fib
::
NUM_ITERATIONS
,
[
&
]()
{
scheduler
.
perform_work
([
&
]()
{
res
=
pls_fib
(
fib
::
INPUT_N
);
runner
.
end_iteration
();
}
});
});
},
fib
::
NUM_WARMUP_ITERATIONS
);
runner
.
commit_results
(
true
);
return
0
;
...
...
This diff is collapsed.
Click to expand it.
app/benchmark_matrix/main.cpp
View file @
89b6e3cb
...
...
@@ -14,8 +14,8 @@ class pls_matrix : public matrix::matrix<T, SIZE> {
public
:
pls_matrix
()
:
matrix
::
matrix
<
T
,
SIZE
>
()
{}
void
pls_multiply
(
const
matrix
::
matrix
<
T
,
SIZE
>
&
a
,
const
matrix
::
matrix
<
T
,
SIZE
>
&
b
)
{
pls
::
algorithm
::
for_each_range
(
0
,
SIZE
,
[
this
,
&
a
,
&
b
](
int
i
)
{
void
multiply
(
const
matrix
::
matrix
<
T
,
SIZE
>
&
a
,
const
matrix
::
matrix
<
T
,
SIZE
>
&
b
)
override
{
pls
::
algorithm
::
for_each_range
(
0
,
SIZE
,
[
&
](
int
i
)
{
this
->
multiply_column
(
i
,
a
,
b
);
});
}
...
...
@@ -23,7 +23,7 @@ class pls_matrix : public matrix::matrix<T, SIZE> {
constexpr
int
MAX_NUM_THREADS
=
8
;
constexpr
int
MAX_NUM_TASKS
=
32
;
constexpr
int
MAX_STACK_SIZE
=
1024
*
4
;
constexpr
int
MAX_STACK_SIZE
=
1024
*
1
;
static_scheduler_memory
<
MAX_NUM_THREADS
,
MAX_NUM_TASKS
,
...
...
@@ -44,18 +44,10 @@ int main(int argc, char **argv) {
scheduler
scheduler
{
global_scheduler_memory
,
(
unsigned
)
num_threads
};
scheduler
.
perform_work
([
&
]()
{
for
(
int
i
=
0
;
i
<
matrix
::
WARMUP_ITERATIONS
;
i
++
)
{
result
.
pls_multiply
(
a
,
b
);
}
});
scheduler
.
perform_work
([
&
]()
{
for
(
int
i
=
0
;
i
<
matrix
::
NUM_ITERATIONS
;
i
++
)
{
runner
.
start_iteration
();
result
.
pls_multiply
(
a
,
b
);
runner
.
end_iteration
();
}
});
runner
.
run_iterations
(
matrix
::
NUM_ITERATIONS
,
[
&
]()
{
scheduler
.
perform_work
([
&
]()
{
result
.
multiply
(
a
,
b
);
});
},
matrix
::
WARMUP_ITERATIONS
);
runner
.
commit_results
(
true
);
}
This diff is collapsed.
Click to expand it.
cmake/SetupOptimizationLevel.cmake
View file @
89b6e3cb
...
...
@@ -18,7 +18,7 @@ if (CMAKE_BUILD_TYPE STREQUAL "Release")
# but inlining functions and SIMD/Vectorization is
# only enabled by -O3, thus it's way faster in some
# array calculations.
set
(
CMAKE_CXX_FLAGS_RELEASE
"
${
CMAKE_CXX_FLAGS_RELEASE
}
-O
2
-march=native"
)
set
(
CMAKE_CXX_FLAGS_RELEASE
"
${
CMAKE_CXX_FLAGS_RELEASE
}
-O
3
-march=native"
)
set
(
CMAKE_INTERPROCEDURAL_OPTIMIZATION TRUE
)
else
()
set
(
CMAKE_CXX_FLAGS_DEBUG
"-g -O0"
)
...
...
This diff is collapsed.
Click to expand it.
lib/context_switcher/asm/cscontext/SelectAssemblyFiles.cmake
View file @
89b6e3cb
...
...
@@ -9,6 +9,11 @@ if (CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND CMAKE_SYSTEM_NAME STREQUAL "Lin
SET
(
CS_CSCONTEXT_ASSEMBLY
asm/cscontext/enter_context_x86_64_sysv_elf.s
asm/cscontext/switch_context_x86_64_sysv_elf.s
)
elseif
(
CMAKE_SYSTEM_PROCESSOR STREQUAL
"armv7l"
AND CMAKE_SYSTEM_NAME STREQUAL
"Linux"
)
# Typical Linux running on ARMv7
SET
(
CS_CSCONTEXT_ASSEMBLY
asm/cscontext/enter_context_arm32_sysv_elf.s
asm/cscontext/switch_context_arm32_sysv_elf.s
)
else
()
SET
(
CS_CSCONTEXT_FOUND FALSE
)
endif
()
...
...
This diff is collapsed.
Click to expand it.
lib/context_switcher/asm/cscontext/enter_context_arm32_sysv_elf.s
0 → 100644
View file @
89b6e3cb
.arm
.text
.global __cs_enter_context
.type __cs_enter_context, %function
__cs_enter_context:
/* Saves the caller's register state on the current stack, switches to a new
 * stack and invokes a callback on it. When the callback returns, the state
 * found on the stack it returned in r0 is restored and control returns to
 * whoever saved that state.
 *
 * Parameter List (in order)
 * r0 = new stack pointer (rounded down to a 16 byte boundary before use)
 * r1 = arbitrary pointer, left untouched and therefore seen by the callback as its second argument
 * r2 = callback function pointer
 * r3 = new stack limit (not read by this routine)
 *
 * Return
 * r0 = continuation that returned control back to the caller (null if fallthrough)
 *
 * Variables
 * r4 = temporary for the old stack pointer (clobbered only after it was saved below)
 *
 * Saved frame layout, from the saved stack pointer upwards:
 * [d8-d15, 64 bytes, only if VFP is enabled] [r4-r12, lr] [lr used as return address] */
/* ========== Save State ========== */
/* store the return address (lr) so the final 'pop {pc}' can return to the caller */
push {lr}
/* store callee saved registers */
push {r4-r12,lr}
/* store floating point extension registers */
/* NOTE(review): these '#if' guards require the C preprocessor. Confirm the build */
/* preprocesses this file; a lowercase '.s' file is typically assembled without it. */
#if (defined(__VFP_FP__) && !defined(__SOFTFP__))
sub sp, sp, #64
vstmia sp, {d8-d15}
#endif
/* ========== Save State ========== */
/* Perform change to new stack */
/* Keep the old stack pointer; it becomes the first parameter (the continuation) of our callback function. */
mov r4, sp
/* Make sure that stack start is properly aligned (clears the low four bits, i.e. rounds down to 16 bytes). */
and r0, r0, #-16
/* Switch to new stack pointer. */
mov sp, r0
/* Perform actual function call, this will now be on the new stack */
/* r0 = first parameter to callback (continuation, i.e. the old stack pointer) */
/* r1 = second parameter to callback (arbitrary pointer, unchanged since entry) */
mov r0, r4
blx r2
/* Restore state of returned continuation. */
/* To do so we first reset the stack pointer (which we get returned in r0). */
/* After that we execute our standard restore procedure to pop the state from the stack. */
mov sp, r0
/* ========== Restore State ========== */
/* restore floating point extension registers */
#if (defined(__VFP_FP__) && !defined(__SOFTFP__))
vldmia sp, {d8-d15}
add sp, sp, #64
#endif
/* restore callee saved registers */
pop {r4-r12,lr}
/* ========== Restore State ========== */
/* Just return back from the call. */
/* This is the end of a fiber, so we have no continuation: zero r0 to return null. */
eor r0, r0, r0
pop {pc}
This diff is collapsed.
Click to expand it.
lib/context_switcher/asm/cscontext/switch_context_arm32_sysv_elf.s
0 → 100644
View file @
89b6e3cb
.arm
.text
.global __cs_switch_context
.type __cs_switch_context, %function
__cs_switch_context:
/* Saves the caller's register state on the current stack, then switches to
 * the stack given in r0 and restores the register state stored there.
 *
 * Parameter List (in order)
 * r0 = continuation to switch to, used directly as the new stack pointer; the
 *      restore sequence below pops a previously saved register state from it
 *
 * Return
 * r0 = continuation that returned control back to the caller (null if fallthrough)
 *
 * Variables
 * r1 = temporary for the old stack pointer
 *
 * Saved frame layout, from the saved stack pointer upwards:
 * [d8-d15, 64 bytes, only if VFP is enabled] [r4-r12, lr] [lr used as return address] */
/* ========== Save State ========== */
/* store the return address (lr) so a later 'pop {pc}' can return to the caller */
push {lr}
/* store callee saved registers */
push {r4-r12,lr}
/* store floating point extension registers */
/* NOTE(review): these '#if' guards require the C preprocessor. Confirm the build */
/* preprocesses this file; a lowercase '.s' file is typically assembled without it. */
#if (defined(__VFP_FP__) && !defined(__SOFTFP__))
sub sp, sp, #64
vstmia sp, {d8-d15}
#endif
/* ========== Save State ========== */
/* Perform change to new stack */
/* Keep old stack as result from this function. */
mov r1, sp
/* Switch to new stack pointer. */
mov sp, r0
/* ========== Restore State ========== */
/* restore floating point extension registers */
#if (defined(__VFP_FP__) && !defined(__SOFTFP__))
vldmia sp, {d8-d15}
add sp, sp, #64
#endif
/* restore callee saved registers */
pop {r4-r12,lr}
/* ========== Restore State ========== */
/* Just return back from the call, now on the target stack. */
/* The stack pointer saved above (r1) is handed to the resumed context as its return value. */
mov r0, r1
pop {pc}
This diff is collapsed.
Click to expand it.
lib/pls/include/pls/algorithms/for_each_impl.h
View file @
89b6e3cb
...
...
@@ -77,10 +77,7 @@ void for_each(RandomIt
execution_strategy
)
{
long
num_elements
=
std
::
distance
(
first
,
last
);
return
internal
::
for_each
(
first
,
last
,
function
,
execution_strategy
.
calculate_min_elements
(
num_elements
)
);
internal
::
for_each
(
first
,
last
,
function
,
execution_strategy
.
calculate_min_elements
(
num_elements
));
}
template
<
typename
RandomIt
,
typename
Function
>
...
...
This diff is collapsed.
Click to expand it.
lib/pls/include/pls/internal/scheduling/task.h
View file @
89b6e3cb
...
...
@@ -34,6 +34,8 @@ struct alignas(base::system_details::CACHE_LINE_SIZE) task {
depth_
=
depth
;
thread_id_
=
thread_id
;
is_synchronized_
=
false
;
}
template
<
typename
F
>
...
...
@@ -44,9 +46,11 @@ struct alignas(base::system_details::CACHE_LINE_SIZE) task {
// TODO: Proper access control and split it up into responsibilities
// Stack/Continuation Management
char
*
stack_memory_
;
size_t
stack_size_
;
size_t
stack_size_
;
// TODO: maybe remove it, not needed in here
context_switcher
::
continuation
continuation_
;
bool
is_synchronized_
;
// TODO: Clean up responsibilities
// Work-Stealing
std
::
atomic
<
traded_cas_field
>
external_trading_deque_cas_
{};
std
::
atomic
<
task
*>
resource_stack_next_
{};
...
...
This diff is collapsed.
Click to expand it.
lib/pls/include/pls/internal/scheduling/task_manager_impl.h
View file @
89b6e3cb
...
...
@@ -28,6 +28,7 @@ void task_manager::spawn_child(F &&lambda) {
last_task
->
continuation_
=
std
::
move
(
cont
);
// we are now executing the new task, allow others to steal the last task continuation.
spawned_task
->
is_synchronized_
=
true
;
spawning_task_manager
->
active_task_
=
spawned_task
;
spawning_task_manager
->
deque_
.
push_bot
(
last_task
);
...
...
This diff is collapsed.
Click to expand it.
lib/pls/src/internal/scheduling/scheduler.cpp
View file @
89b6e3cb
...
...
@@ -108,6 +108,7 @@ void scheduler::work_thread_work_section() {
// Execute the stolen task by jumping to its continuation.
PLS_ASSERT
(
stolen_task
->
continuation_
.
valid
(),
"A task that we can steal must have a valid continuation for us to start working."
);
stolen_task
->
is_synchronized_
=
false
;
context_switcher
::
switch_context
(
std
::
move
(
stolen_task
->
continuation_
));
// We will continue execution in this line when we finished the stolen work.
}
...
...
This diff is collapsed.
Click to expand it.
lib/pls/src/internal/scheduling/task_manager.cpp
View file @
89b6e3cb
...
...
@@ -128,23 +128,29 @@ void task_manager::sync() {
auto
*
last_task
=
spawning_task_manager
->
active_task_
;
auto
*
spawned_task
=
spawning_task_manager
->
active_task_
->
next_
;
auto
continuation
=
spawned_task
->
run_as_task
([
=
](
context_switcher
::
continuation
cont
)
{
last_task
->
continuation_
=
std
::
move
(
cont
);
spawning_task_manager
->
active_task_
=
spawned_task
;
context_switcher
::
continuation
result_cont
;
if
(
spawning_task_manager
->
try_clean_return
(
result_cont
))
{
// We return back to the main scheduling loop
return
result_cont
;
}
else
{
// We finish up the last task
return
result_cont
;
}
});
PLS_ASSERT
(
!
continuation
.
valid
(),
"We only return to a sync point, never jump to it directly."
"This must therefore never return an unfinished fiber/continuation."
);
if
(
last_task
->
is_synchronized_
)
{
return
;
// We are already the sole owner of last_task
}
else
{
auto
continuation
=
spawned_task
->
run_as_task
([
=
](
context_switcher
::
continuation
cont
)
{
last_task
->
continuation_
=
std
::
move
(
cont
);
spawning_task_manager
->
active_task_
=
spawned_task
;
context_switcher
::
continuation
result_cont
;
if
(
spawning_task_manager
->
try_clean_return
(
result_cont
))
{
// We return back to the main scheduling loop
return
result_cont
;
}
else
{
// We finish up the last task
return
result_cont
;
}
});
PLS_ASSERT
(
!
continuation
.
valid
(),
"We only return to a sync point, never jump to it directly."
"This must therefore never return an unfinished fiber/continuation."
);
return
;
// We cleanly synced to the last one finishing work on last_task
}
}
bool
task_manager
::
try_clean_return
(
context_switcher
::
continuation
&
result_cont
)
{
...
...
@@ -195,6 +201,7 @@ bool task_manager::try_clean_return(context_switcher::continuation &result_cont)
// We are the last one working on this task. Thus the sync must be finished, continue working.
active_task_
=
last_task
;
last_task
->
is_synchronized_
=
true
;
result_cont
=
std
::
move
(
last_task
->
continuation_
);
PLS_ASSERT
(
result_cont
.
valid
(),
"Must return a valid continuation."
);
return
false
;
...
...
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment