Skip to content
Toggle navigation
P
Projects
G
Groups
S
Snippets
Help
las3_pub
/
predictable_parallel_patterns
This project
Loading...
Sign in
Toggle navigation
Go to a project
Project
Repository
Issues
0
Merge Requests
0
Pipelines
Wiki
Members
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Commit
aa270645
authored
Apr 17, 2019
by
FritzFlorian
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Reformate code to fit GNU code formating style.
parent
3ff10baa
Pipeline
#1157
passed with stages
in 3 minutes 36 seconds
Changes
46
Pipelines
1
Show whitespace changes
Inline
Side-by-side
Showing
46 changed files
with
637 additions
and
569 deletions
+637
-569
app/benchmark_fft/main.cpp
+0
-1
app/invoke_parallel/main.cpp
+0
-0
app/playground/main.cpp
+4
-3
app/test_for_new/main.cpp
+1
-2
lib/pls/include/pls/algorithms/invoke_parallel.h
+9
-7
lib/pls/include/pls/algorithms/invoke_parallel_impl.h
+22
-20
lib/pls/include/pls/internal/base/alignment.h
+16
-14
lib/pls/include/pls/internal/base/barrier.h
+9
-7
lib/pls/include/pls/internal/base/spin_lock.h
+8
-6
lib/pls/include/pls/internal/base/system_details.h
+13
-11
lib/pls/include/pls/internal/base/tas_spin_lock.h
+12
-12
lib/pls/include/pls/internal/base/thread.h
+31
-28
lib/pls/include/pls/internal/base/thread_impl.h
+32
-31
lib/pls/include/pls/internal/base/ttas_spin_lock.h
+10
-12
lib/pls/include/pls/internal/data_structures/aligned_stack.h
+18
-15
lib/pls/include/pls/internal/data_structures/aligned_stack_impl.h
+18
-16
lib/pls/include/pls/internal/data_structures/deque.h
+27
-25
lib/pls/include/pls/internal/helpers/mini_benchmark.h
+11
-9
lib/pls/include/pls/internal/helpers/prohibit_new.h
+1
-1
lib/pls/include/pls/internal/helpers/unique_id.h
+11
-9
lib/pls/include/pls/internal/scheduling/abstract_task.h
+14
-12
lib/pls/include/pls/internal/scheduling/fork_join_task.h
+35
-33
lib/pls/include/pls/internal/scheduling/root_task.h
+20
-18
lib/pls/include/pls/internal/scheduling/run_on_n_threads_task.h
+27
-24
lib/pls/include/pls/internal/scheduling/scheduler.h
+16
-14
lib/pls/include/pls/internal/scheduling/scheduler_impl.h
+14
-12
lib/pls/include/pls/internal/scheduling/scheduler_memory.h
+30
-27
lib/pls/include/pls/internal/scheduling/thread_state.h
+17
-15
lib/pls/include/pls/pls.h
+10
-8
lib/pls/src/internal/base/alignment.cpp
+15
-13
lib/pls/src/internal/base/barrier.cpp
+12
-10
lib/pls/src/internal/base/tas_spin_lock.cpp
+12
-10
lib/pls/src/internal/base/thread.cpp
+10
-8
lib/pls/src/internal/base/ttas_spin_lock.cpp
+12
-10
lib/pls/src/internal/data_structures/aligned_stack.cpp
+7
-5
lib/pls/src/internal/data_structures/deque.cpp
+14
-12
lib/pls/src/internal/scheduling/abstract_task.cpp
+9
-7
lib/pls/src/internal/scheduling/fork_join_task.cpp
+30
-28
lib/pls/src/internal/scheduling/root_task.cpp
+4
-4
lib/pls/src/internal/scheduling/run_on_n_threads_task.cpp
+4
-4
lib/pls/src/internal/scheduling/scheduler.cpp
+17
-14
lib/pls/src/internal/scheduling/scheduler_memory.cpp
+19
-14
lib/pls/src/internal/scheduling/thread_state.cpp
+4
-4
test/base_tests.cpp
+5
-5
test/data_structures_test.cpp
+12
-13
test/scheduling_tests.cpp
+15
-16
No files found.
app/benchmark_fft/main.cpp
View file @
aa270645
...
...
@@ -73,7 +73,6 @@ complex_vector prepare_input(int input_size) {
return
data
;
}
int
main
()
{
PROFILE_ENABLE
complex_vector
initial_input
=
prepare_input
(
INPUT_SIZE
);
...
...
app/invoke_parallel/main.cpp
View file @
aa270645
app/playground/main.cpp
View file @
aa270645
...
...
@@ -10,8 +10,9 @@
#include <pls/internal/scheduling/root_task.h>
#include <pls/internal/helpers/unique_id.h>
int
main
()
{
std
::
cout
<<
pls
::
internal
::
scheduling
::
root_task
<
void
(
*
)
>::
create_id
().
type_
.
hash_code
()
<<
std
::
endl
;
std
::
cout
<<
pls
::
internal
::
helpers
::
unique_id
::
create
<
pls
::
internal
::
scheduling
::
root_task
<
void
(
*
)
>>
().
type_
.
hash_code
()
<<
std
::
endl
;
std
::
cout
<<
pls
::
internal
::
scheduling
::
root_task
<
void
(
*
)
>::
create_id
().
type_
.
hash_code
()
<<
std
::
endl
;
std
::
cout
<<
pls
::
internal
::
helpers
::
unique_id
::
create
<
pls
::
internal
::
scheduling
::
root_task
<
void
(
*
)
>>
().
type_
.
hash_code
()
<<
std
::
endl
;
}
app/test_for_new/main.cpp
View file @
aa270645
...
...
@@ -5,9 +5,8 @@ using namespace pls::internal::base;
int
global
=
0
;
int
main
()
{
// Try to use every feature, to trigger the prohibited use of new if found somewhere
auto
t1
=
start_thread
([]
()
{});
auto
t1
=
start_thread
([]()
{});
t1
.
join
();
}
lib/pls/include/pls/algorithms/invoke_parallel.h
View file @
aa270645
...
...
@@ -6,15 +6,17 @@
#include "pls/internal/scheduling/scheduler.h"
namespace
pls
{
namespace
algorithm
{
template
<
typename
Function1
,
typename
Function2
>
void
invoke_parallel
(
const
Function1
&
function1
,
const
Function2
&
function2
);
namespace
algorithm
{
template
<
typename
Function1
,
typename
Function2
,
typename
Function3
>
void
invoke_parallel
(
const
Function1
&
function1
,
const
Function2
&
function2
,
const
Function3
&
function3
);
template
<
typename
Function1
,
typename
Function2
>
void
invoke_parallel
(
const
Function1
&
function1
,
const
Function2
&
function2
);
// ...and so on, add more if we decide to keep this design
}
template
<
typename
Function1
,
typename
Function2
,
typename
Function3
>
void
invoke_parallel
(
const
Function1
&
function1
,
const
Function2
&
function2
,
const
Function3
&
function3
);
// ...and so on, add more if we decide to keep this design
}
}
#include "invoke_parallel_impl.h"
...
...
lib/pls/include/pls/algorithms/invoke_parallel_impl.h
View file @
aa270645
...
...
@@ -7,34 +7,35 @@
#include "pls/internal/helpers/unique_id.h"
namespace
pls
{
namespace
algorithm
{
namespace
internal
{
using
namespace
::
pls
::
internal
::
scheduling
;
namespace
algorithm
{
namespace
internal
{
using
namespace
::
pls
::
internal
::
scheduling
;
template
<
typename
Body
>
inline
void
run_body
(
const
Body
&
internal_body
,
const
abstract_task
::
id
&
id
)
{
template
<
typename
Body
>
inline
void
run_body
(
const
Body
&
internal_body
,
const
abstract_task
::
id
&
id
)
{
// Make sure we are in the context of this invoke_parallel instance,
// if not we will spawn it as a new 'fork-join-style' task.
auto
current_task
=
scheduler
::
current_task
();
if
(
current_task
->
unique_id
()
==
id
)
{
auto
current_sub_task
=
reinterpret_cast
<
fork_join_task
*>
(
current_task
)
->
currently_executing
();
auto
current_sub_task
=
reinterpret_cast
<
fork_join_task
*>
(
current_task
)
->
currently_executing
();
internal_body
(
current_sub_task
);
}
else
{
fork_join_lambda
<
Body
>
root_body
(
&
internal_body
);
fork_join_task
root_task
{
&
root_body
,
id
};
scheduler
::
execute_task
(
root_task
);
}
}
}
}
}
template
<
typename
Function1
,
typename
Function2
>
void
invoke_parallel
(
const
Function1
&
function1
,
const
Function2
&
function2
)
{
template
<
typename
Function1
,
typename
Function2
>
void
invoke_parallel
(
const
Function1
&
function1
,
const
Function2
&
function2
)
{
using
namespace
::
pls
::
internal
::
scheduling
;
using
namespace
::
pls
::
internal
::
helpers
;
static
abstract_task
::
id
id
=
unique_id
::
create
<
Function1
,
Function2
>
();
auto
internal_body
=
[
&
]
(
fork_join_sub_task
*
this_task
)
{
auto
sub_task_body_1
=
[
&
]
(
fork_join_sub_task
*
)
{
function1
();
};
auto
internal_body
=
[
&
](
fork_join_sub_task
*
this_task
)
{
auto
sub_task_body_1
=
[
&
](
fork_join_sub_task
*
)
{
function1
();
};
auto
sub_task_1
=
fork_join_lambda
<
decltype
(
sub_task_body_1
)
>
(
&
sub_task_body_1
);
this_task
->
spawn_child
(
sub_task_1
);
...
...
@@ -43,18 +44,18 @@ namespace pls {
};
internal
::
run_body
(
internal_body
,
id
);
}
}
template
<
typename
Function1
,
typename
Function2
,
typename
Function3
>
void
invoke_parallel
(
const
Function1
&
function1
,
const
Function2
&
function2
,
const
Function3
&
function3
)
{
template
<
typename
Function1
,
typename
Function2
,
typename
Function3
>
void
invoke_parallel
(
const
Function1
&
function1
,
const
Function2
&
function2
,
const
Function3
&
function3
)
{
using
namespace
::
pls
::
internal
::
scheduling
;
using
namespace
::
pls
::
internal
::
helpers
;
static
abstract_task
::
id
id
=
unique_id
::
create
<
Function1
,
Function2
,
Function3
>
();
auto
internal_body
=
[
&
]
(
fork_join_sub_task
*
this_task
)
{
auto
sub_task_body_1
=
[
&
]
(
fork_join_sub_task
*
)
{
function1
();
};
auto
internal_body
=
[
&
](
fork_join_sub_task
*
this_task
)
{
auto
sub_task_body_1
=
[
&
](
fork_join_sub_task
*
)
{
function1
();
};
auto
sub_task_1
=
fork_join_lambda
<
decltype
(
sub_task_body_1
)
>
(
&
sub_task_body_1
);
auto
sub_task_body_2
=
[
&
]
(
fork_join_sub_task
*
)
{
function2
();
};
auto
sub_task_body_2
=
[
&
](
fork_join_sub_task
*
)
{
function2
();
};
auto
sub_task_2
=
fork_join_lambda
<
decltype
(
sub_task_body_2
)
>
(
&
sub_task_body_2
);
this_task
->
spawn_child
(
sub_task_1
);
...
...
@@ -64,8 +65,9 @@ namespace pls {
};
internal
::
run_body
(
internal_body
,
id
);
}
}
}
}
}
#endif //PLS_INVOKE_PARALLEL_IMPL_H
lib/pls/include/pls/internal/base/alignment.h
View file @
aa270645
...
...
@@ -8,21 +8,23 @@
#include "system_details.h"
namespace
pls
{
namespace
internal
{
namespace
base
{
namespace
alignment
{
template
<
typename
T
>
struct
aligned_wrapper
{
namespace
internal
{
namespace
base
{
namespace
alignment
{
template
<
typename
T
>
struct
aligned_wrapper
{
alignas
(
system_details
::
CACHE_LINE_SIZE
)
unsigned
char
data
[
sizeof
(
T
)];
T
*
pointer
()
{
return
reinterpret_cast
<
T
*>
(
data
);
}
};
void
*
allocate_aligned
(
size_t
size
);
std
::
uintptr_t
next_alignment
(
std
::
uintptr_t
size
);
char
*
next_alignment
(
char
*
pointer
);
}
}
}
T
*
pointer
()
{
return
reinterpret_cast
<
T
*>
(
data
);
}
};
void
*
allocate_aligned
(
size_t
size
);
std
::
uintptr_t
next_alignment
(
std
::
uintptr_t
size
);
char
*
next_alignment
(
char
*
pointer
);
}
}
}
}
#endif //PLS_ALIGNMENT_H
lib/pls/include/pls/internal/base/barrier.h
View file @
aa270645
...
...
@@ -5,9 +5,10 @@
#include <pthread.h>
namespace
pls
{
namespace
internal
{
namespace
base
{
/**
namespace
internal
{
namespace
base
{
/**
* Provides standard barrier behaviour.
* `count` threads have to call `wait()` before any of the `wait()` calls returns,
* thus blocking all threads until everyone reached the barrier.
...
...
@@ -15,7 +16,7 @@ namespace pls {
* PORTABILITY:
* Current implementation is based on pthreads.
*/
class
barrier
{
class
barrier
{
pthread_barrier_t
barrier_
;
public
:
...
...
@@ -23,9 +24,10 @@ namespace pls {
~
barrier
();
void
wait
();
};
}
}
};
}
}
}
#endif //PLS_BARRIER_H
lib/pls/include/pls/internal/base/spin_lock.h
View file @
aa270645
...
...
@@ -6,12 +6,14 @@
#include "ttas_spin_lock.h"
namespace
pls
{
namespace
internal
{
namespace
base
{
// Default Spin-Lock implementation for this project.
using
spin_lock
=
tas_spin_lock
;
}
}
namespace
internal
{
namespace
base
{
// Default Spin-Lock implementation for this project.
using
spin_lock
=
tas_spin_lock
;
}
}
}
#endif //PLS_SPINLOCK_H
lib/pls/include/pls/internal/base/system_details.h
View file @
aa270645
...
...
@@ -5,29 +5,31 @@
#include <cstdint>
namespace
pls
{
namespace
internal
{
namespace
base
{
/**
namespace
internal
{
namespace
base
{
/**
* Collection of system details, e.g. hardware cache line size.
*
* PORTABILITY:
* Currently sane default values for x86.
*/
namespace
system_details
{
/**
namespace
system_details
{
/**
* Most processors have 64 byte cache lines
*/
constexpr
std
::
uintptr_t
CACHE_LINE_SIZE
=
64
;
constexpr
std
::
uintptr_t
CACHE_LINE_SIZE
=
64
;
/**
/**
* Choose one of the following ways to store thread specific data.
* Try to choose the fastest available on this processor/system.
*/
// #define PLS_THREAD_SPECIFIC_PTHREAD
#define PLS_THREAD_SPECIFIC_COMPILER
}
}
}
#define PLS_THREAD_SPECIFIC_COMPILER
}
}
}
}
#endif //PLS_SYSTEM_DETAILS_H
lib/pls/include/pls/internal/base/tas_spin_lock.h
View file @
aa270645
...
...
@@ -10,30 +10,30 @@
#include "pls/internal/base/thread.h"
namespace
pls
{
namespace
internal
{
namespace
base
{
/**
namespace
internal
{
namespace
base
{
/**
* A simple set and test_and_set based spin lock implementation.
*
* PORTABILITY:
* Current implementation is based on C++ 11 atomic_flag.
*/
class
tas_spin_lock
{
class
tas_spin_lock
{
std
::
atomic_flag
flag_
;
unsigned
int
yield_at_tries_
;
public
:
tas_spin_lock
()
:
flag_
{
ATOMIC_FLAG_INIT
},
yield_at_tries_
{
1024
}
{};
tas_spin_lock
(
const
tas_spin_lock
&
other
)
:
flag_
{
ATOMIC_FLAG_INIT
},
yield_at_tries_
{
other
.
yield_at_tries_
}
{}
tas_spin_lock
()
:
flag_
{
ATOMIC_FLAG_INIT
},
yield_at_tries_
{
1024
}
{};
tas_spin_lock
(
const
tas_spin_lock
&
other
)
:
flag_
{
ATOMIC_FLAG_INIT
},
yield_at_tries_
{
other
.
yield_at_tries_
}
{}
void
lock
();
bool
try_lock
(
unsigned
int
num_tries
=
1
);
bool
try_lock
(
unsigned
int
num_tries
=
1
);
void
unlock
();
};
}
}
}
};
}
}
}
#endif //PLS_TAS_SPIN_LOCK_H
lib/pls/include/pls/internal/base/thread.h
View file @
aa270645
...
...
@@ -13,11 +13,12 @@
#include "system_details.h"
namespace
pls
{
namespace
internal
{
namespace
base
{
using
thread_entrypoint
=
void
();
namespace
internal
{
namespace
base
{
/**
using
thread_entrypoint
=
void
();
/**
* Static methods that can be performed on the current thread.
*
* usage:
...
...
@@ -27,15 +28,16 @@ namespace pls {
* PORTABILITY:
* Current implementation is based on pthreads.
*/
class
this_thread
{
class
this_thread
{
template
<
typename
Function
,
typename
State
>
friend
class
thread
;
friend
class
thread
;
#ifdef PLS_THREAD_SPECIFIC_PTHREAD
static
pthread_key_t
local_storage_key_
;
static
bool
local_storage_key_initialized_
;
#endif
#ifdef PLS_THREAD_SPECIFIC_COMPILER
static
__thread
void
*
local_state_
;
static
__thread
void
*
local_state_
;
#endif
public
:
static
void
yield
()
{
...
...
@@ -49,7 +51,7 @@ namespace pls {
* @return The state pointer held for this thread.
*/
template
<
typename
T
>
static
T
*
state
();
static
T
*
state
();
/**
* Stores a pointer to the thread local state object.
...
...
@@ -60,10 +62,10 @@ namespace pls {
* @param state_pointer A pointer to the thread's state object.
*/
template
<
typename
T
>
static
void
set_state
(
T
*
state_pointer
);
};
static
void
set_state
(
T
*
state_pointer
);
};
/**
/**
* Abstraction for starting a function in a separate thread.
*
* @tparam Function Lambda being started on the new thread.
...
...
@@ -79,43 +81,44 @@ namespace pls {
* PORTABILITY:
* Current implementation is based on pthreads.
*/
template
<
typename
Function
,
typename
State
>
class
thread
{
template
<
typename
Function
,
typename
State
>
class
thread
{
friend
class
this_thread
;
// Keep a copy of the function (lambda) in this object to make sure it is valid when called!
Function
function_
;
State
*
state_pointer_
;
State
*
state_pointer_
;
// We need to wait for the started function to read
// the function_ and state_pointer_ property before returning
// from the constructor, as the object might be moved after this.
std
::
atomic_flag
*
startup_flag_
;
std
::
atomic_flag
*
startup_flag_
;
// Keep handle to native implementation
pthread_t
pthread_thread_
;
static
void
*
start_pthread_internal
(
void
*
thread_pointer
);
static
void
*
start_pthread_internal
(
void
*
thread_pointer
);
public
:
explicit
thread
(
const
Function
&
function
,
State
*
state_pointer
);
explicit
thread
(
const
Function
&
function
,
State
*
state_pointer
);
public
:
void
join
();
// make object move only
thread
(
thread
&&
)
noexcept
=
default
;
thread
&
operator
=
(
thread
&&
)
noexcept
=
default
;
thread
(
thread
&&
)
noexcept
=
default
;
thread
&
operator
=
(
thread
&&
)
noexcept
=
default
;
thread
(
const
thread
&
)
=
delete
;
thread
&
operator
=
(
const
thread
&
)
=
delete
;
};
thread
(
const
thread
&
)
=
delete
;
thread
&
operator
=
(
const
thread
&
)
=
delete
;
};
template
<
typename
Function
,
typename
State
>
thread
<
Function
,
State
>
start_thread
(
const
Function
&
function
,
State
*
state_pointer
);
template
<
typename
Function
>
thread
<
Function
,
void
>
start_thread
(
const
Function
&
function
);
}
}
template
<
typename
Function
,
typename
State
>
thread
<
Function
,
State
>
start_thread
(
const
Function
&
function
,
State
*
state_pointer
);
template
<
typename
Function
>
thread
<
Function
,
void
>
start_thread
(
const
Function
&
function
);
}
}
}
#include "thread_impl.h"
...
...
lib/pls/include/pls/internal/base/thread_impl.h
View file @
aa270645
...
...
@@ -3,33 +3,34 @@
#define PLS_THREAD_IMPL_H
namespace
pls
{
namespace
internal
{
namespace
base
{
template
<
typename
T
>
T
*
this_thread
::
state
()
{
namespace
internal
{
namespace
base
{
template
<
typename
T
>
T
*
this_thread
::
state
()
{
#ifdef PLS_THREAD_SPECIFIC_PTHREAD
return
reinterpret_cast
<
T
*>
(
pthread_getspecific
(
local_storage_key_
));
#endif
#ifdef PLS_THREAD_SPECIFIC_COMPILER
return
reinterpret_cast
<
T
*>
(
local_state_
);
return
reinterpret_cast
<
T
*>
(
local_state_
);
#endif
}
}
template
<
typename
T
>
void
this_thread
::
set_state
(
T
*
state_pointer
)
{
template
<
typename
T
>
void
this_thread
::
set_state
(
T
*
state_pointer
)
{
#ifdef PLS_THREAD_SPECIFIC_PTHREAD
pthread_setspecific
(
this_thread
::
local_storage_key_
,
(
void
*
)
state_pointer
);
#endif
#ifdef PLS_THREAD_SPECIFIC_COMPILER
local_state_
=
state_pointer
;
#endif
}
}
template
<
typename
Function
,
typename
State
>
void
*
thread
<
Function
,
State
>::
start_pthread_internal
(
void
*
thread_pointer
)
{
auto
my_thread
=
reinterpret_cast
<
thread
*>
(
thread_pointer
);
template
<
typename
Function
,
typename
State
>
void
*
thread
<
Function
,
State
>::
start_pthread_internal
(
void
*
thread_pointer
)
{
auto
my_thread
=
reinterpret_cast
<
thread
*>
(
thread_pointer
);
Function
my_function_copy
=
my_thread
->
function_
;
State
*
my_state_pointer_copy
=
my_thread
->
state_pointer_
;
State
*
my_state_pointer_copy
=
my_thread
->
state_pointer_
;
// Now we have copies of everything we need on the stack.
// The original thread object can be moved freely (no more
...
...
@@ -41,10 +42,10 @@ namespace pls {
// Finished executing the user function
pthread_exit
(
nullptr
);
}
}
template
<
typename
Function
,
typename
State
>
thread
<
Function
,
State
>::
thread
(
const
Function
&
function
,
State
*
state_pointer
)
:
template
<
typename
Function
,
typename
State
>
thread
<
Function
,
State
>::
thread
(
const
Function
&
function
,
State
*
state_pointer
)
:
function_
{
function
},
state_pointer_
{
state_pointer
},
startup_flag_
{
nullptr
},
...
...
@@ -62,27 +63,27 @@ namespace pls {
startup_flag_
=
&
startup_flag
;
startup_flag
.
test_and_set
();
// Set the flag, pthread will clear it when it is safe to return
pthread_create
(
&
pthread_thread_
,
nullptr
,
start_pthread_internal
,
(
void
*
)(
this
));
while
(
startup_flag
.
test_and_set
())
;
// Busy waiting for the starting flag to clear
}
pthread_create
(
&
pthread_thread_
,
nullptr
,
start_pthread_internal
,
(
void
*
)
(
this
));
while
(
startup_flag
.
test_and_set
());
// Busy waiting for the starting flag to clear
}
template
<
typename
Function
,
typename
State
>
void
thread
<
Function
,
State
>::
join
()
{
template
<
typename
Function
,
typename
State
>
void
thread
<
Function
,
State
>::
join
()
{
pthread_join
(
pthread_thread_
,
nullptr
);
}
}
template
<
typename
Function
,
typename
State
>
thread
<
Function
,
State
>
start_thread
(
const
Function
&
function
,
State
*
state_pointer
)
{
template
<
typename
Function
,
typename
State
>
thread
<
Function
,
State
>
start_thread
(
const
Function
&
function
,
State
*
state_pointer
)
{
return
thread
<
Function
,
State
>
(
function
,
state_pointer
);
}
}
template
<
typename
Function
>
thread
<
Function
,
void
>
start_thread
(
const
Function
&
function
)
{
template
<
typename
Function
>
thread
<
Function
,
void
>
start_thread
(
const
Function
&
function
)
{
return
thread
<
Function
,
void
>
(
function
,
nullptr
);
}
}
}
}
}
}
}
#endif //PLS_THREAD_IMPL_H
lib/pls/include/pls/internal/base/ttas_spin_lock.h
View file @
aa270645
...
...
@@ -8,30 +8,28 @@
#include "pls/internal/base/thread.h"
namespace
pls
{
namespace
internal
{
namespace
base
{
/**
namespace
internal
{
namespace
base
{
/**
* A simple set and test_and_set based spin lock implementation.
*
* PORTABILITY:
* Current implementation is based on C++ 11 std::atomic<int>.
*/
class
ttas_spin_lock
{
class
ttas_spin_lock
{
std
::
atomic
<
int
>
flag_
;
const
unsigned
int
yield_at_tries_
;
public
:
ttas_spin_lock
()
:
flag_
{
0
},
yield_at_tries_
{
1024
}
{};
ttas_spin_lock
(
const
ttas_spin_lock
&
other
)
:
flag_
{
0
},
yield_at_tries_
{
other
.
yield_at_tries_
}
{}
ttas_spin_lock
()
:
flag_
{
0
},
yield_at_tries_
{
1024
}
{};
ttas_spin_lock
(
const
ttas_spin_lock
&
other
)
:
flag_
{
0
},
yield_at_tries_
{
other
.
yield_at_tries_
}
{}
void
lock
();
bool
try_lock
(
unsigned
int
num_tries
=
1
);
bool
try_lock
(
unsigned
int
num_tries
=
1
);
void
unlock
();
};
}
}
};
}
}
}
#endif //PLS_TTAS_SPIN_LOCK_H
lib/pls/include/pls/internal/data_structures/aligned_stack.h
View file @
aa270645
...
...
@@ -9,9 +9,10 @@
#include "pls/internal/base/alignment.h"
namespace
pls
{
namespace
internal
{
namespace
data_structures
{
/**
namespace
internal
{
namespace
data_structures
{
/**
* Generic stack-like data structure that allows allocating arbitrary objects in a given memory region.
* The objects will be stored aligned in the stack, making the storage cache friendly and very fast
* (as long as one can live with the stack restrictions).
...
...
@@ -23,31 +24,33 @@ namespace pls {
* T* pointer = stack.push(some_object); // Copy-Construct the object on top of stack
* stack.pop<T>(); // Deconstruct the top object of type T
*/
class
aligned_stack
{
class
aligned_stack
{
// Keep bounds of our memory block
char
*
memory_start_
;
char
*
memory_end_
;
char
*
memory_start_
;
char
*
memory_end_
;
// Current head will always be aligned to cache lines
char
*
head_
;
char
*
head_
;
public
:
typedef
char
*
state
;
typedef
char
*
state
;
aligned_stack
()
:
memory_start_
{
nullptr
},
memory_end_
{
nullptr
},
head_
{
nullptr
}
{};
aligned_stack
(
char
*
memory_region
,
std
::
size_t
size
);
aligned_stack
()
:
memory_start_
{
nullptr
},
memory_end_
{
nullptr
},
head_
{
nullptr
}
{};
aligned_stack
(
char
*
memory_region
,
std
::
size_t
size
);
template
<
typename
T
>
T
*
push
(
const
T
&
object
);
T
*
push
(
const
T
&
object
);
template
<
typename
T
>
void
*
push
();
void
*
push
();
template
<
typename
T
>
T
pop
();
state
save_state
()
const
{
return
head_
;
}
void
reset_state
(
state
new_state
)
{
head_
=
new_state
;
}
};
}
}
};
}
}
}
#include "aligned_stack_impl.h"
...
...
lib/pls/include/pls/internal/data_structures/aligned_stack_impl.h
View file @
aa270645
...
...
@@ -3,17 +3,18 @@
#define PLS_ALIGNED_STACK_IMPL_H
namespace
pls
{
namespace
internal
{
namespace
data_structures
{
template
<
typename
T
>
T
*
aligned_stack
::
push
(
const
T
&
object
)
{
namespace
internal
{
namespace
data_structures
{
template
<
typename
T
>
T
*
aligned_stack
::
push
(
const
T
&
object
)
{
// Copy-Construct
return
new
((
void
*
)
push
<
T
>
())
T
(
object
);
}
return
new
((
void
*
)
push
<
T
>
())
T
(
object
);
}
template
<
typename
T
>
void
*
aligned_stack
::
push
()
{
void
*
result
=
reinterpret_cast
<
T
*>
(
head_
);
template
<
typename
T
>
void
*
aligned_stack
::
push
()
{
void
*
result
=
reinterpret_cast
<
T
*>
(
head_
);
// Move head to next aligned position after new object
head_
=
base
::
alignment
::
next_alignment
(
head_
+
sizeof
(
T
));
...
...
@@ -22,15 +23,16 @@ namespace pls {
}
return
result
;
}
}
template
<
typename
T
>
T
aligned_stack
::
pop
()
{
template
<
typename
T
>
T
aligned_stack
::
pop
()
{
head_
=
head_
-
base
::
alignment
::
next_alignment
(
sizeof
(
T
));
return
*
reinterpret_cast
<
T
*>
(
head_
);
}
}
}
return
*
reinterpret_cast
<
T
*>
(
head_
);
}
}
}
}
#endif //PLS_ALIGNED_STACK_IMPL_H
lib/pls/include/pls/internal/data_structures/deque.h
View file @
aa270645
...
...
@@ -5,56 +5,58 @@
#include "pls/internal/base/spin_lock.h"
namespace
pls
{
namespace
internal
{
namespace
data_structures
{
/**
namespace
internal
{
namespace
data_structures
{
/**
* Turns any object into deque item when inheriting from this.
*/
class
deque_item
{
class
deque_item
{
friend
class
deque_internal
;
deque_item
*
prev_
;
deque_item
*
next_
;
deque_item
*
prev_
;
deque_item
*
next_
;
};
};
class
deque_internal
{
class
deque_internal
{
protected
:
deque_item
*
head_
;
deque_item
*
tail_
;
deque_item
*
head_
;
deque_item
*
tail_
;
base
::
spin_lock
lock_
;
deque_item
*
pop_head_internal
();
deque_item
*
pop_tail_internal
();
deque_item
*
pop_head_internal
();
deque_item
*
pop_tail_internal
();
void
push_tail_internal
(
deque_item
*
new_item
);
};
};
/**
/**
* A double linked list based deque.
* Storage is therefore only needed for the individual items.
*
* @tparam Item The type of items stored in this deque
*/
template
<
typename
Item
>
class
deque
:
deque_internal
{
template
<
typename
Item
>
class
deque
:
deque_internal
{
public
:
explicit
deque
()
:
deque_internal
{}
{}
explicit
deque
()
:
deque_internal
{}
{}
inline
Item
*
pop_head
()
{
return
static_cast
<
Item
*>
(
pop_head_internal
());
inline
Item
*
pop_head
()
{
return
static_cast
<
Item
*>
(
pop_head_internal
());
}
inline
Item
*
pop_tail
()
{
return
static_cast
<
Item
*>
(
pop_tail_internal
());
inline
Item
*
pop_tail
()
{
return
static_cast
<
Item
*>
(
pop_tail_internal
());
}
inline
void
push_tail
(
Item
*
new_item
)
{
inline
void
push_tail
(
Item
*
new_item
)
{
push_tail_internal
(
new_item
);
}
};
}
}
};
}
}
}
#endif //PLS_DEQUE_H
lib/pls/include/pls/internal/helpers/mini_benchmark.h
View file @
aa270645
...
...
@@ -9,11 +9,12 @@
#include <iostream>
namespace
pls
{
namespace
internal
{
namespace
helpers
{
// TODO: Clean up (separate into small functions and .cpp file)
template
<
typename
Function
>
void
run_mini_benchmark
(
const
Function
&
lambda
,
size_t
max_threads
,
unsigned
long
max_runtime_ms
=
1000
)
{
namespace
internal
{
namespace
helpers
{
// TODO: Clean up (separate into small functions and .cpp file)
template
<
typename
Function
>
void
run_mini_benchmark
(
const
Function
&
lambda
,
size_t
max_threads
,
unsigned
long
max_runtime_ms
=
1000
)
{
using
namespace
std
;
using
namespace
pls
::
internal
::
scheduling
;
...
...
@@ -37,7 +38,7 @@ namespace pls {
});
long
time
=
chrono
::
duration_cast
<
chrono
::
microseconds
>
(
end_time
-
start_time
).
count
();
double
time_per_iteration
=
(
double
)
time
/
iterations
;
double
time_per_iteration
=
(
double
)
time
/
iterations
;
std
::
cout
<<
time_per_iteration
;
if
(
num_threads
<
max_threads
)
{
...
...
@@ -45,9 +46,10 @@ namespace pls {
}
}
std
::
cout
<<
std
::
endl
;
}
}
}
}
}
}
}
#endif //PLS_MINI_BENCHMARK_H
lib/pls/include/pls/internal/helpers/prohibit_new.h
View file @
aa270645
...
...
@@ -15,7 +15,7 @@
#ifdef NEW_LINK_ERROR
// This will cause a linker error if new is used in the code.
// We also exit if it is somehow still called.
inline
void
*
operator
new
(
std
::
size_t
)
{
inline
void
*
operator
new
(
std
::
size_t
)
{
extern
int
bare_new_erroneously_called
();
exit
(
bare_new_erroneously_called
()
|
1
);
}
...
...
lib/pls/include/pls/internal/helpers/unique_id.h
View file @
aa270645
...
...
@@ -7,12 +7,13 @@
#include <stdint.h>
namespace
pls
{
namespace
internal
{
namespace
helpers
{
struct
unique_id
{
namespace
internal
{
namespace
helpers
{
struct
unique_id
{
const
uint32_t
id_
;
const
std
::
type_info
&
type_
;
bool
operator
==
(
const
unique_id
&
other
)
const
{
return
id_
==
other
.
id_
&&
type_
==
other
.
type_
;
}
const
std
::
type_info
&
type_
;
bool
operator
==
(
const
unique_id
&
other
)
const
{
return
id_
==
other
.
id_
&&
type_
==
other
.
type_
;
}
static
constexpr
unique_id
create
(
const
uint32_t
id
)
{
return
unique_id
(
id
,
typeid
(
void
));
...
...
@@ -22,10 +23,11 @@ namespace pls {
return
unique_id
(
UINT32_MAX
,
typeid
(
std
::
tuple
<
T
...
>
));
}
private
:
explicit
constexpr
unique_id
(
const
uint32_t
id
,
const
std
::
type_info
&
type
)
:
id_
{
id
},
type_
{
type
}
{};
};
}
}
explicit
constexpr
unique_id
(
const
uint32_t
id
,
const
std
::
type_info
&
type
)
:
id_
{
id
},
type_
{
type
}
{};
};
}
}
}
#endif //PLS_UNIQUE_ID_H
lib/pls/include/pls/internal/scheduling/abstract_task.h
View file @
aa270645
...
...
@@ -6,38 +6,40 @@
#include "pls/internal/helpers/unique_id.h"
namespace
pls
{
namespace
internal
{
namespace
scheduling
{
class
abstract_task
{
namespace
internal
{
namespace
scheduling
{
class
abstract_task
{
public
:
using
id
=
helpers
::
unique_id
;
private
:
unsigned
int
depth_
;
abstract_task
::
id
unique_id_
;
abstract_task
*
child_task_
;
abstract_task
*
child_task_
;
public
:
abstract_task
(
const
unsigned
int
depth
,
const
abstract_task
::
id
&
unique_id
)
:
abstract_task
(
const
unsigned
int
depth
,
const
abstract_task
::
id
&
unique_id
)
:
depth_
{
depth
},
unique_id_
{
unique_id
},
child_task_
{
nullptr
}
{}
virtual
void
execute
()
=
0
;
void
set_child
(
abstract_task
*
child_task
)
{
child_task_
=
child_task
;
}
abstract_task
*
child
()
{
return
child_task_
;
}
void
set_child
(
abstract_task
*
child_task
)
{
child_task_
=
child_task
;
}
abstract_task
*
child
()
{
return
child_task_
;
}
void
set_depth
(
unsigned
int
depth
)
{
depth_
=
depth
;
}
unsigned
int
depth
()
const
{
return
depth_
;
}
id
unique_id
()
const
{
return
unique_id_
;
}
protected
:
virtual
bool
internal_stealing
(
abstract_task
*
other_task
)
=
0
;
virtual
bool
split_task
(
base
::
spin_lock
*
lock
)
=
0
;
virtual
bool
internal_stealing
(
abstract_task
*
other_task
)
=
0
;
virtual
bool
split_task
(
base
::
spin_lock
*
lock
)
=
0
;
bool
steal_work
();
};
}
}
};
}
}
}
#endif //PLS_ABSTRACT_TASK_H
lib/pls/include/pls/internal/scheduling/fork_join_task.h
View file @
aa270645
...
...
@@ -11,24 +11,25 @@
#include "thread_state.h"
namespace
pls
{
namespace
internal
{
namespace
scheduling
{
class
fork_join_task
;
class
fork_join_sub_task
:
public
data_structures
::
deque_item
{
namespace
internal
{
namespace
scheduling
{
class
fork_join_task
;
class
fork_join_sub_task
:
public
data_structures
::
deque_item
{
friend
class
fork_join_task
;
// Coordinate finishing of sub_tasks
std
::
atomic_uint32_t
ref_count_
;
fork_join_sub_task
*
parent_
;
fork_join_sub_task
*
parent_
;
// Access to TBB scheduling environment
fork_join_task
*
tbb_task_
;
fork_join_task
*
tbb_task_
;
// Stack Management (reset stack pointer after wait_for_all() calls)
data_structures
::
aligned_stack
::
state
stack_state_
;
protected
:
explicit
fork_join_sub_task
();
fork_join_sub_task
(
const
fork_join_sub_task
&
other
);
fork_join_sub_task
(
const
fork_join_sub_task
&
other
);
// Overwritten with behaviour of child tasks
virtual
void
execute_internal
()
=
0
;
...
...
@@ -36,62 +37,63 @@ namespace pls {
public
:
// Only use them when actually executing this sub_task (only public for simpler API design)
template
<
typename
T
>
void
spawn_child
(
const
T
&
sub_task
);
void
spawn_child
(
const
T
&
sub_task
);
void
wait_for_all
();
private
:
void
spawn_child_internal
(
fork_join_sub_task
*
sub_task
);
void
spawn_child_internal
(
fork_join_sub_task
*
sub_task
);
void
execute
();
};
};
template
<
typename
Function
>
class
fork_join_lambda
:
public
fork_join_sub_task
{
const
Function
*
function_
;
template
<
typename
Function
>
class
fork_join_lambda
:
public
fork_join_sub_task
{
const
Function
*
function_
;
public
:
explicit
fork_join_lambda
(
const
Function
*
function
)
:
function_
{
function
}
{};
explicit
fork_join_lambda
(
const
Function
*
function
)
:
function_
{
function
}
{};
protected
:
void
execute_internal
()
override
{
(
*
function_
)(
this
);
}
};
};
class
fork_join_task
:
public
abstract_task
{
class
fork_join_task
:
public
abstract_task
{
friend
class
fork_join_sub_task
;
fork_join_sub_task
*
root_task_
;
fork_join_sub_task
*
currently_executing_
;
data_structures
::
aligned_stack
*
my_stack_
;
fork_join_sub_task
*
root_task_
;
fork_join_sub_task
*
currently_executing_
;
data_structures
::
aligned_stack
*
my_stack_
;
// Double-Ended Queue management
data_structures
::
deque
<
fork_join_sub_task
>
deque_
;
// Steal Management
fork_join_sub_task
*
last_stolen_
;
fork_join_sub_task
*
last_stolen_
;
fork_join_sub_task
*
get_local_sub_task
();
fork_join_sub_task
*
get_stolen_sub_task
();
fork_join_sub_task
*
get_local_sub_task
();
fork_join_sub_task
*
get_stolen_sub_task
();
bool
internal_stealing
(
abstract_task
*
other_task
)
override
;
bool
split_task
(
base
::
spin_lock
*
/*lock*/
)
override
;
bool
internal_stealing
(
abstract_task
*
other_task
)
override
;
bool
split_task
(
base
::
spin_lock
*
/*lock*/
)
override
;
public
:
explicit
fork_join_task
(
fork_join_sub_task
*
root_task
,
const
abstract_task
::
id
&
id
);
explicit
fork_join_task
(
fork_join_sub_task
*
root_task
,
const
abstract_task
::
id
&
id
);
void
execute
()
override
;
fork_join_sub_task
*
currently_executing
()
const
;
};
fork_join_sub_task
*
currently_executing
()
const
;
};
template
<
typename
T
>
void
fork_join_sub_task
::
spawn_child
(
const
T
&
task
)
{
template
<
typename
T
>
void
fork_join_sub_task
::
spawn_child
(
const
T
&
task
)
{
PROFILE_FORK_JOIN_STEALING
(
"spawn_child"
)
static_assert
(
std
::
is_base_of
<
fork_join_sub_task
,
T
>::
value
,
"Only pass fork_join_sub_task subclasses!"
);
T
*
new_task
=
tbb_task_
->
my_stack_
->
push
(
task
);
T
*
new_task
=
tbb_task_
->
my_stack_
->
push
(
task
);
spawn_child_internal
(
new_task
);
}
}
}
}
}
}
}
#endif //PLS_TBB_LIKE_TASK_H
lib/pls/include/pls/internal/scheduling/root_task.h
View file @
aa270645
...
...
@@ -10,20 +10,21 @@
#include "abstract_task.h"
namespace
pls
{
namespace
internal
{
namespace
scheduling
{
template
<
typename
Function
>
class
root_task
:
public
abstract_task
{
namespace
internal
{
namespace
scheduling
{
template
<
typename
Function
>
class
root_task
:
public
abstract_task
{
Function
function_
;
std
::
atomic_uint8_t
finished_
;
public
:
static
constexpr
auto
create_id
=
helpers
::
unique_id
::
create
<
root_task
<
Function
>>
;
explicit
root_task
(
Function
function
)
:
explicit
root_task
(
Function
function
)
:
abstract_task
{
0
,
create_id
()},
function_
{
function
},
finished_
{
0
}
{}
root_task
(
const
root_task
&
other
)
:
root_task
(
const
root_task
&
other
)
:
abstract_task
{
0
,
create_id
()},
function_
{
other
.
function_
},
finished_
{
0
}
{}
...
...
@@ -38,23 +39,23 @@ namespace pls {
finished_
=
1
;
}
bool
internal_stealing
(
abstract_task
*
/*other_task*/
)
override
{
bool
internal_stealing
(
abstract_task
*
/*other_task*/
)
override
{
return
false
;
}
bool
split_task
(
base
::
spin_lock
*
/*lock*/
)
override
{
bool
split_task
(
base
::
spin_lock
*
/*lock*/
)
override
{
return
false
;
}
};
};
template
<
typename
Function
>
class
root_worker_task
:
public
abstract_task
{
root_task
<
Function
>*
master_task_
;
template
<
typename
Function
>
class
root_worker_task
:
public
abstract_task
{
root_task
<
Function
>
*
master_task_
;
public
:
static
constexpr
auto
create_id
=
root_task
<
Function
>::
create_id
;
explicit
root_worker_task
(
root_task
<
Function
>*
master_task
)
:
explicit
root_worker_task
(
root_task
<
Function
>
*
master_task
)
:
abstract_task
{
0
,
create_id
()},
master_task_
{
master_task
}
{}
...
...
@@ -65,16 +66,17 @@ namespace pls {
}
while
(
!
master_task_
->
finished
());
}
bool
internal_stealing
(
abstract_task
*
/*other_task*/
)
override
{
bool
internal_stealing
(
abstract_task
*
/*other_task*/
)
override
{
return
false
;
}
bool
split_task
(
base
::
spin_lock
*
/*lock*/
)
override
{
bool
split_task
(
base
::
spin_lock
*
/*lock*/
)
override
{
return
false
;
}
};
}
}
};
}
}
}
#endif //PLS_ROOT_MASTER_TASK_H
lib/pls/include/pls/internal/scheduling/run_on_n_threads_task.h
View file @
aa270645
...
...
@@ -12,12 +12,14 @@
#include "scheduler.h"
namespace
pls
{
namespace
internal
{
namespace
scheduling
{
template
<
typename
Function
>
class
run_on_n_threads_task
:
public
abstract_task
{
namespace
internal
{
namespace
scheduling
{
template
<
typename
Function
>
class
run_on_n_threads_task
:
public
abstract_task
{
template
<
typename
F
>
friend
class
run_on_n_threads_task_worker
;
friend
class
run_on_n_threads_task_worker
;
Function
function_
;
...
...
@@ -38,7 +40,7 @@ namespace pls {
public
:
static
constexpr
auto
create_id
=
helpers
::
unique_id
::
create
<
run_on_n_threads_task
<
Function
>>
;
run_on_n_threads_task
(
Function
function
,
int
num_threads
)
:
run_on_n_threads_task
(
Function
function
,
int
num_threads
)
:
abstract_task
{
0
,
create_id
()},
function_
{
function
},
counter
{
num_threads
-
1
}
{}
...
...
@@ -55,21 +57,21 @@ namespace pls {
std
::
cout
<<
"Finished Master!"
<<
std
::
endl
;
}
bool
internal_stealing
(
abstract_task
*
/*other_task*/
)
override
{
bool
internal_stealing
(
abstract_task
*
/*other_task*/
)
override
{
return
false
;
}
bool
split_task
(
base
::
spin_lock
*
lock
)
override
;
};
bool
split_task
(
base
::
spin_lock
*
lock
)
override
;
};
template
<
typename
Function
>
class
run_on_n_threads_task_worker
:
public
abstract_task
{
template
<
typename
Function
>
class
run_on_n_threads_task_worker
:
public
abstract_task
{
Function
function_
;
run_on_n_threads_task
<
Function
>*
root_
;
run_on_n_threads_task
<
Function
>
*
root_
;
public
:
static
constexpr
auto
create_id
=
helpers
::
unique_id
::
create
<
run_on_n_threads_task_worker
<
Function
>>
;
run_on_n_threads_task_worker
(
Function
function
,
run_on_n_threads_task
<
Function
>*
root
)
:
run_on_n_threads_task_worker
(
Function
function
,
run_on_n_threads_task
<
Function
>
*
root
)
:
abstract_task
{
0
,
create_id
()},
function_
{
function
},
root_
{
root
}
{}
...
...
@@ -83,17 +85,17 @@ namespace pls {
}
}
bool
internal_stealing
(
abstract_task
*
/*other_task*/
)
override
{
bool
internal_stealing
(
abstract_task
*
/*other_task*/
)
override
{
return
false
;
}
bool
split_task
(
base
::
spin_lock
*
/*lock*/
)
override
{
bool
split_task
(
base
::
spin_lock
*
/*lock*/
)
override
{
return
false
;
}
};
};
template
<
typename
Function
>
bool
run_on_n_threads_task
<
Function
>::
split_task
(
base
::
spin_lock
*
lock
)
{
template
<
typename
Function
>
bool
run_on_n_threads_task
<
Function
>::
split_task
(
base
::
spin_lock
*
lock
)
{
if
(
get_counter
()
<=
0
)
{
return
false
;
}
...
...
@@ -105,14 +107,15 @@ namespace pls {
auto
task
=
run_on_n_threads_task_worker
<
Function
>
{
function_
,
this
};
scheduler
->
execute_task
(
task
,
depth
());
return
true
;
}
}
template
<
typename
Function
>
run_on_n_threads_task
<
Function
>
create_run_on_n_threads_task
(
Function
function
,
int
num_threads
)
{
template
<
typename
Function
>
run_on_n_threads_task
<
Function
>
create_run_on_n_threads_task
(
Function
function
,
int
num_threads
)
{
return
run_on_n_threads_task
<
Function
>
{
function
,
num_threads
};
}
}
}
}
}
}
}
#endif //PLS_RUN_ON_N_THREADS_TASK_H
lib/pls/include/pls/internal/scheduling/scheduler.h
View file @
aa270645
...
...
@@ -17,21 +17,22 @@
#include "scheduler_memory.h"
namespace
pls
{
namespace
internal
{
namespace
scheduling
{
void
worker_routine
();
using
scheduler_thread
=
base
::
thread
<
decltype
(
&
worker_routine
),
thread_state
>
;
namespace
internal
{
namespace
scheduling
{
class
scheduler
{
void
worker_routine
();
using
scheduler_thread
=
base
::
thread
<
decltype
(
&
worker_routine
),
thread_state
>
;
class
scheduler
{
friend
void
worker_routine
();
const
unsigned
int
num_threads_
;
scheduler_memory
*
memory_
;
scheduler_memory
*
memory_
;
base
::
barrier
sync_barrier_
;
bool
terminated_
;
public
:
explicit
scheduler
(
scheduler_memory
*
memory
,
unsigned
int
num_threads
);
explicit
scheduler
(
scheduler_memory
*
memory
,
unsigned
int
num_threads
);
~
scheduler
();
/**
...
...
@@ -50,17 +51,18 @@ namespace pls {
* @param depth Optional: depth of the new task, otherwise set implicitly.
*/
template
<
typename
Task
>
static
void
execute_task
(
Task
&
task
,
int
depth
=
-
1
);
static
void
execute_task
(
Task
&
task
,
int
depth
=
-
1
);
static
abstract_task
*
current_task
()
{
return
base
::
this_thread
::
state
<
thread_state
>
()
->
current_task_
;
}
static
abstract_task
*
current_task
()
{
return
base
::
this_thread
::
state
<
thread_state
>
()
->
current_task_
;
}
void
terminate
(
bool
wait_for_workers
=
true
);
void
terminate
(
bool
wait_for_workers
=
true
);
unsigned
int
num_threads
()
const
{
return
num_threads_
;
}
thread_state
*
thread_state_for
(
size_t
id
)
{
return
memory_
->
thread_state_for
(
id
);
}
};
}
}
thread_state
*
thread_state_for
(
size_t
id
)
{
return
memory_
->
thread_state_for
(
id
);
}
};
}
}
}
#include "scheduler_impl.h"
...
...
lib/pls/include/pls/internal/scheduling/scheduler_impl.h
View file @
aa270645
...
...
@@ -3,10 +3,11 @@
#define PLS_SCHEDULER_IMPL_H
namespace
pls
{
namespace
internal
{
namespace
scheduling
{
template
<
typename
Function
>
void
scheduler
::
perform_work
(
Function
work_section
)
{
namespace
internal
{
namespace
scheduling
{
template
<
typename
Function
>
void
scheduler
::
perform_work
(
Function
work_section
)
{
PROFILE_WORK_BLOCK
(
"scheduler::perform_work"
)
root_task
<
Function
>
master
{
work_section
};
...
...
@@ -31,15 +32,15 @@ namespace pls {
root_worker_task
<
Function
>
worker
{
new_master
};
memory_
->
task_stack_for
(
0
)
->
pop
<
typeof
(
worker
)
>
();
}
}
}
template
<
typename
Task
>
void
scheduler
::
execute_task
(
Task
&
task
,
int
depth
)
{
template
<
typename
Task
>
void
scheduler
::
execute_task
(
Task
&
task
,
int
depth
)
{
static_assert
(
std
::
is_base_of
<
abstract_task
,
Task
>::
value
,
"Only pass abstract_task subclasses!"
);
auto
my_state
=
base
::
this_thread
::
state
<
thread_state
>
();
abstract_task
*
old_task
;
abstract_task
*
new_task
;
abstract_task
*
old_task
;
abstract_task
*
new_task
;
// Init Task
{
...
...
@@ -64,9 +65,10 @@ namespace pls {
my_state
->
task_stack_
->
pop
<
Task
>
();
}
}
}
}
}
}
}
}
#endif //PLS_SCHEDULER_IMPL_H
lib/pls/include/pls/internal/scheduling/scheduler_memory.h
View file @
aa270645
...
...
@@ -7,21 +7,22 @@
#define PLS_SCHEDULER_MEMORY_H
namespace
pls
{
namespace
internal
{
namespace
scheduling
{
void
worker_routine
();
using
scheduler_thread
=
base
::
thread
<
decltype
(
&
worker_routine
),
thread_state
>
;
namespace
internal
{
namespace
scheduling
{
class
scheduler_memory
{
void
worker_routine
();
using
scheduler_thread
=
base
::
thread
<
decltype
(
&
worker_routine
),
thread_state
>
;
class
scheduler_memory
{
public
:
virtual
size_t
max_threads
()
const
=
0
;
virtual
thread_state
*
thread_state_for
(
size_t
id
)
=
0
;
virtual
scheduler_thread
*
thread_for
(
size_t
id
)
=
0
;
virtual
data_structures
::
aligned_stack
*
task_stack_for
(
size_t
id
)
=
0
;
};
virtual
thread_state
*
thread_state_for
(
size_t
id
)
=
0
;
virtual
scheduler_thread
*
thread_for
(
size_t
id
)
=
0
;
virtual
data_structures
::
aligned_stack
*
task_stack_for
(
size_t
id
)
=
0
;
};
template
<
size_t
MAX_THREADS
,
size_t
TASK_STACK_SIZE
>
class
static_scheduler_memory
:
public
scheduler_memory
{
template
<
size_t
MAX_THREADS
,
size_t
TASK_STACK_SIZE
>
class
static_scheduler_memory
:
public
scheduler_memory
{
// Everyone of these types has to live on its own cache line,
// as each thread uses one of them independently.
// Therefore it would be a major performance hit if we shared cache lines on these.
...
...
@@ -38,17 +39,18 @@ namespace pls {
public
:
static_scheduler_memory
()
{
for
(
size_t
i
=
0
;
i
<
MAX_THREADS
;
i
++
)
{
new
((
void
*
)
task_stacks_
[
i
].
pointer
())
data_structures
::
aligned_stack
(
task_stacks_memory_
[
i
].
pointer
()
->
data
(),
TASK_STACK_SIZE
);
new
((
void
*
)
task_stacks_
[
i
].
pointer
())
data_structures
::
aligned_stack
(
task_stacks_memory_
[
i
].
pointer
()
->
data
(),
TASK_STACK_SIZE
);
}
}
size_t
max_threads
()
const
override
{
return
MAX_THREADS
;
}
thread_state
*
thread_state_for
(
size_t
id
)
override
{
return
thread_states_
[
id
].
pointer
();
}
scheduler_thread
*
thread_for
(
size_t
id
)
override
{
return
threads_
[
id
].
pointer
();
}
data_structures
::
aligned_stack
*
task_stack_for
(
size_t
id
)
override
{
return
task_stacks_
[
id
].
pointer
();
}
};
thread_state
*
thread_state_for
(
size_t
id
)
override
{
return
thread_states_
[
id
].
pointer
();
}
scheduler_thread
*
thread_for
(
size_t
id
)
override
{
return
threads_
[
id
].
pointer
();
}
data_structures
::
aligned_stack
*
task_stack_for
(
size_t
id
)
override
{
return
task_stacks_
[
id
].
pointer
();
}
};
class
malloc_scheduler_memory
:
public
scheduler_memory
{
class
malloc_scheduler_memory
:
public
scheduler_memory
{
// Everyone of these types has to live on its own cache line,
// as each thread uses one of them independently.
// Therefore it would be a major performance hit if we shared cache lines on these.
...
...
@@ -58,21 +60,22 @@ namespace pls {
const
size_t
num_threads_
;
aligned_thread
*
threads_
;
aligned_thread_state
*
thread_states_
;
char
**
task_stacks_memory_
;
aligned_aligned_stack
*
task_stacks_
;
aligned_thread
*
threads_
;
aligned_thread_state
*
thread_states_
;
char
**
task_stacks_memory_
;
aligned_aligned_stack
*
task_stacks_
;
public
:
explicit
malloc_scheduler_memory
(
size_t
num_threads
,
size_t
memory_per_stack
=
2
<<
16
);
~
malloc_scheduler_memory
();
size_t
max_threads
()
const
override
{
return
num_threads_
;
}
thread_state
*
thread_state_for
(
size_t
id
)
override
{
return
thread_states_
[
id
].
pointer
();
}
scheduler_thread
*
thread_for
(
size_t
id
)
override
{
return
threads_
[
id
].
pointer
();
}
data_structures
::
aligned_stack
*
task_stack_for
(
size_t
id
)
override
{
return
task_stacks_
[
id
].
pointer
();
}
};
}
}
thread_state
*
thread_state_for
(
size_t
id
)
override
{
return
thread_states_
[
id
].
pointer
();
}
scheduler_thread
*
thread_for
(
size_t
id
)
override
{
return
threads_
[
id
].
pointer
();
}
data_structures
::
aligned_stack
*
task_stack_for
(
size_t
id
)
override
{
return
task_stacks_
[
id
].
pointer
();
}
};
}
}
}
#endif //PLS_SCHEDULER_MEMORY_H
lib/pls/include/pls/internal/scheduling/thread_state.h
View file @
aa270645
...
...
@@ -8,21 +8,22 @@
#include "abstract_task.h"
namespace
pls
{
namespace
internal
{
namespace
scheduling
{
// forward declaration
class
scheduler
;
struct
thread_state
{
scheduler
*
scheduler_
;
abstract_task
*
root_task_
;
abstract_task
*
current_task_
;
data_structures
::
aligned_stack
*
task_stack_
;
namespace
internal
{
namespace
scheduling
{
// forward declaration
class
scheduler
;
struct
thread_state
{
scheduler
*
scheduler_
;
abstract_task
*
root_task_
;
abstract_task
*
current_task_
;
data_structures
::
aligned_stack
*
task_stack_
;
size_t
id_
;
base
::
spin_lock
lock_
;
std
::
minstd_rand
random_
;
thread_state
()
:
thread_state
()
:
scheduler_
{
nullptr
},
root_task_
{
nullptr
},
current_task_
{
nullptr
},
...
...
@@ -30,16 +31,17 @@ namespace pls {
id_
{
0
},
random_
{
id_
}
{};
thread_state
(
scheduler
*
scheduler
,
data_structures
::
aligned_stack
*
task_stack
,
unsigned
int
id
)
:
thread_state
(
scheduler
*
scheduler
,
data_structures
::
aligned_stack
*
task_stack
,
unsigned
int
id
)
:
scheduler_
{
scheduler
},
root_task_
{
nullptr
},
current_task_
{
nullptr
},
task_stack_
{
task_stack
},
id_
{
id
},
random_
{
id_
}
{}
};
}
}
};
}
}
}
#endif //PLS_THREAD_STATE_H
lib/pls/include/pls/pls.h
View file @
aa270645
...
...
@@ -8,18 +8,20 @@
#include "pls/internal/helpers/unique_id.h"
namespace
pls
{
using
internal
::
scheduling
::
static_scheduler_memory
;
using
internal
::
scheduling
::
malloc_scheduler_memory
;
using
internal
::
scheduling
::
scheduler
;
using
task_id
=
internal
::
scheduling
::
abstract_task
::
id
;
using
internal
::
scheduling
::
static_scheduler_memory
;
using
internal
::
scheduling
::
malloc_scheduler_memory
;
using
unique_id
=
internal
::
helpers
::
unique_id
;
using
internal
::
scheduling
::
scheduler
;
using
task_id
=
internal
::
scheduling
::
abstract_task
::
id
;
using
internal
::
scheduling
::
fork_join_sub_task
;
using
internal
::
scheduling
::
fork_join_task
;
using
unique_id
=
internal
::
helpers
::
unique_id
;
using
internal
::
scheduling
::
fork_join_sub_task
;
using
internal
::
scheduling
::
fork_join_task
;
using
algorithm
::
invoke_parallel
;
using
algorithm
::
invoke_parallel
;
}
#endif
lib/pls/src/internal/base/alignment.cpp
View file @
aa270645
...
...
@@ -2,26 +2,28 @@
#include "pls/internal/base/system_details.h"
namespace
pls
{
namespace
internal
{
namespace
base
{
namespace
alignment
{
void
*
allocate_aligned
(
size_t
size
)
{
namespace
internal
{
namespace
base
{
namespace
alignment
{
void
*
allocate_aligned
(
size_t
size
)
{
return
aligned_alloc
(
system_details
::
CACHE_LINE_SIZE
,
size
);
}
}
std
::
uintptr_t
next_alignment
(
std
::
uintptr_t
size
)
{
std
::
uintptr_t
next_alignment
(
std
::
uintptr_t
size
)
{
std
::
uintptr_t
miss_alignment
=
size
%
base
::
system_details
::
CACHE_LINE_SIZE
;
if
(
miss_alignment
==
0
)
{
return
size
;
}
else
{
return
size
+
(
base
::
system_details
::
CACHE_LINE_SIZE
-
miss_alignment
);
}
}
}
char
*
next_alignment
(
char
*
pointer
)
{
return
reinterpret_cast
<
char
*>
(
next_alignment
(
reinterpret_cast
<
std
::
uintptr_t
>
(
pointer
)));
}
}
}
}
char
*
next_alignment
(
char
*
pointer
)
{
return
reinterpret_cast
<
char
*>
(
next_alignment
(
reinterpret_cast
<
std
::
uintptr_t
>
(
pointer
)));
}
}
}
}
}
lib/pls/src/internal/base/barrier.cpp
View file @
aa270645
#include "pls/internal/base/barrier.h"
namespace
pls
{
namespace
internal
{
namespace
base
{
barrier
::
barrier
(
const
unsigned
int
count
)
:
barrier_
{}
{
namespace
internal
{
namespace
base
{
barrier
::
barrier
(
const
unsigned
int
count
)
:
barrier_
{}
{
pthread_barrier_init
(
&
barrier_
,
nullptr
,
count
);
}
}
barrier
::~
barrier
()
{
barrier
::~
barrier
()
{
pthread_barrier_destroy
(
&
barrier_
);
}
}
void
barrier
::
wait
()
{
void
barrier
::
wait
()
{
pthread_barrier_wait
(
&
barrier_
);
}
}
}
}
}
}
}
lib/pls/src/internal/base/tas_spin_lock.cpp
View file @
aa270645
...
...
@@ -2,9 +2,10 @@
#include "pls/internal/base/tas_spin_lock.h"
namespace
pls
{
namespace
internal
{
namespace
base
{
void
tas_spin_lock
::
lock
()
{
namespace
internal
{
namespace
base
{
void
tas_spin_lock
::
lock
()
{
PROFILE_LOCK
(
"Acquire Lock"
)
int
tries
=
0
;
while
(
flag_
.
test_and_set
(
std
::
memory_order_acquire
))
{
...
...
@@ -13,9 +14,9 @@ namespace pls {
this_thread
::
yield
();
}
}
}
}
bool
tas_spin_lock
::
try_lock
(
unsigned
int
num_tries
)
{
bool
tas_spin_lock
::
try_lock
(
unsigned
int
num_tries
)
{
PROFILE_LOCK
(
"Try Acquire Lock"
)
while
(
flag_
.
test_and_set
(
std
::
memory_order_acquire
))
{
num_tries
--
;
...
...
@@ -24,11 +25,12 @@ namespace pls {
}
}
return
true
;
}
}
void
tas_spin_lock
::
unlock
()
{
void
tas_spin_lock
::
unlock
()
{
flag_
.
clear
(
std
::
memory_order_release
);
}
}
}
}
}
}
}
lib/pls/src/internal/base/thread.cpp
View file @
aa270645
#include "pls/internal/base/thread.h"
namespace
pls
{
namespace
internal
{
namespace
base
{
namespace
internal
{
namespace
base
{
#ifdef PLS_THREAD_SPECIFIC_PTHREAD
pthread_key_t
this_thread
::
local_storage_key_
=
false
;
bool
this_thread
::
local_storage_key_initialized_
;
pthread_key_t
this_thread
::
local_storage_key_
=
false
;
bool
this_thread
::
local_storage_key_initialized_
;
#endif
#ifdef PLS_THREAD_SPECIFIC_COMPILER
__thread
void
*
this_thread
::
local_state_
;
__thread
void
*
this_thread
::
local_state_
;
#endif
// implementation in header (C++ templating)
}
}
// implementation in header (C++ templating)
}
}
}
lib/pls/src/internal/base/ttas_spin_lock.cpp
View file @
aa270645
...
...
@@ -2,9 +2,10 @@
#include "pls/internal/base/ttas_spin_lock.h"
namespace
pls
{
namespace
internal
{
namespace
base
{
void
ttas_spin_lock
::
lock
()
{
namespace
internal
{
namespace
base
{
void
ttas_spin_lock
::
lock
()
{
PROFILE_LOCK
(
"Acquire Lock"
)
int
tries
=
0
;
int
expected
=
0
;
...
...
@@ -19,9 +20,9 @@ namespace pls {
expected
=
0
;
}
while
(
!
flag_
.
compare_exchange_weak
(
expected
,
1
,
std
::
memory_order_acquire
));
}
}
bool
ttas_spin_lock
::
try_lock
(
unsigned
int
num_tries
)
{
bool
ttas_spin_lock
::
try_lock
(
unsigned
int
num_tries
)
{
PROFILE_LOCK
(
"Try Acquire Lock"
)
int
expected
=
0
;
...
...
@@ -37,11 +38,12 @@ namespace pls {
}
while
(
!
flag_
.
compare_exchange_weak
(
expected
,
1
,
std
::
memory_order_acquire
));
return
true
;
}
}
void
ttas_spin_lock
::
unlock
()
{
void
ttas_spin_lock
::
unlock
()
{
flag_
.
store
(
0
,
std
::
memory_order_release
);
}
}
}
}
}
}
}
lib/pls/src/internal/data_structures/aligned_stack.cpp
View file @
aa270645
...
...
@@ -2,12 +2,14 @@
#include "pls/internal/base/system_details.h"
namespace
pls
{
namespace
internal
{
namespace
data_structures
{
aligned_stack
::
aligned_stack
(
char
*
memory_region
,
const
std
::
size_t
size
)
:
namespace
internal
{
namespace
data_structures
{
aligned_stack
::
aligned_stack
(
char
*
memory_region
,
const
std
::
size_t
size
)
:
memory_start_
{
memory_region
},
memory_end_
{
memory_region
+
size
},
head_
{
base
::
alignment
::
next_alignment
(
memory_start_
)}
{}
}
}
}
}
}
lib/pls/src/internal/data_structures/deque.cpp
View file @
aa270645
...
...
@@ -3,16 +3,17 @@
#include "pls/internal/data_structures/deque.h"
namespace
pls
{
namespace
internal
{
namespace
data_structures
{
deque_item
*
deque_internal
::
pop_head_internal
()
{
namespace
internal
{
namespace
data_structures
{
deque_item
*
deque_internal
::
pop_head_internal
()
{
std
::
lock_guard
<
base
::
spin_lock
>
lock
{
lock_
};
if
(
head_
==
nullptr
)
{
return
nullptr
;
}
deque_item
*
result
=
head_
;
deque_item
*
result
=
head_
;
head_
=
head_
->
prev_
;
if
(
head_
==
nullptr
)
{
tail_
=
nullptr
;
...
...
@@ -21,16 +22,16 @@ namespace pls {
}
return
result
;
}
}
deque_item
*
deque_internal
::
pop_tail_internal
()
{
deque_item
*
deque_internal
::
pop_tail_internal
()
{
std
::
lock_guard
<
base
::
spin_lock
>
lock
{
lock_
};
if
(
tail_
==
nullptr
)
{
return
nullptr
;
}
deque_item
*
result
=
tail_
;
deque_item
*
result
=
tail_
;
tail_
=
tail_
->
next_
;
if
(
tail_
==
nullptr
)
{
head_
=
nullptr
;
...
...
@@ -39,9 +40,9 @@ namespace pls {
}
return
result
;
}
}
void
deque_internal
::
push_tail_internal
(
deque_item
*
new_item
)
{
void
deque_internal
::
push_tail_internal
(
deque_item
*
new_item
)
{
std
::
lock_guard
<
base
::
spin_lock
>
lock
{
lock_
};
if
(
tail_
!=
nullptr
)
{
...
...
@@ -52,7 +53,8 @@ namespace pls {
new_item
->
next_
=
tail_
;
new_item
->
prev_
=
nullptr
;
tail_
=
new_item
;
}
}
}
}
}
}
}
lib/pls/src/internal/scheduling/abstract_task.cpp
View file @
aa270645
...
...
@@ -5,9 +5,10 @@
#include "pls/internal/scheduling/scheduler.h"
namespace
pls
{
namespace
internal
{
namespace
scheduling
{
bool
abstract_task
::
steal_work
()
{
namespace
internal
{
namespace
scheduling
{
bool
abstract_task
::
steal_work
()
{
PROFILE_STEALING
(
"abstract_task::steal_work"
)
const
auto
my_state
=
base
::
this_thread
::
state
<
thread_state
>
();
const
auto
my_scheduler
=
my_state
->
scheduler_
;
...
...
@@ -27,7 +28,7 @@ namespace pls {
// Dig down to our level
PROFILE_STEALING
(
"Go to our level"
)
abstract_task
*
current_task
=
target_state
->
root_task_
;
abstract_task
*
current_task
=
target_state
->
root_task_
;
while
(
current_task
!=
nullptr
&&
current_task
->
depth
()
<
depth
())
{
current_task
=
current_task
->
child_task_
;
}
...
...
@@ -70,7 +71,8 @@ namespace pls {
// internal steal was no success
return
false
;
};
}
}
}
}
}
}
lib/pls/src/internal/scheduling/fork_join_task.cpp
View file @
aa270645
...
...
@@ -4,23 +4,24 @@
#include "pls/internal/scheduling/fork_join_task.h"
namespace
pls
{
namespace
internal
{
namespace
scheduling
{
fork_join_sub_task
::
fork_join_sub_task
()
:
namespace
internal
{
namespace
scheduling
{
fork_join_sub_task
::
fork_join_sub_task
()
:
data_structures
::
deque_item
{},
ref_count_
{
0
},
parent_
{
nullptr
},
tbb_task_
{
nullptr
},
stack_state_
{
nullptr
}
{}
fork_join_sub_task
::
fork_join_sub_task
(
const
fork_join_sub_task
&
other
)
:
fork_join_sub_task
::
fork_join_sub_task
(
const
fork_join_sub_task
&
other
)
:
data_structures
::
deque_item
(
other
),
ref_count_
{
0
},
parent_
{
nullptr
},
tbb_task_
{
nullptr
},
stack_state_
{
nullptr
}
{}
void
fork_join_sub_task
::
execute
()
{
void
fork_join_sub_task
::
execute
()
{
PROFILE_WORK_BLOCK
(
"execute sub_task"
)
tbb_task_
->
currently_executing_
=
this
;
execute_internal
();
...
...
@@ -31,9 +32,9 @@ namespace pls {
if
(
parent_
!=
nullptr
)
{
parent_
->
ref_count_
--
;
}
}
}
void
fork_join_sub_task
::
spawn_child_internal
(
fork_join_sub_task
*
sub_task
)
{
void
fork_join_sub_task
::
spawn_child_internal
(
fork_join_sub_task
*
sub_task
)
{
// Keep our refcount up to date
ref_count_
++
;
...
...
@@ -43,12 +44,12 @@ namespace pls {
sub_task
->
stack_state_
=
tbb_task_
->
my_stack_
->
save_state
();
tbb_task_
->
deque_
.
push_tail
(
sub_task
);
}
}
void
fork_join_sub_task
::
wait_for_all
()
{
void
fork_join_sub_task
::
wait_for_all
()
{
while
(
ref_count_
>
0
)
{
PROFILE_STEALING
(
"get local sub task"
)
fork_join_sub_task
*
local_task
=
tbb_task_
->
get_local_sub_task
();
fork_join_sub_task
*
local_task
=
tbb_task_
->
get_local_sub_task
();
PROFILE_END_BLOCK
if
(
local_task
!=
nullptr
)
{
local_task
->
execute
();
...
...
@@ -64,19 +65,19 @@ namespace pls {
}
}
tbb_task_
->
my_stack_
->
reset_state
(
stack_state_
);
}
}
fork_join_sub_task
*
fork_join_task
::
get_local_sub_task
()
{
fork_join_sub_task
*
fork_join_task
::
get_local_sub_task
()
{
return
deque_
.
pop_tail
();
}
}
fork_join_sub_task
*
fork_join_task
::
get_stolen_sub_task
()
{
fork_join_sub_task
*
fork_join_task
::
get_stolen_sub_task
()
{
return
deque_
.
pop_head
();
}
}
bool
fork_join_task
::
internal_stealing
(
abstract_task
*
other_task
)
{
bool
fork_join_task
::
internal_stealing
(
abstract_task
*
other_task
)
{
PROFILE_STEALING
(
"fork_join_task::internal_stealin"
)
auto
cast_other_task
=
reinterpret_cast
<
fork_join_task
*>
(
other_task
);
auto
cast_other_task
=
reinterpret_cast
<
fork_join_task
*>
(
other_task
);
auto
stolen_sub_task
=
cast_other_task
->
get_stolen_sub_task
();
if
(
stolen_sub_task
==
nullptr
)
{
...
...
@@ -90,11 +91,11 @@ namespace pls {
return
true
;
}
}
}
bool
fork_join_task
::
split_task
(
base
::
spin_lock
*
lock
)
{
bool
fork_join_task
::
split_task
(
base
::
spin_lock
*
lock
)
{
PROFILE_STEALING
(
"fork_join_task::split_task"
)
fork_join_sub_task
*
stolen_sub_task
=
get_stolen_sub_task
();
fork_join_sub_task
*
stolen_sub_task
=
get_stolen_sub_task
();
if
(
stolen_sub_task
==
nullptr
)
{
return
false
;
}
...
...
@@ -106,9 +107,9 @@ namespace pls {
scheduler
::
execute_task
(
task
,
depth
());
return
true
;
}
}
void
fork_join_task
::
execute
()
{
void
fork_join_task
::
execute
()
{
PROFILE_WORK_BLOCK
(
"execute fork_join_task"
);
// Bind this instance to our OS thread
...
...
@@ -118,17 +119,18 @@ namespace pls {
// Execute it on our OS thread until its finished
root_task_
->
execute
();
}
}
fork_join_sub_task
*
fork_join_task
::
currently_executing
()
const
{
return
currently_executing_
;
}
fork_join_sub_task
*
fork_join_task
::
currently_executing
()
const
{
return
currently_executing_
;
}
fork_join_task
::
fork_join_task
(
fork_join_sub_task
*
root_task
,
const
abstract_task
::
id
&
id
)
:
fork_join_task
::
fork_join_task
(
fork_join_sub_task
*
root_task
,
const
abstract_task
::
id
&
id
)
:
abstract_task
{
0
,
id
},
root_task_
{
root_task
},
currently_executing_
{
nullptr
},
my_stack_
{
nullptr
},
deque_
{},
last_stolen_
{
nullptr
}
{};
}
}
last_stolen_
{
nullptr
}
{}
}
}
}
lib/pls/src/internal/scheduling/root_task.cpp
View file @
aa270645
#include "pls/internal/scheduling/root_task.h"
namespace
pls
{
namespace
internal
{
namespace
scheduling
{
namespace
internal
{
namespace
scheduling
{
}
}
}
}
}
lib/pls/src/internal/scheduling/run_on_n_threads_task.cpp
View file @
aa270645
#include "pls/internal/scheduling/run_on_n_threads_task.h"
namespace
pls
{
namespace
internal
{
namespace
scheduling
{
namespace
internal
{
namespace
scheduling
{
}
}
}
}
}
lib/pls/src/internal/scheduling/scheduler.cpp
View file @
aa270645
...
...
@@ -2,9 +2,10 @@
#include "pls/internal/base/error_handling.h"
namespace
pls
{
namespace
internal
{
namespace
scheduling
{
scheduler
::
scheduler
(
scheduler_memory
*
memory
,
const
unsigned
int
num_threads
)
:
namespace
internal
{
namespace
scheduling
{
scheduler
::
scheduler
(
scheduler_memory
*
memory
,
const
unsigned
int
num_threads
)
:
num_threads_
{
num_threads
},
memory_
{
memory
},
sync_barrier_
{
num_threads
+
1
},
...
...
@@ -15,16 +16,17 @@ namespace pls {
for
(
unsigned
int
i
=
0
;
i
<
num_threads_
;
i
++
)
{
// Placement new is required, as the memory of `memory_` is not required to be initialized.
new
((
void
*
)
memory_
->
thread_state_for
(
i
))
thread_state
{
this
,
memory_
->
task_stack_for
(
i
),
i
};
new
((
void
*
)
memory_
->
thread_for
(
i
))
base
::
thread
<
void
(
*
)(),
thread_state
>
(
&
worker_routine
,
memory_
->
thread_state_for
(
i
));
}
new
((
void
*
)
memory_
->
thread_state_for
(
i
))
thread_state
{
this
,
memory_
->
task_stack_for
(
i
),
i
};
new
((
void
*
)
memory_
->
thread_for
(
i
))
base
::
thread
<
void
(
*
)(),
thread_state
>
(
&
worker_routine
,
memory_
->
thread_state_for
(
i
));
}
}
scheduler
::~
scheduler
()
{
scheduler
::~
scheduler
()
{
terminate
();
}
}
void
worker_routine
()
{
void
worker_routine
()
{
auto
my_state
=
base
::
this_thread
::
state
<
thread_state
>
();
while
(
true
)
{
...
...
@@ -40,9 +42,9 @@ namespace pls {
my_state
->
scheduler_
->
sync_barrier_
.
wait
();
}
}
}
void
scheduler
::
terminate
(
bool
wait_for_workers
)
{
void
scheduler
::
terminate
(
bool
wait_for_workers
)
{
if
(
terminated_
)
{
return
;
}
...
...
@@ -55,7 +57,8 @@ namespace pls {
memory_
->
thread_for
(
i
)
->
join
();
}
}
}
}
}
}
}
}
}
lib/pls/src/internal/scheduling/scheduler_memory.cpp
View file @
aa270645
#include "pls/internal/scheduling/scheduler_memory.h"
namespace
pls
{
namespace
internal
{
namespace
scheduling
{
malloc_scheduler_memory
::
malloc_scheduler_memory
(
const
size_t
num_threads
,
const
size_t
memory_per_stack
)
:
namespace
internal
{
namespace
scheduling
{
malloc_scheduler_memory
::
malloc_scheduler_memory
(
const
size_t
num_threads
,
const
size_t
memory_per_stack
)
:
num_threads_
{
num_threads
}
{
threads_
=
reinterpret_cast
<
aligned_thread
*>
(
base
::
alignment
::
allocate_aligned
(
num_threads
*
sizeof
(
aligned_thread
)));
thread_states_
=
reinterpret_cast
<
aligned_thread_state
*>
(
base
::
alignment
::
allocate_aligned
(
num_threads
*
sizeof
(
aligned_thread_state
)));
threads_
=
reinterpret_cast
<
aligned_thread
*>
(
base
::
alignment
::
allocate_aligned
(
num_threads
*
sizeof
(
aligned_thread
)));
thread_states_
=
reinterpret_cast
<
aligned_thread_state
*>
(
base
::
alignment
::
allocate_aligned
(
num_threads
*
sizeof
(
aligned_thread_state
)));
task_stacks_
=
reinterpret_cast
<
aligned_aligned_stack
*>
(
base
::
alignment
::
allocate_aligned
(
num_threads
*
sizeof
(
aligned_aligned_stack
)));
task_stacks_memory_
=
reinterpret_cast
<
char
**>
(
base
::
alignment
::
allocate_aligned
(
num_threads
*
sizeof
(
char
*
)));
task_stacks_
=
reinterpret_cast
<
aligned_aligned_stack
*>
(
base
::
alignment
::
allocate_aligned
(
num_threads
*
sizeof
(
aligned_aligned_stack
)));
task_stacks_memory_
=
reinterpret_cast
<
char
**>
(
base
::
alignment
::
allocate_aligned
(
num_threads
*
sizeof
(
char
*
)));
for
(
size_t
i
=
0
;
i
<
num_threads_
;
i
++
)
{
task_stacks_memory_
[
i
]
=
reinterpret_cast
<
char
*>
(
base
::
alignment
::
allocate_aligned
(
memory_per_stack
));
new
((
void
*
)
task_stacks_
[
i
].
pointer
())
data_structures
::
aligned_stack
(
task_stacks_memory_
[
i
],
memory_per_stack
);
}
task_stacks_memory_
[
i
]
=
reinterpret_cast
<
char
*>
(
base
::
alignment
::
allocate_aligned
(
memory_per_stack
));
new
((
void
*
)
task_stacks_
[
i
].
pointer
())
data_structures
::
aligned_stack
(
task_stacks_memory_
[
i
],
memory_per_stack
);
}
}
malloc_scheduler_memory
::~
malloc_scheduler_memory
()
{
malloc_scheduler_memory
::~
malloc_scheduler_memory
()
{
free
(
threads_
);
free
(
thread_states_
);
...
...
@@ -25,7 +29,8 @@ namespace pls {
}
free
(
task_stacks_
);
free
(
task_stacks_memory_
);
}
}
}
}
}
}
}
lib/pls/src/internal/scheduling/thread_state.cpp
View file @
aa270645
#include "pls/internal/scheduling/thread_state.h"
namespace
pls
{
namespace
internal
{
namespace
scheduling
{
namespace
internal
{
namespace
scheduling
{
}
}
}
}
}
test/base_tests.cpp
View file @
aa270645
...
...
@@ -13,7 +13,7 @@ static bool base_tests_visited;
static
int
base_tests_local_value_one
;
static
vector
<
int
>
base_tests_local_value_two
;
TEST_CASE
(
"thread creation and joining"
,
"[internal/data_structures/thread.h]"
)
{
TEST_CASE
(
"thread creation and joining"
,
"[internal/data_structures/thread.h]"
)
{
base_tests_visited
=
false
;
auto
t1
=
start_thread
([]()
{
base_tests_visited
=
true
;
});
t1
.
join
();
...
...
@@ -21,7 +21,7 @@ TEST_CASE( "thread creation and joining", "[internal/data_structures/thread.h]")
REQUIRE
(
base_tests_visited
);
}
TEST_CASE
(
"thread state"
,
"[internal/data_structures/thread.h]"
)
{
TEST_CASE
(
"thread state"
,
"[internal/data_structures/thread.h]"
)
{
int
state_one
=
1
;
vector
<
int
>
state_two
{
1
,
2
};
...
...
@@ -36,12 +36,12 @@ TEST_CASE( "thread state", "[internal/data_structures/thread.h]") {
int
base_tests_shared_counter
;
TEST_CASE
(
"spinlock protects concurrent counter"
,
"[internal/data_structures/spinlock.h]"
)
{
TEST_CASE
(
"spinlock protects concurrent counter"
,
"[internal/data_structures/spinlock.h]"
)
{
constexpr
int
num_iterations
=
1000000
;
base_tests_shared_counter
=
0
;
spin_lock
lock
{};
SECTION
(
"lock can be used by itself"
)
{
SECTION
(
"lock can be used by itself"
)
{
auto
t1
=
start_thread
([
&
]()
{
for
(
int
i
=
0
;
i
<
num_iterations
;
i
++
)
{
lock
.
lock
();
...
...
@@ -63,7 +63,7 @@ TEST_CASE( "spinlock protects concurrent counter", "[internal/data_structures/sp
REQUIRE
(
base_tests_shared_counter
==
0
);
}
SECTION
(
"lock can be used with std::lock_guard"
)
{
SECTION
(
"lock can be used with std::lock_guard"
)
{
auto
t1
=
start_thread
([
&
]()
{
for
(
int
i
=
0
;
i
<
num_iterations
;
i
++
)
{
std
::
lock_guard
<
spin_lock
>
my_lock
{
lock
};
...
...
test/data_structures_test.cpp
View file @
aa270645
...
...
@@ -12,13 +12,12 @@ using namespace pls::internal::data_structures;
using
namespace
pls
::
internal
::
base
;
using
namespace
std
;
TEST_CASE
(
"aligned stack stores objects correctly"
,
"[internal/data_structures/aligned_stack.h]"
)
{
TEST_CASE
(
"aligned stack stores objects correctly"
,
"[internal/data_structures/aligned_stack.h]"
)
{
constexpr
long
data_size
=
1024
;
char
data
[
data_size
];
aligned_stack
stack
{
data
,
data_size
};
SECTION
(
"stack correctly pushes sub linesize objects"
)
{
SECTION
(
"stack correctly pushes sub linesize objects"
)
{
std
::
array
<
char
,
5
>
small_data_one
{
'a'
,
'b'
,
'c'
,
'd'
,
'e'
};
std
::
array
<
char
,
64
>
small_data_two
{};
std
::
array
<
char
,
1
>
small_data_three
{
'A'
};
...
...
@@ -32,7 +31,7 @@ TEST_CASE( "aligned stack stores objects correctly", "[internal/data_structures/
REQUIRE
(
reinterpret_cast
<
std
::
uintptr_t
>
(
pointer_three
)
%
system_details
::
CACHE_LINE_SIZE
==
0
);
}
SECTION
(
"stack correctly pushes above linesize objects"
)
{
SECTION
(
"stack correctly pushes above linesize objects"
)
{
std
::
array
<
char
,
5
>
small_data_one
{
'a'
,
'b'
,
'c'
,
'd'
,
'e'
};
std
::
array
<
char
,
system_details
::
CACHE_LINE_SIZE
+
10
>
big_data_one
{};
...
...
@@ -43,7 +42,7 @@ TEST_CASE( "aligned stack stores objects correctly", "[internal/data_structures/
REQUIRE
(
reinterpret_cast
<
std
::
uintptr_t
>
(
small_pointer_one
)
%
system_details
::
CACHE_LINE_SIZE
==
0
);
}
SECTION
(
"stack correctly stores and retrieves objects"
)
{
SECTION
(
"stack correctly stores and retrieves objects"
)
{
std
::
array
<
char
,
5
>
data_one
{
'a'
,
'b'
,
'c'
,
'd'
,
'e'
};
stack
.
push
(
data_one
);
...
...
@@ -52,7 +51,7 @@ TEST_CASE( "aligned stack stores objects correctly", "[internal/data_structures/
REQUIRE
(
retrieved_data
==
std
::
array
<
char
,
5
>
{
'a'
,
'b'
,
'c'
,
'd'
,
'e'
});
}
SECTION
(
"stack can push and pop multiple times with correct alignment"
)
{
SECTION
(
"stack can push and pop multiple times with correct alignment"
)
{
std
::
array
<
char
,
5
>
small_data_one
{
'a'
,
'b'
,
'c'
,
'd'
,
'e'
};
std
::
array
<
char
,
64
>
small_data_two
{};
std
::
array
<
char
,
1
>
small_data_three
{
'A'
};
...
...
@@ -76,15 +75,15 @@ TEST_CASE( "aligned stack stores objects correctly", "[internal/data_structures/
}
}
TEST_CASE
(
"deque stores objects correctly"
,
"[internal/data_structures/deque.h]"
)
{
class
my_item
:
public
deque_item
{
TEST_CASE
(
"deque stores objects correctly"
,
"[internal/data_structures/deque.h]"
)
{
class
my_item
:
public
deque_item
{
};
deque
<
my_item
>
deque
;
my_item
one
,
two
,
three
;
SECTION
(
"add and remove items form the tail"
)
{
SECTION
(
"add and remove items form the tail"
)
{
deque
.
push_tail
(
&
one
);
deque
.
push_tail
(
&
two
);
deque
.
push_tail
(
&
three
);
...
...
@@ -94,7 +93,7 @@ TEST_CASE( "deque stores objects correctly", "[internal/data_structures/deque.h]
REQUIRE
(
deque
.
pop_tail
()
==
&
one
);
}
SECTION
(
"handles getting empty by popping the tail correctly"
)
{
SECTION
(
"handles getting empty by popping the tail correctly"
)
{
deque
.
push_tail
(
&
one
);
REQUIRE
(
deque
.
pop_tail
()
==
&
one
);
...
...
@@ -102,7 +101,7 @@ TEST_CASE( "deque stores objects correctly", "[internal/data_structures/deque.h]
REQUIRE
(
deque
.
pop_tail
()
==
&
two
);
}
SECTION
(
"remove items form the head"
)
{
SECTION
(
"remove items form the head"
)
{
deque
.
push_tail
(
&
one
);
deque
.
push_tail
(
&
two
);
deque
.
push_tail
(
&
three
);
...
...
@@ -112,7 +111,7 @@ TEST_CASE( "deque stores objects correctly", "[internal/data_structures/deque.h]
REQUIRE
(
deque
.
pop_head
()
==
&
three
);
}
SECTION
(
"handles getting empty by popping the head correctly"
)
{
SECTION
(
"handles getting empty by popping the head correctly"
)
{
deque
.
push_tail
(
&
one
);
REQUIRE
(
deque
.
pop_head
()
==
&
one
);
...
...
@@ -120,7 +119,7 @@ TEST_CASE( "deque stores objects correctly", "[internal/data_structures/deque.h]
REQUIRE
(
deque
.
pop_head
()
==
&
two
);
}
SECTION
(
"handles getting empty by popping the head and tail correctly"
)
{
SECTION
(
"handles getting empty by popping the head and tail correctly"
)
{
deque
.
push_tail
(
&
one
);
REQUIRE
(
deque
.
pop_tail
()
==
&
one
);
...
...
test/scheduling_tests.cpp
View file @
aa270645
...
...
@@ -4,11 +4,11 @@
using
namespace
pls
;
class
once_sub_task
:
public
fork_join_sub_task
{
std
::
atomic
<
int
>*
counter_
;
class
once_sub_task
:
public
fork_join_sub_task
{
std
::
atomic
<
int
>
*
counter_
;
int
children_
;
protected
:
protected
:
void
execute_internal
()
override
{
(
*
counter_
)
++
;
for
(
int
i
=
0
;
i
<
children_
;
i
++
)
{
...
...
@@ -16,38 +16,37 @@ protected:
}
}
public
:
explicit
once_sub_task
(
std
::
atomic
<
int
>*
counter
,
int
children
)
:
public
:
explicit
once_sub_task
(
std
::
atomic
<
int
>
*
counter
,
int
children
)
:
fork_join_sub_task
(),
counter_
{
counter
},
children_
{
children
}
{}
};
class
force_steal_sub_task
:
public
fork_join_sub_task
{
std
::
atomic
<
int
>*
parent_counter_
;
std
::
atomic
<
int
>*
overall_counter_
;
class
force_steal_sub_task
:
public
fork_join_sub_task
{
std
::
atomic
<
int
>
*
parent_counter_
;
std
::
atomic
<
int
>
*
overall_counter_
;
protected
:
protected
:
void
execute_internal
()
override
{
(
*
overall_counter_
)
--
;
if
(
overall_counter_
->
load
()
>
0
)
{
std
::
atomic
<
int
>
counter
{
1
};
spawn_child
(
force_steal_sub_task
(
&
counter
,
overall_counter_
));
while
(
counter
.
load
()
>
0
)
;
// Spin...
while
(
counter
.
load
()
>
0
);
// Spin...
}
(
*
parent_counter_
)
--
;
}
public
:
explicit
force_steal_sub_task
(
std
::
atomic
<
int
>*
parent_counter
,
std
::
atomic
<
int
>*
overall_counter
)
:
public
:
explicit
force_steal_sub_task
(
std
::
atomic
<
int
>
*
parent_counter
,
std
::
atomic
<
int
>
*
overall_counter
)
:
fork_join_sub_task
(),
parent_counter_
{
parent_counter
},
overall_counter_
{
overall_counter
}
{}
};
TEST_CASE
(
"tbb task are scheduled correctly"
,
"[internal/scheduling/fork_join_task.h]"
)
{
TEST_CASE
(
"tbb task are scheduled correctly"
,
"[internal/scheduling/fork_join_task.h]"
)
{
malloc_scheduler_memory
my_scheduler_memory
{
8
,
2
<<
12
};
SECTION
(
"tasks are executed exactly once"
)
{
...
...
@@ -56,7 +55,7 @@ TEST_CASE( "tbb task are scheduled correctly", "[internal/scheduling/fork_join_t
int
total_tasks
=
1
+
4
+
4
*
3
+
4
*
3
*
2
+
4
*
3
*
2
*
1
;
std
::
atomic
<
int
>
counter
{
0
};
my_scheduler
.
perform_work
([
&
]
()
{
my_scheduler
.
perform_work
([
&
]()
{
once_sub_task
sub_task
{
&
counter
,
start_counter
};
fork_join_task
task
{
&
sub_task
,
unique_id
::
create
(
42
)};
scheduler
::
execute_task
(
task
);
...
...
@@ -68,7 +67,7 @@ TEST_CASE( "tbb task are scheduled correctly", "[internal/scheduling/fork_join_t
SECTION
(
"tasks can be stolen"
)
{
scheduler
my_scheduler
{
&
my_scheduler_memory
,
8
};
my_scheduler
.
perform_work
([
&
]
()
{
my_scheduler
.
perform_work
([
&
]()
{
std
::
atomic
<
int
>
dummy_parent
{
1
},
overall_counter
{
8
};
force_steal_sub_task
sub_task
{
&
dummy_parent
,
&
overall_counter
};
fork_join_task
task
{
&
sub_task
,
unique_id
::
create
(
42
)};
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment