las3_pub / predictable_parallel_patterns
Commit aa270645 authored 6 years ago by FritzFlorian

Reformat code to fit GNU code formatting style.

parent 3ff10baa
Pipeline #1157 passed with stages in 3 minutes 36 seconds
Showing 46 changed files with 1813 additions and 1745 deletions (+1813 −1745)
app/benchmark_fft/main.cpp  +51 −52
app/invoke_parallel/main.cpp  +34 −34
app/playground/main.cpp  +4 −3
app/test_for_new/main.cpp  +3 −4
lib/pls/include/pls/algorithms/invoke_parallel.h  +9 −7
lib/pls/include/pls/algorithms/invoke_parallel_impl.h  +52 −50
lib/pls/include/pls/internal/base/alignment.h  +17 −15
lib/pls/include/pls/internal/base/barrier.h  +23 −21
lib/pls/include/pls/internal/base/spin_lock.h  +8 −6
lib/pls/include/pls/internal/base/system_details.h  +23 −21
lib/pls/include/pls/internal/base/tas_spin_lock.h  +24 −24
lib/pls/include/pls/internal/base/thread.h  +102 −99
lib/pls/include/pls/internal/base/thread_impl.h  +63 −62
lib/pls/include/pls/internal/base/ttas_spin_lock.h  +22 −24
lib/pls/include/pls/internal/data_structures/aligned_stack.h  +42 −39
lib/pls/include/pls/internal/data_structures/aligned_stack_impl.h  +30 −28
lib/pls/include/pls/internal/data_structures/deque.h  +52 −50
lib/pls/include/pls/internal/helpers/mini_benchmark.h  +40 −38
lib/pls/include/pls/internal/helpers/prohibit_new.h  +3 −3
lib/pls/include/pls/internal/helpers/unique_id.h  +20 −18
lib/pls/include/pls/internal/scheduling/abstract_task.h  +34 −32
lib/pls/include/pls/internal/scheduling/fork_join_task.h  +83 −81
lib/pls/include/pls/internal/scheduling/root_task.h  +67 −65
lib/pls/include/pls/internal/scheduling/run_on_n_threads_task.h  +103 −100
lib/pls/include/pls/internal/scheduling/scheduler.h  +46 −44
lib/pls/include/pls/internal/scheduling/scheduler_impl.h  +66 −64
lib/pls/include/pls/internal/scheduling/scheduler_memory.h  +60 −57
lib/pls/include/pls/internal/scheduling/thread_state.h  +34 −32
lib/pls/include/pls/pls.h  +10 −8
lib/pls/src/internal/base/alignment.cpp  +22 −20
lib/pls/src/internal/base/barrier.cpp  +15 −13
lib/pls/src/internal/base/tas_spin_lock.cpp  +28 −26
lib/pls/src/internal/base/thread.cpp  +10 −8
lib/pls/src/internal/base/ttas_spin_lock.cpp  +43 −41
lib/pls/src/internal/data_structures/aligned_stack.cpp  +10 −8
lib/pls/src/internal/data_structures/deque.cpp  +54 −52
lib/pls/src/internal/scheduling/abstract_task.cpp  +60 −58
lib/pls/src/internal/scheduling/fork_join_task.cpp  +128 −126
lib/pls/src/internal/scheduling/root_task.cpp  +4 −4
lib/pls/src/internal/scheduling/run_on_n_threads_task.cpp  +4 −4
lib/pls/src/internal/scheduling/scheduler.cpp  +58 −55
lib/pls/src/internal/scheduling/scheduler_memory.cpp  +29 −24
lib/pls/src/internal/scheduling/thread_state.cpp  +4 −4
test/base_tests.cpp  +60 −60
test/data_structures_test.cpp  +102 −103
test/scheduling_tests.cpp  +57 −58
app/benchmark_fft/main.cpp
@@ -12,76 +12,75 @@ static constexpr int INPUT_SIZE = 2064;
typedef std::vector<std::complex<double>> complex_vector;

void divide(complex_vector::iterator data, int n) {
  complex_vector tmp_odd_elements(n / 2);
  for (int i = 0; i < n / 2; i++) {
    tmp_odd_elements[i] = data[i * 2 + 1];
  }
  for (int i = 0; i < n / 2; i++) {
    data[i] = data[i * 2];
  }
  for (int i = 0; i < n / 2; i++) {
    data[i + n / 2] = tmp_odd_elements[i];
  }
}

void combine(complex_vector::iterator data, int n) {
  for (int i = 0; i < n / 2; i++) {
    std::complex<double> even = data[i];
    std::complex<double> odd = data[i + n / 2];

    // w is the "twiddle-factor".
    // this could be cached, but we run the same 'data_structures' algorithm parallel/serial,
    // so it won't impact the performance comparison.
    std::complex<double> w = exp(std::complex<double>(0, -2. * M_PI * i / n));

    data[i] = even + w * odd;
    data[i + n / 2] = even - w * odd;
  }
}

void fft(complex_vector::iterator data, int n) {
  if (n < 2) {
    return;
  }

  divide(data, n);
  if (n <= CUTOFF) {
    fft(data, n / 2);
    fft(data + n / 2, n / 2);
  } else {
    pls::invoke_parallel(
        [&] { fft(data, n / 2); },
        [&] { fft(data + n / 2, n / 2); }
    );
  }
  combine(data, n);
}

complex_vector prepare_input(int input_size) {
  std::vector<double> known_frequencies{2, 11, 52, 88, 256};
  complex_vector data(input_size);

  // Set our input data to match a time series of the known_frequencies.
  // When applying fft to this time-series we should find these frequencies.
  for (int i = 0; i < input_size; i++) {
    data[i] = std::complex<double>(0.0, 0.0);
    for (auto frequencie : known_frequencies) {
      data[i] += sin(2 * M_PI * frequencie * i / input_size);
    }
  }

  return data;
}

int main() {
  PROFILE_ENABLE
  complex_vector initial_input = prepare_input(INPUT_SIZE);

  pls::internal::helpers::run_mini_benchmark([&] {
    complex_vector input = initial_input;
    fft(input.begin(), input.size());
  }, 8, 4000);

  PROFILE_SAVE("test_profile.prof")
}
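The hunk above only re-indents the existing FFT benchmark, so the logic is unchanged. As a reading aid, a minimal sketch (not part of the commit) of how the transformed data could be sanity-checked against the known_frequencies; print_dominant_bins and the magnitude threshold are made-up names/values, and the sketch reuses the complex_vector typedef from the file above.

// Hypothetical check, not in the repository: after fft(data.begin(), data.size()),
// the bins at the known frequencies should clearly dominate the spectrum.
void print_dominant_bins(const complex_vector &data) {
  for (int i = 0; i < (int) data.size() / 2; i++) {
    double magnitude = std::abs(data[i]);
    if (magnitude > data.size() / 4.0) {  // arbitrary threshold, assumption
      std::cout << "bin " << i << ": magnitude " << magnitude << std::endl;
    }
  }
}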
app/invoke_parallel/main.cpp
@@ -8,44 +8,44 @@ static pls::static_scheduler_memory<8, 2 << 14> my_scheduler_memory;
static constexpr int CUTOFF = 10;

long fib_serial(long n) {
  if (n == 0) {
    return 0;
  }
  if (n == 1) {
    return 1;
  }

  return fib_serial(n - 1) + fib_serial(n - 2);
}

long fib(long n) {
  if (n <= CUTOFF) {
    return fib_serial(n);
  }

  // Actual 'invoke_parallel' logic/code
  int left, right;
  pls::invoke_parallel(
      [&] { left = fib(n - 1); },
      [&] { right = fib(n - 2); }
  );
  return left + right;
}

int main() {
  PROFILE_ENABLE
  pls::scheduler scheduler{&my_scheduler_memory, 8};

  long result;
  scheduler.perform_work([&] {
    PROFILE_MAIN_THREAD
    // Call looks just the same, only requirement is
    // the enclosure in the perform_work lambda.
    for (int i = 0; i < 10; i++) {
      result = fib(30);
      std::cout << "Fib(30)=" << result << std::endl;
    }
  });

  PROFILE_SAVE("test_profile.prof")
}
app/playground/main.cpp
@@ -10,8 +10,9 @@
#include <pls/internal/scheduling/root_task.h>
#include <pls/internal/helpers/unique_id.h>

int main() {
  std::cout << pls::internal::scheduling::root_task<void (*)>::create_id().type_.hash_code()
            << std::endl;
  std::cout << pls::internal::helpers::unique_id::create<pls::internal::scheduling::root_task<void (*)>>().type_.hash_code()
            << std::endl;
}
app/test_for_new/main.cpp
@@ -5,9 +5,8 @@ using namespace pls::internal::base;
int global = 0;

int main() {
  // Try to use every feature, to trigger the prohibited use of new if found somewhere
  auto t1 = start_thread([]() {});
  t1.join();
}
lib/pls/include/pls/algorithms/invoke_parallel.h
@@ -6,15 +6,17 @@
#include "pls/internal/scheduling/scheduler.h"

namespace pls {
namespace algorithm {

template<typename Function1, typename Function2>
void invoke_parallel(const Function1 &function1, const Function2 &function2);

template<typename Function1, typename Function2, typename Function3>
void invoke_parallel(const Function1 &function1, const Function2 &function2, const Function3 &function3);

// ...and so on, add more if we decide to keep this design

}
}

#include "invoke_parallel_impl.h"
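A minimal usage sketch for the overloads declared above, assuming the calling code already runs inside a scheduler's perform_work lambda (as in the apps of this commit); compute_a/compute_b/compute_c are placeholder functions, not part of the repository.

int a, b, c;
pls::invoke_parallel(
    [&] { a = compute_a(); },
    [&] { b = compute_b(); },
    [&] { c = compute_c(); }  // the last lambda is executed inline by the calling task
);
// All three results are available here, after the implicit join.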
lib/pls/include/pls/algorithms/invoke_parallel_impl.h
@@ -7,65 +7,67 @@
#include "pls/internal/helpers/unique_id.h"

namespace pls {
namespace algorithm {
namespace internal {

using namespace ::pls::internal::scheduling;

template<typename Body>
inline void run_body(const Body &internal_body, const abstract_task::id &id) {
  // Make sure we are in the context of this invoke_parallel instance,
  // if not we will spawn it as a new 'fork-join-style' task.
  auto current_task = scheduler::current_task();
  if (current_task->unique_id() == id) {
    auto current_sub_task = reinterpret_cast<fork_join_task *>(current_task)->currently_executing();
    internal_body(current_sub_task);
  } else {
    fork_join_lambda<Body> root_body(&internal_body);
    fork_join_task root_task{&root_body, id};
    scheduler::execute_task(root_task);
  }
}

}

template<typename Function1, typename Function2>
void invoke_parallel(const Function1 &function1, const Function2 &function2) {
  using namespace ::pls::internal::scheduling;
  using namespace ::pls::internal::helpers;
  static abstract_task::id id = unique_id::create<Function1, Function2>();

  auto internal_body = [&](fork_join_sub_task *this_task) {
    auto sub_task_body_1 = [&](fork_join_sub_task *) { function1(); };
    auto sub_task_1 = fork_join_lambda<decltype(sub_task_body_1)>(&sub_task_body_1);

    this_task->spawn_child(sub_task_1);
    function2();  // Execute last function 'inline' without spawning a sub_task object
    this_task->wait_for_all();
  };

  internal::run_body(internal_body, id);
}

template<typename Function1, typename Function2, typename Function3>
void invoke_parallel(const Function1 &function1, const Function2 &function2, const Function3 &function3) {
  using namespace ::pls::internal::scheduling;
  using namespace ::pls::internal::helpers;
  static abstract_task::id id = unique_id::create<Function1, Function2, Function3>();

  auto internal_body = [&](fork_join_sub_task *this_task) {
    auto sub_task_body_1 = [&](fork_join_sub_task *) { function1(); };
    auto sub_task_1 = fork_join_lambda<decltype(sub_task_body_1)>(&sub_task_body_1);
    auto sub_task_body_2 = [&](fork_join_sub_task *) { function2(); };
    auto sub_task_2 = fork_join_lambda<decltype(sub_task_body_2)>(&sub_task_body_2);

    this_task->spawn_child(sub_task_1);
    this_task->spawn_child(sub_task_2);
    function3();  // Execute last function 'inline' without spawning a sub_task object
    this_task->wait_for_all();
  };

  internal::run_body(internal_body, id);
}

}
}
#endif //PLS_INVOKE_PARALLEL_IMPL_H
lib/pls/include/pls/internal/base/alignment.h
@@ -8,21 +8,23 @@
#include "system_details.h"

namespace pls {
namespace internal {
namespace base {
namespace alignment {

template<typename T>
struct aligned_wrapper {
  alignas(system_details::CACHE_LINE_SIZE) unsigned char data[sizeof(T)];
  T *pointer() { return reinterpret_cast<T *>(data); }
};

void *allocate_aligned(size_t size);

std::uintptr_t next_alignment(std::uintptr_t size);
char *next_alignment(char *pointer);

}
}
}
}
#endif //PLS_ALIGNMENT_H
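A short sketch of how aligned_wrapper could be used to keep values on separate cache lines; the array name and the placement-new initialisation are assumptions for illustration only, and <new> must be available.

using namespace pls::internal::base;

// One counter per cache line, so concurrent writers do not share a line (false sharing).
alignment::aligned_wrapper<int> per_thread_counter[4];
new (per_thread_counter[0].pointer()) int(0);  // construct in the raw, aligned storage
*per_thread_counter[0].pointer() += 1;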
lib/pls/include/pls/internal/base/barrier.h
@@ -5,27 +5,29 @@
#include <pthread.h>

namespace pls {
namespace internal {
namespace base {

/**
 * Provides standard barrier behaviour.
 * `count` threads have to call `wait()` before any of the `wait()` calls returns,
 * thus blocking all threads until everyone reached the barrier.
 *
 * PORTABILITY:
 * Current implementation is based on pthreads.
 */
class barrier {
  pthread_barrier_t barrier_;

 public:
  explicit barrier(unsigned int count);
  ~barrier();

  void wait();
};

}
}
}
#endif //PLS_BARRIER_H
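A brief usage sketch of the barrier together with start_thread from the same namespace; the worker lambda is a placeholder and not taken from the repository.

using namespace pls::internal::base;

barrier sync_point{2};  // two participants: main thread + one worker

auto worker = start_thread([&] {
  // ... per-thread setup ...
  sync_point.wait();  // returns only after both threads reached the barrier
});

sync_point.wait();
worker.join();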
lib/pls/include/pls/internal/base/spin_lock.h
@@ -6,12 +6,14 @@
#include "ttas_spin_lock.h"

namespace pls {
namespace internal {
namespace base {

// Default Spin-Lock implementation for this project.
using spin_lock = tas_spin_lock;

}
}
}
#endif //PLS_SPINLOCK_H
lib/pls/include/pls/internal/base/system_details.h
@@ -5,29 +5,31 @@
#include <cstdint>

namespace pls {
namespace internal {
namespace base {

/**
 * Collection of system details, e.g. hardware cache line size.
 *
 * PORTABILITY:
 * Currently sane default values for x86.
 */
namespace system_details {

/**
 * Most processors have 64 byte cache lines
 */
constexpr std::uintptr_t CACHE_LINE_SIZE = 64;

/**
 * Choose one of the following ways to store thread specific data.
 * Try to choose the fastest available on this processor/system.
 */
// #define PLS_THREAD_SPECIFIC_PTHREAD
#define PLS_THREAD_SPECIFIC_COMPILER

}
}
}
}
#endif //PLS_SYSTEM_DETAILS_H
lib/pls/include/pls/internal/base/tas_spin_lock.h
@@ -10,30 +10,30 @@
#include "pls/internal/base/thread.h"

namespace pls {
namespace internal {
namespace base {

/**
 * A simple set and test_and_set based spin lock implementation.
 *
 * PORTABILITY:
 * Current implementation is based on C++ 11 atomic_flag.
 */
class tas_spin_lock {
  std::atomic_flag flag_;
  unsigned int yield_at_tries_;

 public:
  tas_spin_lock() : flag_{ATOMIC_FLAG_INIT}, yield_at_tries_{1024} {};
  tas_spin_lock(const tas_spin_lock &other) : flag_{ATOMIC_FLAG_INIT}, yield_at_tries_{other.yield_at_tries_} {}

  void lock();
  bool try_lock(unsigned int num_tries = 1);
  void unlock();
};

}
}
}
#endif //PLS_TAS_SPIN_LOCK_H
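A small usage sketch: the lock/unlock interface makes the class usable with std::lock_guard (assuming <mutex> is included), which is how the scheduling code in this commit uses base::spin_lock; shared_value is a placeholder.

using namespace pls::internal::base;

tas_spin_lock lock;
int shared_value = 0;  // placeholder shared state

{
  std::lock_guard<tas_spin_lock> guard{lock};  // lock() on entry, unlock() on scope exit
  shared_value++;
}

if (lock.try_lock()) {  // non-blocking attempt (one try by default)
  shared_value++;
  lock.unlock();
}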
lib/pls/include/pls/internal/base/thread.h
@@ -13,109 +13,112 @@
#include "system_details.h"

namespace pls {
namespace internal {
namespace base {

using thread_entrypoint = void();

/**
 * Static methods that can be performed on the current thread.
 *
 * usage:
 * this_thread::yield();
 * T* state = this_thread::state<T>();
 *
 * PORTABILITY:
 * Current implementation is based on pthreads.
 */
class this_thread {
  template<typename Function, typename State>
  friend class thread;

#ifdef PLS_THREAD_SPECIFIC_PTHREAD
  static pthread_key_t local_storage_key_;
  static bool local_storage_key_initialized_;
#endif
#ifdef PLS_THREAD_SPECIFIC_COMPILER
  static __thread void *local_state_;
#endif

 public:
  static void yield() {
    pthread_yield();
  }

  /**
   * Retrieves the local state pointer.
   *
   * @tparam T The type of the state that is stored.
   * @return The state pointer held for this thread.
   */
  template<typename T>
  static T *state();

  /**
   * Stores a pointer to the thread local state object.
   * The memory management for this has to be done by the user,
   * we only keep the pointer.
   *
   * @tparam T The type of the state that is stored.
   * @param state_pointer A pointer to the threads state object.
   */
  template<typename T>
  static void set_state(T *state_pointer);
};

/**
 * Abstraction for starting a function in a separate thread.
 *
 * @tparam Function Lambda being started on the new thread.
 * @tparam State State type held for this thread.
 *
 * usage:
 * T* state;
 * auto thread = start_thread([] {
 *   // Run on new thread
 * }, state);
 * thread.join(); // Wait for it to finish
 *
 * PORTABILITY:
 * Current implementation is based on pthreads.
 */
template<typename Function, typename State>
class thread {
  friend class this_thread;

  // Keep a copy of the function (lambda) in this object to make sure it is valid when called!
  Function function_;
  State *state_pointer_;

  // We need to wait for the started function to read
  // the function_ and state_pointer_ property before returning
  // from the constructor, as the object might be moved after this.
  std::atomic_flag *startup_flag_;

  // Keep handle to native implementation
  pthread_t pthread_thread_;

  static void *start_pthread_internal(void *thread_pointer);

 public:
  explicit thread(const Function &function, State *state_pointer);

 public:
  void join();

  // make object move only
  thread(thread &&) noexcept = default;
  thread &operator=(thread &&) noexcept = default;

  thread(const thread &) = delete;
  thread &operator=(const thread &) = delete;
};

template<typename Function, typename State>
thread<Function, State> start_thread(const Function &function, State *state_pointer);

template<typename Function>
thread<Function, void> start_thread(const Function &function);

}
}
}
#include "thread_impl.h"
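A compact sketch of the usage pattern described in the comments above, with the state-pointer round trip made explicit; the int state is a placeholder type chosen for illustration.

using namespace pls::internal::base;

int worker_state = 0;  // user-managed state object

auto worker = start_thread([] {
  // Inside the new thread the pointer passed to start_thread is visible again:
  int *my_state = this_thread::state<int>();
  *my_state = 1;
}, &worker_state);

worker.join();  // worker_state == 1 afterwards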
lib/pls/include/pls/internal/base/thread_impl.h
@@ -3,86 +3,87 @@
#define PLS_THREAD_IMPL_H

namespace pls {
namespace internal {
namespace base {

template<typename T>
T *this_thread::state() {
#ifdef PLS_THREAD_SPECIFIC_PTHREAD
  return reinterpret_cast<T *>(pthread_getspecific(local_storage_key_));
#endif
#ifdef PLS_THREAD_SPECIFIC_COMPILER
  return reinterpret_cast<T *>(local_state_);
#endif
}

template<typename T>
void this_thread::set_state(T *state_pointer) {
#ifdef PLS_THREAD_SPECIFIC_PTHREAD
  pthread_setspecific(this_thread::local_storage_key_, (void *) state_pointer);
#endif
#ifdef PLS_THREAD_SPECIFIC_COMPILER
  local_state_ = state_pointer;
#endif
}

template<typename Function, typename State>
void *thread<Function, State>::start_pthread_internal(void *thread_pointer) {
  auto my_thread = reinterpret_cast<thread *>(thread_pointer);
  Function my_function_copy = my_thread->function_;
  State *my_state_pointer_copy = my_thread->state_pointer_;

  // Now we have copies of everything we need on the stack.
  // The original thread object can be moved freely (no more
  // references to its memory location).
  my_thread->startup_flag_->clear();

  this_thread::set_state(my_state_pointer_copy);
  my_function_copy();

  // Finished executing the user function
  pthread_exit(nullptr);
}

template<typename Function, typename State>
thread<Function, State>::thread(const Function &function, State *state_pointer) :
    function_{function},
    state_pointer_{state_pointer},
    startup_flag_{nullptr},
    pthread_thread_{} {
#ifdef PLS_THREAD_SPECIFIC_PTHREAD
  if (!this_thread::local_storage_key_initialized_) {
    pthread_key_create(&this_thread::local_storage_key_, nullptr);
    this_thread::local_storage_key_initialized_ = true;
  }
#endif

  // We only need this during startup, will be destroyed when out of scope
  std::atomic_flag startup_flag{ATOMIC_FLAG_INIT};
  startup_flag_ = &startup_flag;

  startup_flag.test_and_set();  // Set the flag, pthread will clear it when it is safe to return
  pthread_create(&pthread_thread_, nullptr, start_pthread_internal, (void *) (this));
  while (startup_flag.test_and_set());  // Busy waiting for the starting flag to clear
}

template<typename Function, typename State>
void thread<Function, State>::join() {
  pthread_join(pthread_thread_, nullptr);
}

template<typename Function, typename State>
thread<Function, State> start_thread(const Function &function, State *state_pointer) {
  return thread<Function, State>(function, state_pointer);
}

template<typename Function>
thread<Function, void> start_thread(const Function &function) {
  return thread<Function, void>(function, nullptr);
}

}
}
}
#endif //PLS_THREAD_IMPL_H
lib/pls/include/pls/internal/base/ttas_spin_lock.h
@@ -8,30 +8,28 @@
#include "pls/internal/base/thread.h"

namespace pls {
namespace internal {
namespace base {

/**
 * A simple set and test_and_set based spin lock implementation.
 *
 * PORTABILITY:
 * Current implementation is based on C++ 11 atomic_flag.
 */
class ttas_spin_lock {
  std::atomic<int> flag_;
  const unsigned int yield_at_tries_;

 public:
  ttas_spin_lock() : flag_{0}, yield_at_tries_{1024} {};
  ttas_spin_lock(const ttas_spin_lock &other) : flag_{0}, yield_at_tries_{other.yield_at_tries_} {}

  void lock();
  bool try_lock(unsigned int num_tries = 1);
  void unlock();
};

}
}
}
#endif //PLS_TTAS_SPIN_LOCK_H
lib/pls/include/pls/internal/data_structures/aligned_stack.h
@@ -9,45 +9,48 @@
#include "pls/internal/base/alignment.h"

namespace pls {
namespace internal {
namespace data_structures {

/**
 * Generic stack-like data structure that allows to allocate arbitrary objects in a given memory region.
 * The objects will be stored aligned in the stack, making the storage cache friendly and very fast
 * (as long as one can live with the stack restrictions).
 *
 * IMPORTANT: Does not call destructors on stored objects! Do not allocate resources in the objects!
 *
 * Usage:
 * aligned_stack stack{pointer_to_memory, size_of_memory};
 * T* pointer = stack.push(some_object); // Copy-Construct the object on top of stack
 * stack.pop<T>(); // Deconstruct the top object of type T
 */
class aligned_stack {
  // Keep bounds of our memory block
  char *memory_start_;
  char *memory_end_;

  // Current head will always be aligned to cache lines
  char *head_;

 public:
  typedef char *state;

  aligned_stack() : memory_start_{nullptr}, memory_end_{nullptr}, head_{nullptr} {};
  aligned_stack(char *memory_region, std::size_t size);

  template<typename T>
  T *push(const T &object);
  template<typename T>
  void *push();
  template<typename T>
  T pop();

  state save_state() const { return head_; }
  void reset_state(state new_state) { head_ = new_state; }
};

}
}
}
#include "aligned_stack_impl.h"
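A concrete version of the usage comment above; the buffer size and the values pushed are arbitrary illustration choices.

using namespace pls::internal::data_structures;

char memory[4096];                              // caller-provided memory region
aligned_stack stack{memory, sizeof(memory)};

int *value = stack.push(42);                    // copy-construct on top of the stack
aligned_stack::state checkpoint = stack.save_state();
stack.push(7);
stack.reset_state(checkpoint);                  // discard everything pushed after the checkpoint
int popped = stack.pop<int>();                  // removes and returns the 42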
lib/pls/include/pls/internal/data_structures/aligned_stack_impl.h
@@ -3,34 +3,36 @@
#define PLS_ALIGNED_STACK_IMPL_H

namespace pls {
namespace internal {
namespace data_structures {

template<typename T>
T *aligned_stack::push(const T &object) {
  // Copy-Construct
  return new ((void *) push<T>()) T(object);
}

template<typename T>
void *aligned_stack::push() {
  void *result = reinterpret_cast<T *>(head_);

  // Move head to next aligned position after new object
  head_ = base::alignment::next_alignment(head_ + sizeof(T));
  if (head_ >= memory_end_) {
    PLS_ERROR("Tried to allocate object on alligned_stack without sufficient memory!");
  }

  return result;
}

template<typename T>
T aligned_stack::pop() {
  head_ = head_ - base::alignment::next_alignment(sizeof(T));
  return *reinterpret_cast<T *>(head_);
}

}
}
}
#endif //PLS_ALIGNED_STACK_IMPL_H
lib/pls/include/pls/internal/data_structures/deque.h
@@ -5,56 +5,58 @@
#include "pls/internal/base/spin_lock.h"

namespace pls {
namespace internal {
namespace data_structures {

/**
 * Turns any object into deque item when inheriting from this.
 */
class deque_item {
  friend class deque_internal;

  deque_item *prev_;
  deque_item *next_;
};

class deque_internal {
 protected:
  deque_item *head_;
  deque_item *tail_;

  base::spin_lock lock_;

  deque_item *pop_head_internal();
  deque_item *pop_tail_internal();
  void push_tail_internal(deque_item *new_item);
};

/**
 * A double linked list based deque.
 * Storage is therefore only needed for the individual items.
 *
 * @tparam Item The type of items stored in this deque
 */
template<typename Item>
class deque : deque_internal {
 public:
  explicit deque() : deque_internal{} {}

  inline Item *pop_head() {
    return static_cast<Item *>(pop_head_internal());
  }

  inline Item *pop_tail() {
    return static_cast<Item *>(pop_tail_internal());
  }

  inline void push_tail(Item *new_item) {
    push_tail_internal(new_item);
  }
};

}
}
}
#endif //PLS_DEQUE_H
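A short usage sketch: items become storable by inheriting from deque_item, and only pointers are linked. Whether the pop functions return nullptr on an empty deque is an assumption based on their pointer return type; work_item is a made-up example type.

using namespace pls::internal::data_structures;

struct work_item : public deque_item {  // inheriting makes the type queueable
  int payload;
};

deque<work_item> queue;

work_item item;
item.payload = 1;
queue.push_tail(&item);                  // no allocation, the deque only links the item

work_item *from_tail = queue.pop_tail(); // == &item
work_item *from_head = queue.pop_head(); // presumably nullptr, the deque is now empty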
lib/pls/include/pls/internal/helpers/mini_benchmark.h
@@ -9,45 +9,47 @@
#include <iostream>

namespace pls {
namespace internal {
namespace helpers {

// TODO: Clean up (separate into small functions and .cpp file)
template<typename Function>
void run_mini_benchmark(const Function &lambda, size_t max_threads, unsigned long max_runtime_ms = 1000) {
  using namespace std;
  using namespace pls::internal::scheduling;

  malloc_scheduler_memory scheduler_memory{max_threads};
  for (unsigned int num_threads = 1; num_threads <= max_threads; num_threads++) {
    scheduler local_scheduler{&scheduler_memory, num_threads};

    chrono::high_resolution_clock::time_point start_time;
    chrono::high_resolution_clock::time_point end_time;
    unsigned long iterations = 0;
    local_scheduler.perform_work([&] {
      start_time = chrono::high_resolution_clock::now();
      end_time = start_time;
      chrono::high_resolution_clock::time_point planned_end_time = start_time + chrono::milliseconds(max_runtime_ms);

      while (end_time < planned_end_time) {
        lambda();
        end_time = chrono::high_resolution_clock::now();
        iterations++;
      }
    });

    long time = chrono::duration_cast<chrono::microseconds>(end_time - start_time).count();
    double time_per_iteration = (double) time / iterations;

    std::cout << time_per_iteration;
    if (num_threads < max_threads) {
      std::cout << ",";
    }
  }
  std::cout << std::endl;
}

}
}
}
#endif //PLS_MINI_BENCHMARK_H
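Usage mirrors app/benchmark_fft/main.cpp above; a minimal sketch with a placeholder workload lambda.

using namespace pls::internal::helpers;

// Prints one comma-separated line: average microseconds per iteration for 1..max_threads threads.
run_mini_benchmark([&] {
  // workload under test, e.g. fft(input.begin(), input.size());
}, 8, 4000);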
lib/pls/include/pls/internal/helpers/prohibit_new.h
@@ -15,9 +15,9 @@
#ifdef NEW_LINK_ERROR
// This will cause a linker error if new is used in the code.
// We also exit if it is somehow still called.
inline void *operator new(std::size_t) {
  extern int bare_new_erroneously_called();
  exit(bare_new_erroneously_called() | 1);
}
#else
// Use this + debugging point to find out where we use a new
lib/pls/include/pls/internal/helpers/unique_id.h
@@ -7,25 +7,27 @@
#include <stdint.h>

namespace pls {
namespace internal {
namespace helpers {

struct unique_id {
  const uint32_t id_;
  const std::type_info &type_;

  bool operator==(const unique_id &other) const {
    return id_ == other.id_ && type_ == other.type_;
  }

  static constexpr unique_id create(const uint32_t id) {
    return unique_id(id, typeid(void));
  }
  template<typename ...T>
  static constexpr unique_id create() {
    return unique_id(UINT32_MAX, typeid(std::tuple<T...>));
  }

 private:
  explicit constexpr unique_id(const uint32_t id, const std::type_info &type) : id_{id}, type_{type} {};
};

}
}
}
#endif //PLS_UNIQUE_ID_H
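A tiny sketch of both factory functions; the concrete id value and the template arguments are arbitrary examples.

using namespace pls::internal::helpers;

auto id_a = unique_id::create(42);             // explicit numeric id, type_ is typeid(void)
auto id_b = unique_id::create<int, double>();  // type-derived id, as used for the static
                                               // ids in invoke_parallel_impl.h
bool same = id_a == id_b;                      // false: both id_ and type_ differ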
lib/pls/include/pls/internal/scheduling/abstract_task.h
@@ -6,38 +6,40 @@
#include "pls/internal/helpers/unique_id.h"

namespace pls {
namespace internal {
namespace scheduling {

class abstract_task {
 public:
  using id = helpers::unique_id;

 private:
  unsigned int depth_;
  abstract_task::id unique_id_;
  abstract_task *child_task_;

 public:
  abstract_task(const unsigned int depth, const abstract_task::id &unique_id) :
      depth_{depth},
      unique_id_{unique_id},
      child_task_{nullptr} {}

  virtual void execute() = 0;
  void set_child(abstract_task *child_task) { child_task_ = child_task; }
  abstract_task *child() { return child_task_; }

  void set_depth(unsigned int depth) { depth_ = depth; }
  unsigned int depth() const { return depth_; }
  id unique_id() const { return unique_id_; }

 protected:
  virtual bool internal_stealing(abstract_task *other_task) = 0;
  virtual bool split_task(base::spin_lock *lock) = 0;

  bool steal_work();
};

}
}
}
#endif //PLS_ABSTRACT_TASK_H
lib/pls/include/pls/internal/scheduling/fork_join_task.h
@@ -11,87 +11,89 @@
#include "thread_state.h"

namespace pls {
namespace internal {
namespace scheduling {

class fork_join_task;

class fork_join_sub_task : public data_structures::deque_item {
  friend class fork_join_task;

  // Coordinate finishing of sub_tasks
  std::atomic_uint32_t ref_count_;
  fork_join_sub_task *parent_;

  // Access to TBB scheduling environment
  fork_join_task *tbb_task_;

  // Stack Management (reset stack pointer after wait_for_all() calls)
  data_structures::aligned_stack::state stack_state_;

 protected:
  explicit fork_join_sub_task();
  fork_join_sub_task(const fork_join_sub_task &other);

  // Overwritten with behaviour of child tasks
  virtual void execute_internal() = 0;

 public:
  // Only use them when actually executing this sub_task (only public for simpler API design)
  template<typename T>
  void spawn_child(const T &sub_task);
  void wait_for_all();

 private:
  void spawn_child_internal(fork_join_sub_task *sub_task);
  void execute();
};

template<typename Function>
class fork_join_lambda : public fork_join_sub_task {
  const Function *function_;

 public:
  explicit fork_join_lambda(const Function *function) : function_{function} {};

 protected:
  void execute_internal() override {
    (*function_)(this);
  }
};

class fork_join_task : public abstract_task {
  friend class fork_join_sub_task;

  fork_join_sub_task *root_task_;
  fork_join_sub_task *currently_executing_;
  data_structures::aligned_stack *my_stack_;

  // Double-Ended Queue management
  data_structures::deque<fork_join_sub_task> deque_;

  // Steal Management
  fork_join_sub_task *last_stolen_;

  fork_join_sub_task *get_local_sub_task();
  fork_join_sub_task *get_stolen_sub_task();

  bool internal_stealing(abstract_task *other_task) override;
  bool split_task(base::spin_lock * /*lock*/) override;

 public:
  explicit fork_join_task(fork_join_sub_task *root_task, const abstract_task::id &id);
  void execute() override;
  fork_join_sub_task *currently_executing() const;
};

template<typename T>
void fork_join_sub_task::spawn_child(const T &task) {
  PROFILE_FORK_JOIN_STEALING("spawn_child")
  static_assert(std::is_base_of<fork_join_sub_task, T>::value, "Only pass fork_join_sub_task subclasses!");

  T *new_task = tbb_task_->my_stack_->push(task);
  spawn_child_internal(new_task);
}

}
}
}
#endif //PLS_TBB_LIKE_TASK_H
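The pattern that invoke_parallel_impl.h builds on top of these classes, condensed into one hedged sketch; do_left/do_right are placeholders for user work and not names from the repository.

using namespace pls::internal::scheduling;

auto body = [&](fork_join_sub_task *this_task) {
  auto left = [&](fork_join_sub_task *) { do_left(); };
  auto left_task = fork_join_lambda<decltype(left)>(&left);

  this_task->spawn_child(left_task);  // copied onto the task's aligned_stack, may be stolen
  do_right();                         // remaining work runs inline, no sub_task object
  this_task->wait_for_all();          // join all spawned children before returning
};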
lib/pls/include/pls/internal/scheduling/root_task.h
@@ -10,71 +10,73 @@
#include "abstract_task.h"

namespace pls {
namespace internal {
namespace scheduling {

template<typename Function>
class root_task : public abstract_task {
  Function function_;
  std::atomic_uint8_t finished_;

 public:
  static constexpr auto create_id = helpers::unique_id::create<root_task<Function>>;

  explicit root_task(Function function) :
      abstract_task{0, create_id()},
      function_{function},
      finished_{0} {}
  root_task(const root_task &other) :
      abstract_task{0, create_id()},
      function_{other.function_},
      finished_{0} {}

  bool finished() {
    return finished_;
  }

  void execute() override {
    PROFILE_WORK_BLOCK("execute root_task");
    function_();
    finished_ = 1;
  }

  bool internal_stealing(abstract_task * /*other_task*/) override {
    return false;
  }

  bool split_task(base::spin_lock * /*lock*/) override {
    return false;
  }
};

template<typename Function>
class root_worker_task : public abstract_task {
  root_task<Function> *master_task_;

 public:
  static constexpr auto create_id = root_task<Function>::create_id;

  explicit root_worker_task(root_task<Function> *master_task) :
      abstract_task{0, create_id()},
      master_task_{master_task} {}

  void execute() override {
    PROFILE_WORK_BLOCK("execute root_task");
    do {
      steal_work();
    } while (!master_task_->finished());
  }

  bool internal_stealing(abstract_task * /*other_task*/) override {
    return false;
  }

  bool split_task(base::spin_lock * /*lock*/) override {
    return false;
  }
};

}
}
}
#endif //PLS_ROOT_MASTER_TASK_H
lib/pls/include/pls/internal/scheduling/run_on_n_threads_task.h
View file @
aa270645
...
...
@@ -12,107 +12,110 @@
#include "scheduler.h"
namespace
pls
{
namespace
internal
{
namespace
scheduling
{
template
<
typename
Function
>
class
run_on_n_threads_task
:
public
abstract_task
{
template
<
typename
F
>
friend
class
run_on_n_threads_task_worker
;
Function
function_
;
// Improvement: Remove lock and replace by atomic variable (performance)
int
counter
;
base
::
spin_lock
counter_lock_
;
int
decrement_counter
()
{
std
::
lock_guard
<
base
::
spin_lock
>
lock
{
counter_lock_
};
counter
--
;
return
counter
;
}
int
get_counter
()
{
std
::
lock_guard
<
base
::
spin_lock
>
lock
{
counter_lock_
};
return
counter
;
}
public
:
static
constexpr
auto
create_id
=
helpers
::
unique_id
::
create
<
run_on_n_threads_task
<
Function
>>
;
run_on_n_threads_task
(
Function
function
,
int
num_threads
)
:
abstract_task
{
0
,
create_id
()},
function_
{
function
},
counter
{
num_threads
-
1
}
{}
void
execute
()
override
{
// Execute our function ONCE
function_
();
// Steal until we are finished (other threads executed)
do
{
steal_work
();
}
while
(
get_counter
()
>
0
);
std
::
cout
<<
"Finished Master!"
<<
std
::
endl
;
}
bool
internal_stealing
(
abstract_task
*
/*other_task*/
)
override
{
return
false
;
}
bool
split_task
(
base
::
spin_lock
*
lock
)
override
;
};
template
<
typename
Function
>
class
run_on_n_threads_task_worker
:
public
abstract_task
{
Function
function_
;
run_on_n_threads_task
<
Function
>*
root_
;
public
:
static
constexpr
auto
create_id
=
helpers
::
unique_id
::
create
<
run_on_n_threads_task_worker
<
Function
>>
;
run_on_n_threads_task_worker
(
Function
function
,
run_on_n_threads_task
<
Function
>*
root
)
:
abstract_task
{
0
,
create_id
()},
function_
{
function
},
root_
{
root
}
{}
void
execute
()
override
{
if
(
root_
->
decrement_counter
()
>=
0
)
{
function_
();
std
::
cout
<<
"Finished Worker!"
<<
std
::
endl
;
}
else
{
std
::
cout
<<
"Abandoned Worker!"
<<
std
::
endl
;
}
}
bool
internal_stealing
(
abstract_task
*
/*other_task*/
)
override
{
return
false
;
}
bool
split_task
(
base
::
spin_lock
*
/*lock*/
)
override
{
return
false
;
}
};
template
<
typename
Function
>
bool
run_on_n_threads_task
<
Function
>::
split_task
(
base
::
spin_lock
*
lock
)
{
if
(
get_counter
()
<=
0
)
{
return
false
;
}
// In success case, unlock.
// TODO: this locking is complicated and error prone.
lock
->
unlock
();
auto
scheduler
=
base
::
this_thread
::
state
<
thread_state
>
()
->
scheduler_
;
auto
task
=
run_on_n_threads_task_worker
<
Function
>
{
function_
,
this
};
scheduler
->
execute_task
(
task
,
depth
());
return
true
;
}
template
<
typename
Function
>
run_on_n_threads_task
<
Function
>
create_run_on_n_threads_task
(
Function
function
,
int
num_threads
)
{
return
run_on_n_threads_task
<
Function
>
{
function
,
num_threads
};
}
}
namespace
internal
{
namespace
scheduling
{
template
<
typename
Function
>
class
run_on_n_threads_task
:
public
abstract_task
{
template
<
typename
F
>
friend
class
run_on_n_threads_task_worker
;
Function
function_
;
// Improvement: Remove lock and replace by atomic variable (performance)
int
counter
;
base
::
spin_lock
counter_lock_
;
int
decrement_counter
()
{
std
::
lock_guard
<
base
::
spin_lock
>
lock
{
counter_lock_
};
counter
--
;
return
counter
;
}
int
get_counter
()
{
std
::
lock_guard
<
base
::
spin_lock
>
lock
{
counter_lock_
};
return
counter
;
}
public
:
static
constexpr
auto
create_id
=
helpers
::
unique_id
::
create
<
run_on_n_threads_task
<
Function
>>
;
run_on_n_threads_task
(
Function
function
,
int
num_threads
)
:
abstract_task
{
0
,
create_id
()},
function_
{
function
},
counter
{
num_threads
-
1
}
{}
void
execute
()
override
{
// Execute our function ONCE
function_
();
// Steal until we are finished (other threads executed)
do
{
steal_work
();
}
while
(
get_counter
()
>
0
);
std
::
cout
<<
"Finished Master!"
<<
std
::
endl
;
}
bool
internal_stealing
(
abstract_task
*
/*other_task*/
)
override
{
return
false
;
}
bool
split_task
(
base
::
spin_lock
*
lock
)
override
;
};
template
<
typename
Function
>
class
run_on_n_threads_task_worker
:
public
abstract_task
{
Function
function_
;
run_on_n_threads_task
<
Function
>
*
root_
;
public
:
static
constexpr
auto
create_id
=
helpers
::
unique_id
::
create
<
run_on_n_threads_task_worker
<
Function
>>
;
run_on_n_threads_task_worker
(
Function
function
,
run_on_n_threads_task
<
Function
>
*
root
)
:
abstract_task
{
0
,
create_id
()},
function_
{
function
},
root_
{
root
}
{}
void
execute
()
override
{
if
(
root_
->
decrement_counter
()
>=
0
)
{
function_
();
std
::
cout
<<
"Finished Worker!"
<<
std
::
endl
;
}
else
{
std
::
cout
<<
"Abandoned Worker!"
<<
std
::
endl
;
}
}
bool
internal_stealing
(
abstract_task
*
/*other_task*/
)
override
{
return
false
;
}
bool
split_task
(
base
::
spin_lock
*
/*lock*/
)
override
{
return
false
;
}
};
template
<
typename
Function
>
bool
run_on_n_threads_task
<
Function
>::
split_task
(
base
::
spin_lock
*
lock
)
{
if
(
get_counter
()
<=
0
)
{
return
false
;
}
// In success case, unlock.
// TODO: this locking is complicated and error prone.
lock
->
unlock
();
auto
scheduler
=
base
::
this_thread
::
state
<
thread_state
>
()
->
scheduler_
;
auto
task
=
run_on_n_threads_task_worker
<
Function
>
{
function_
,
this
};
scheduler
->
execute_task
(
task
,
depth
());
return
true
;
}
template
<
typename
Function
>
run_on_n_threads_task
<
Function
>
create_run_on_n_threads_task
(
Function
function
,
int
num_threads
)
{
return
run_on_n_threads_task
<
Function
>
{
function
,
num_threads
};
}
}
}
}
#endif //PLS_RUN_ON_N_THREADS_TASK_H
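For orientation while reading the diff: a minimal usage sketch of the helper defined above, not part of this commit. The lambda body and the thread count are invented for illustration; a task built this way is handed to the scheduler through scheduler::execute_task.

// Hypothetical example: run one function on up to 4 of the scheduler's threads.
auto task = create_run_on_n_threads_task([]() {
  // work executed once per participating thread
}, 4);
scheduler::execute_task(task);  // depth defaults to -1 and is then set implicitly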
lib/pls/include/pls/internal/scheduling/scheduler.h
@@ -17,50 +17,52 @@
#include "scheduler_memory.h"
namespace
pls
{
namespace
internal
{
namespace
scheduling
{
void
worker_routine
();
using
scheduler_thread
=
base
::
thread
<
decltype
(
&
worker_routine
),
thread_state
>
;
class
scheduler
{
friend
void
worker_routine
();
const
unsigned
int
num_threads_
;
scheduler_memory
*
memory_
;
base
::
barrier
sync_barrier_
;
bool
terminated_
;
public
:
explicit
scheduler
(
scheduler_memory
*
memory
,
unsigned
int
num_threads
);
~
scheduler
();
/**
* Wakes up the thread pool.
* Code inside the Function lambda can invoke all parallel APIs.
*
* @param work_section generic function or lambda to be executed in the scheduler's context.
*/
template
<
typename
Function
>
void
perform_work
(
Function
work_section
);
/**
* Executes a top-level-task (children of abstract_task) on this thread.
*
* @param task The task to be executed.
* @param depth Optional: depth of the new task, otherwise set implicitly.
*/
template
<
typename
Task
>
static
void
execute_task
(
Task
&
task
,
int
depth
=-
1
);
static
abstract_task
*
current_task
()
{
return
base
::
this_thread
::
state
<
thread_state
>
()
->
current_task_
;
}
void
terminate
(
bool
wait_for_workers
=
true
);
unsigned
int
num_threads
()
const
{
return
num_threads_
;
}
thread_state
*
thread_state_for
(
size_t
id
)
{
return
memory_
->
thread_state_for
(
id
);
}
};
}
}
namespace
internal
{
namespace
scheduling
{
void
worker_routine
();
using
scheduler_thread
=
base
::
thread
<
decltype
(
&
worker_routine
),
thread_state
>
;
class
scheduler
{
friend
void
worker_routine
();
const
unsigned
int
num_threads_
;
scheduler_memory
*
memory_
;
base
::
barrier
sync_barrier_
;
bool
terminated_
;
public
:
explicit
scheduler
(
scheduler_memory
*
memory
,
unsigned
int
num_threads
);
~
scheduler
();
/**
* Wakes up the thread pool.
* Code inside the Function lambda can invoke all parallel APIs.
*
* @param work_section generic function or lambda to be executed in the scheduler's context.
*/
template
<
typename
Function
>
void
perform_work
(
Function
work_section
);
/**
* Executes a top-level-task (children of abstract_task) on this thread.
*
* @param task The task to be executed.
* @param depth Optional: depth of the new task, otherwise set implicitly.
*/
template
<
typename
Task
>
static
void
execute_task
(
Task
&
task
,
int
depth
=
-
1
);
static
abstract_task
*
current_task
()
{
return
base
::
this_thread
::
state
<
thread_state
>
()
->
current_task_
;
}
void
terminate
(
bool
wait_for_workers
=
true
);
unsigned
int
num_threads
()
const
{
return
num_threads_
;
}
thread_state
*
thread_state_for
(
size_t
id
)
{
return
memory_
->
thread_state_for
(
id
);
}
};
}
}
}
#include "scheduler_impl.h"
lib/pls/include/pls/internal/scheduling/scheduler_impl.h
@@ -3,70 +3,72 @@
#define PLS_SCHEDULER_IMPL_H

namespace pls {
namespace internal {
namespace scheduling {

template<typename Function>
void scheduler::perform_work(Function work_section) {
  PROFILE_WORK_BLOCK("scheduler::perform_work")
  root_task<Function> master{work_section};

  // Push root task on stacks
  auto new_master = memory_->task_stack_for(0)->push(master);
  memory_->thread_state_for(0)->root_task_ = new_master;
  memory_->thread_state_for(0)->current_task_ = new_master;
  for (unsigned int i = 1; i < num_threads_; i++) {
    root_worker_task<Function> worker{new_master};
    auto new_worker = memory_->task_stack_for(0)->push(worker);
    memory_->thread_state_for(i)->root_task_ = new_worker;
    memory_->thread_state_for(i)->current_task_ = new_worker;
  }

  // Perform and wait for work
  sync_barrier_.wait();  // Trigger threads to wake up
  sync_barrier_.wait();  // Wait for threads to finish

  // Clean up stack
  memory_->task_stack_for(0)->pop<typeof(master)>();
  for (unsigned int i = 1; i < num_threads_; i++) {
    root_worker_task<Function> worker{new_master};
    memory_->task_stack_for(0)->pop<typeof(worker)>();
  }
}

template<typename Task>
void scheduler::execute_task(Task &task, int depth) {
  static_assert(std::is_base_of<abstract_task, Task>::value, "Only pass abstract_task subclasses!");

  auto my_state = base::this_thread::state<thread_state>();
  abstract_task *old_task;
  abstract_task *new_task;

  // Init Task
  {
    std::lock_guard<base::spin_lock> lock{my_state->lock_};
    old_task = my_state->current_task_;
    new_task = my_state->task_stack_->push(task);

    new_task->set_depth(depth >= 0 ? depth : old_task->depth() + 1);
    my_state->current_task_ = new_task;
    old_task->set_child(new_task);
  }

  // Run Task
  new_task->execute();

  // Teardown state back to before the task was executed
  {
    std::lock_guard<base::spin_lock> lock{my_state->lock_};
    old_task->set_child(nullptr);
    my_state->current_task_ = old_task;
    my_state->task_stack_->pop<Task>();
  }
}

}
}
}
#endif //PLS_SCHEDULER_IMPL_H
lib/pls/include/pls/internal/scheduling/scheduler_memory.h
@@ -7,72 +7,75 @@
#define PLS_SCHEDULER_MEMORY_H

namespace pls {
namespace internal {
namespace scheduling {

void worker_routine();
using scheduler_thread = base::thread<decltype(&worker_routine), thread_state>;

class scheduler_memory {
 public:
  virtual size_t max_threads() const = 0;
  virtual thread_state *thread_state_for(size_t id) = 0;
  virtual scheduler_thread *thread_for(size_t id) = 0;
  virtual data_structures::aligned_stack *task_stack_for(size_t id) = 0;
};

template<size_t MAX_THREADS, size_t TASK_STACK_SIZE>
class static_scheduler_memory : public scheduler_memory {
  // Everyone of these types has to live on its own cache line,
  // as each thread uses one of them independently.
  // Therefore it would be a major performance hit if we shared cache lines on these.
  using aligned_thread = base::alignment::aligned_wrapper<scheduler_thread>;
  using aligned_thread_state = base::alignment::aligned_wrapper<thread_state>;
  using aligned_thread_stack = base::alignment::aligned_wrapper<std::array<char, TASK_STACK_SIZE>>;
  using aligned_aligned_stack = base::alignment::aligned_wrapper<data_structures::aligned_stack>;

  std::array<aligned_thread, MAX_THREADS> threads_;
  std::array<aligned_thread_state, MAX_THREADS> thread_states_;
  std::array<aligned_thread_stack, MAX_THREADS> task_stacks_memory_;
  std::array<aligned_aligned_stack, MAX_THREADS> task_stacks_;

 public:
  static_scheduler_memory() {
    for (size_t i = 0; i < MAX_THREADS; i++) {
      new ((void *) task_stacks_[i].pointer()) data_structures::aligned_stack(
          task_stacks_memory_[i].pointer()->data(), TASK_STACK_SIZE);
    }
  }

  size_t max_threads() const override {
    return MAX_THREADS;
  }

  thread_state *thread_state_for(size_t id) override {
    return thread_states_[id].pointer();
  }

  scheduler_thread *thread_for(size_t id) override {
    return threads_[id].pointer();
  }

  data_structures::aligned_stack *task_stack_for(size_t id) override {
    return task_stacks_[id].pointer();
  }
};

class malloc_scheduler_memory : public scheduler_memory {
  // Everyone of these types has to live on its own cache line,
  // as each thread uses one of them independently.
  // Therefore it would be a major performance hit if we shared cache lines on these.
  using aligned_thread = base::alignment::aligned_wrapper<scheduler_thread>;
  using aligned_thread_state = base::alignment::aligned_wrapper<thread_state>;
  using aligned_aligned_stack = base::alignment::aligned_wrapper<data_structures::aligned_stack>;

  const size_t num_threads_;

  aligned_thread *threads_;
  aligned_thread_state *thread_states_;
  char **task_stacks_memory_;
  aligned_aligned_stack *task_stacks_;

 public:
  explicit malloc_scheduler_memory(size_t num_threads, size_t memory_per_stack = 2 << 16);
  ~malloc_scheduler_memory();

  size_t max_threads() const override {
    return num_threads_;
  }

  thread_state *thread_state_for(size_t id) override {
    return thread_states_[id].pointer();
  }

  scheduler_thread *thread_for(size_t id) override {
    return threads_[id].pointer();
  }

  data_structures::aligned_stack *task_stack_for(size_t id) override {
    return task_stacks_[id].pointer();
  }
};

}
}
}
#endif //PLS_SCHEDULER_MEMORY_H
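The two scheduler_memory implementations above differ only in where the per-thread state lives. A short sketch of the choice (the sizes are example values, not taken from the commit):

// Hypothetical example: compile-time sized memory vs. heap-allocated memory.
static pls::static_scheduler_memory<8, 1 << 14> static_memory; // fixed maximum of 8 threads, 16 KiB task stacks
pls::malloc_scheduler_memory dynamic_memory{8};                // sizes chosen at runtime, allocated cache-line aligned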
lib/pls/include/pls/internal/scheduling/thread_state.h
@@ -8,38 +8,40 @@
#include "abstract_task.h"
namespace
pls
{
namespace
internal
{
namespace
scheduling
{
// forward declaration
class
scheduler
;
struct
thread_state
{
scheduler
*
scheduler_
;
abstract_task
*
root_task_
;
abstract_task
*
current_task_
;
data_structures
::
aligned_stack
*
task_stack_
;
size_t
id_
;
base
::
spin_lock
lock_
;
std
::
minstd_rand
random_
;
thread_state
()
:
scheduler_
{
nullptr
},
root_task_
{
nullptr
},
current_task_
{
nullptr
},
task_stack_
{
nullptr
},
id_
{
0
},
random_
{
id_
}
{};
thread_state
(
scheduler
*
scheduler
,
data_structures
::
aligned_stack
*
task_stack
,
unsigned
int
id
)
:
scheduler_
{
scheduler
},
root_task_
{
nullptr
},
current_task_
{
nullptr
},
task_stack_
{
task_stack
},
id_
{
id
},
random_
{
id_
}
{}
};
}
}
namespace
internal
{
namespace
scheduling
{
// forward declaration
class
scheduler
;
struct
thread_state
{
scheduler
*
scheduler_
;
abstract_task
*
root_task_
;
abstract_task
*
current_task_
;
data_structures
::
aligned_stack
*
task_stack_
;
size_t
id_
;
base
::
spin_lock
lock_
;
std
::
minstd_rand
random_
;
thread_state
()
:
scheduler_
{
nullptr
},
root_task_
{
nullptr
},
current_task_
{
nullptr
},
task_stack_
{
nullptr
},
id_
{
0
},
random_
{
id_
}
{};
thread_state
(
scheduler
*
scheduler
,
data_structures
::
aligned_stack
*
task_stack
,
unsigned
int
id
)
:
scheduler_
{
scheduler
},
root_task_
{
nullptr
},
current_task_
{
nullptr
},
task_stack_
{
task_stack
},
id_
{
id
},
random_
{
id_
}
{}
};
}
}
}
#endif //PLS_THREAD_STATE_H
lib/pls/include/pls/pls.h
@@ -8,18 +8,20 @@
#include "pls/internal/helpers/unique_id.h"
namespace
pls
{
using
internal
::
scheduling
::
static_scheduler_memory
;
using
internal
::
scheduling
::
malloc_scheduler_memory
;
using
internal
::
scheduling
::
scheduler
;
using
task_id
=
internal
::
scheduling
::
abstract_task
::
id
;
using
internal
::
scheduling
::
static_scheduler_memory
;
using
internal
::
scheduling
::
malloc_scheduler_memory
;
using
unique_id
=
internal
::
helpers
::
unique_id
;
using
internal
::
scheduling
::
scheduler
;
using
task_id
=
internal
::
scheduling
::
abstract_task
::
id
;
using
internal
::
scheduling
::
fork_join_sub_task
;
using
internal
::
scheduling
::
fork_join_task
;
using
unique_id
=
internal
::
helpers
::
unique_id
;
using
internal
::
scheduling
::
fork_join_sub_task
;
using
internal
::
scheduling
::
fork_join_task
;
using
algorithm
::
invoke_parallel
;
using
algorithm
::
invoke_parallel
;
}
#endif
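Putting the exported names together, a minimal end-to-end sketch (illustrative only; invoke_parallel's exact signature lives in pls/algorithms/invoke_parallel.h, which is not shown in this excerpt, and is assumed here to take the callables to run in parallel):

// Hypothetical example built from the names exported by pls.h.
#include "pls/pls.h"

int main() {
  pls::malloc_scheduler_memory memory{4};
  pls::scheduler scheduler{&memory, 4};
  scheduler.perform_work([]() {
    pls::invoke_parallel([]() { /* left half  */ },
                         []() { /* right half */ });
  });
  return 0;
}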
lib/pls/src/internal/base/alignment.cpp
@@ -2,26 +2,28 @@
#include "pls/internal/base/system_details.h"
namespace
pls
{
namespace
internal
{
namespace
base
{
namespace
alignment
{
void
*
allocate_aligned
(
size_t
size
)
{
return
aligned_alloc
(
system_details
::
CACHE_LINE_SIZE
,
size
);
}
namespace
internal
{
namespace
base
{
namespace
alignment
{
std
::
uintptr_t
next_alignment
(
std
::
uintptr_t
size
)
{
std
::
uintptr_t
miss_alignment
=
size
%
base
::
system_details
::
CACHE_LINE_SIZE
;
if
(
miss_alignment
==
0
)
{
return
size
;
}
else
{
return
size
+
(
base
::
system_details
::
CACHE_LINE_SIZE
-
miss_alignment
);
}
}
void
*
allocate_aligned
(
size_t
size
)
{
return
aligned_alloc
(
system_details
::
CACHE_LINE_SIZE
,
size
);
}
std
::
uintptr_t
next_alignment
(
std
::
uintptr_t
size
)
{
std
::
uintptr_t
miss_alignment
=
size
%
base
::
system_details
::
CACHE_LINE_SIZE
;
if
(
miss_alignment
==
0
)
{
return
size
;
}
else
{
return
size
+
(
base
::
system_details
::
CACHE_LINE_SIZE
-
miss_alignment
);
}
}
char
*
next_alignment
(
char
*
pointer
)
{
return
reinterpret_cast
<
char
*>
(
next_alignment
(
reinterpret_cast
<
std
::
uintptr_t
>
(
pointer
)));
}
char
*
next_alignment
(
char
*
pointer
)
{
return
reinterpret_cast
<
char
*>
(
next_alignment
(
reinterpret_cast
<
std
::
uintptr_t
>
(
pointer
)));
}
}
}
}
}
}
}
}
lib/pls/src/internal/base/barrier.cpp
#include "pls/internal/base/barrier.h"
namespace
pls
{
namespace
internal
{
namespace
base
{
barrier
::
barrier
(
const
unsigned
int
count
)
:
barrier_
{}
{
pthread_barrier_init
(
&
barrier_
,
nullptr
,
count
);
}
namespace
internal
{
namespace
base
{
barrier
::~
barrier
()
{
pthread_barrier_destroy
(
&
barrier_
);
}
barrier
::
barrier
(
const
unsigned
int
count
)
:
barrier_
{}
{
pthread_barrier_init
(
&
barrier_
,
nullptr
,
count
);
}
barrier
::~
barrier
()
{
pthread_barrier_destroy
(
&
barrier_
);
}
void
barrier
::
wait
()
{
pthread_barrier_wait
(
&
barrier_
);
}
}
}
void
barrier
::
wait
()
{
pthread_barrier_wait
(
&
barrier_
);
}
}
}
}
lib/pls/src/internal/base/tas_spin_lock.cpp
@@ -2,33 +2,35 @@
#include "pls/internal/base/tas_spin_lock.h"
namespace
pls
{
namespace
internal
{
namespace
base
{
void
tas_spin_lock
::
lock
()
{
PROFILE_LOCK
(
"Acquire Lock"
)
int
tries
=
0
;
while
(
flag_
.
test_and_set
(
std
::
memory_order_acquire
))
{
tries
++
;
if
(
tries
%
yield_at_tries_
==
0
)
{
this_thread
::
yield
();
}
}
}
namespace
internal
{
namespace
base
{
bool
tas_spin_lock
::
try_lock
(
unsigned
int
num_tries
)
{
PROFILE_LOCK
(
"Try
Acquire Lock"
)
while
(
flag_
.
test_and_set
(
std
::
memory_order_acquire
))
{
num_tries
--
;
if
(
num_tries
<=
0
)
{
return
false
;
}
}
return
true
;
}
void
tas_spin_lock
::
lock
(
)
{
PROFILE_LOCK
(
"
Acquire Lock"
)
int
tries
=
0
;
while
(
flag_
.
test_and_set
(
std
::
memory_order_acquire
))
{
tries
++
;
if
(
tries
%
yield_at_tries_
==
0
)
{
this_thread
::
yield
();
}
}
}
void
tas_spin_lock
::
unlock
()
{
flag_
.
clear
(
std
::
memory_order_release
);
}
}
bool
tas_spin_lock
::
try_lock
(
unsigned
int
num_tries
)
{
PROFILE_LOCK
(
"Try Acquire Lock"
)
while
(
flag_
.
test_and_set
(
std
::
memory_order_acquire
))
{
num_tries
--
;
if
(
num_tries
<=
0
)
{
return
false
;
}
}
return
true
;
}
void
tas_spin_lock
::
unlock
()
{
flag_
.
clear
(
std
::
memory_order_release
);
}
}
}
}
lib/pls/src/internal/base/thread.cpp
#include "pls/internal/base/thread.h"
namespace
pls
{
namespace
internal
{
namespace
base
{
namespace
internal
{
namespace
base
{
#ifdef PLS_THREAD_SPECIFIC_PTHREAD
pthread_key_t
this_thread
::
local_storage_key_
=
false
;
bool
this_thread
::
local_storage_key_initialized_
;
pthread_key_t
this_thread
::
local_storage_key_
=
false
;
bool
this_thread
::
local_storage_key_initialized_
;
#endif
#ifdef PLS_THREAD_SPECIFIC_COMPILER
__thread
void
*
this_thread
::
local_state_
;
__thread
void
*
this_thread
::
local_state_
;
#endif
// implementation in header (C++ templating)
}
}
// implementation in header (C++ templating)
}
}
}
lib/pls/src/internal/base/ttas_spin_lock.cpp
@@ -2,46 +2,48 @@
#include "pls/internal/base/ttas_spin_lock.h"
namespace
pls
{
namespace
internal
{
namespace
base
{
void
ttas_spin_lock
::
lock
()
{
PROFILE_LOCK
(
"Acquire Lock"
)
int
tries
=
0
;
int
expected
=
0
;
do
{
while
(
flag_
.
load
(
std
::
memory_order_relaxed
)
==
1
)
{
tries
++
;
if
(
tries
%
yield_at_tries_
==
0
)
{
this_thread
::
yield
();
}
}
expected
=
0
;
}
while
(
!
flag_
.
compare_exchange_weak
(
expected
,
1
,
std
::
memory_order_acquire
));
}
bool
ttas_spin_lock
::
try_lock
(
unsigned
int
num_tries
)
{
PROFILE_LOCK
(
"Try Acquire Lock"
)
int
expected
=
0
;
do
{
while
(
flag_
.
load
(
std
::
memory_order_relaxed
)
==
1
)
{
num_tries
--
;
if
(
num_tries
<=
0
)
{
return
false
;
}
}
expected
=
0
;
}
while
(
!
flag_
.
compare_exchange_weak
(
expected
,
1
,
std
::
memory_order_acquire
));
return
true
;
}
void
ttas_spin_lock
::
unlock
()
{
flag_
.
store
(
0
,
std
::
memory_order_release
);
}
}
namespace
internal
{
namespace
base
{
void
ttas_spin_lock
::
lock
()
{
PROFILE_LOCK
(
"Acquire Lock"
)
int
tries
=
0
;
int
expected
=
0
;
do
{
while
(
flag_
.
load
(
std
::
memory_order_relaxed
)
==
1
)
{
tries
++
;
if
(
tries
%
yield_at_tries_
==
0
)
{
this_thread
::
yield
();
}
}
expected
=
0
;
}
while
(
!
flag_
.
compare_exchange_weak
(
expected
,
1
,
std
::
memory_order_acquire
));
}
bool
ttas_spin_lock
::
try_lock
(
unsigned
int
num_tries
)
{
PROFILE_LOCK
(
"Try Acquire Lock"
)
int
expected
=
0
;
do
{
while
(
flag_
.
load
(
std
::
memory_order_relaxed
)
==
1
)
{
num_tries
--
;
if
(
num_tries
<=
0
)
{
return
false
;
}
}
expected
=
0
;
}
while
(
!
flag_
.
compare_exchange_weak
(
expected
,
1
,
std
::
memory_order_acquire
));
return
true
;
}
void
ttas_spin_lock
::
unlock
()
{
flag_
.
store
(
0
,
std
::
memory_order_release
);
}
}
}
}
lib/pls/src/internal/data_structures/aligned_stack.cpp
@@ -2,12 +2,14 @@
#include "pls/internal/base/system_details.h"
namespace
pls
{
namespace
internal
{
namespace
data_structures
{
aligned_stack
::
aligned_stack
(
char
*
memory_region
,
const
std
::
size_t
size
)
:
memory_start_
{
memory_region
},
memory_end_
{
memory_region
+
size
},
head_
{
base
::
alignment
::
next_alignment
(
memory_start_
)}
{}
}
}
namespace
internal
{
namespace
data_structures
{
aligned_stack
::
aligned_stack
(
char
*
memory_region
,
const
std
::
size_t
size
)
:
memory_start_
{
memory_region
},
memory_end_
{
memory_region
+
size
},
head_
{
base
::
alignment
::
next_alignment
(
memory_start_
)}
{}
}
}
}
lib/pls/src/internal/data_structures/deque.cpp
@@ -3,56 +3,58 @@
#include "pls/internal/data_structures/deque.h"
namespace
pls
{
namespace
internal
{
namespace
data_structures
{
deque_item
*
deque_internal
::
pop_head_internal
()
{
std
::
lock_guard
<
base
::
spin_lock
>
lock
{
lock_
};
if
(
head_
==
nullptr
)
{
return
nullptr
;
}
deque_item
*
result
=
head_
;
head_
=
head_
->
prev_
;
if
(
head_
==
nullptr
)
{
tail_
=
nullptr
;
}
else
{
head_
->
next_
=
nullptr
;
}
return
result
;
}
deque_item
*
deque_internal
::
pop_tail_internal
()
{
std
::
lock_guard
<
base
::
spin_lock
>
lock
{
lock_
};
if
(
tail_
==
nullptr
)
{
return
nullptr
;
}
deque_item
*
result
=
tail_
;
tail_
=
tail_
->
next_
;
if
(
tail_
==
nullptr
)
{
head_
=
nullptr
;
}
else
{
tail_
->
prev_
=
nullptr
;
}
return
result
;
}
void
deque_internal
::
push_tail_internal
(
deque_item
*
new_item
)
{
std
::
lock_guard
<
base
::
spin_lock
>
lock
{
lock_
};
if
(
tail_
!=
nullptr
)
{
tail_
->
prev_
=
new_item
;
}
else
{
head_
=
new_item
;
}
new_item
->
next_
=
tail_
;
new_item
->
prev_
=
nullptr
;
tail_
=
new_item
;
}
}
}
namespace
internal
{
namespace
data_structures
{
deque_item
*
deque_internal
::
pop_head_internal
()
{
std
::
lock_guard
<
base
::
spin_lock
>
lock
{
lock_
};
if
(
head_
==
nullptr
)
{
return
nullptr
;
}
deque_item
*
result
=
head_
;
head_
=
head_
->
prev_
;
if
(
head_
==
nullptr
)
{
tail_
=
nullptr
;
}
else
{
head_
->
next_
=
nullptr
;
}
return
result
;
}
deque_item
*
deque_internal
::
pop_tail_internal
()
{
std
::
lock_guard
<
base
::
spin_lock
>
lock
{
lock_
};
if
(
tail_
==
nullptr
)
{
return
nullptr
;
}
deque_item
*
result
=
tail_
;
tail_
=
tail_
->
next_
;
if
(
tail_
==
nullptr
)
{
head_
=
nullptr
;
}
else
{
tail_
->
prev_
=
nullptr
;
}
return
result
;
}
void
deque_internal
::
push_tail_internal
(
deque_item
*
new_item
)
{
std
::
lock_guard
<
base
::
spin_lock
>
lock
{
lock_
};
if
(
tail_
!=
nullptr
)
{
tail_
->
prev_
=
new_item
;
}
else
{
head_
=
new_item
;
}
new_item
->
next_
=
tail_
;
new_item
->
prev_
=
nullptr
;
tail_
=
new_item
;
}
}
}
}
lib/pls/src/internal/scheduling/abstract_task.cpp
@@ -5,72 +5,74 @@
#include "pls/internal/scheduling/scheduler.h"
namespace
pls
{
namespace
internal
{
namespace
scheduling
{
bool
abstract_task
::
steal_work
()
{
PROFILE_STEALING
(
"abstract_task::steal_work"
)
const
auto
my_state
=
base
::
this_thread
::
state
<
thread_state
>
();
const
auto
my_scheduler
=
my_state
->
scheduler_
;
namespace
internal
{
namespace
scheduling
{
const
size_t
my_id
=
my_state
->
id_
;
const
size_t
offset
=
my_state
->
random_
()
%
my_scheduler
->
num_threads
();
const
size_t
max_tries
=
1
;
// my_scheduler->num_threads(); TODO: Tune this value
for
(
size_t
i
=
0
;
i
<
max_tries
;
i
++
)
{
size_t
target
=
(
offset
+
i
)
%
my_scheduler
->
num_threads
();
if
(
target
==
my_id
)
{
continue
;
}
auto
target_state
=
my_scheduler
->
thread_state_for
(
target
);
bool
abstract_task
::
steal_work
()
{
PROFILE_STEALING
(
"abstract_task::steal_work"
)
const
auto
my_state
=
base
::
this_thread
::
state
<
thread_state
>
();
const
auto
my_scheduler
=
my_state
->
scheduler_
;
// TODO: Cleaner Locking Using std::guarded_lock
target_state
->
lock_
.
lock
();
const
size_t
my_id
=
my_state
->
id_
;
const
size_t
offset
=
my_state
->
random_
()
%
my_scheduler
->
num_threads
();
const
size_t
max_tries
=
1
;
// my_scheduler->num_threads(); TODO: Tune this value
for
(
size_t
i
=
0
;
i
<
max_tries
;
i
++
)
{
size_t
target
=
(
offset
+
i
)
%
my_scheduler
->
num_threads
();
if
(
target
==
my_id
)
{
continue
;
}
auto
target_state
=
my_scheduler
->
thread_state_for
(
target
);
// Dig down to our level
PROFILE_STEALING
(
"Go to our level"
)
abstract_task
*
current_task
=
target_state
->
root_task_
;
while
(
current_task
!=
nullptr
&&
current_task
->
depth
()
<
depth
())
{
current_task
=
current_task
->
child_task_
;
}
PROFILE_END_BLOCK
// TODO: Cleaner Locking Using std::guarded_lock
target_state
->
lock_
.
lock
();
// Try to steal 'internal', e.g. for_join_sub_tasks in a fork_join_task constellation
PROFILE_STEALING
(
"Internal Steal"
)
if
(
current_task
!=
nullptr
)
{
// See if it equals our type and depth of task
if
(
current_task
->
unique_id_
==
unique_id_
&&
current_task
->
depth_
==
depth_
)
{
if
(
internal_stealing
(
current_task
))
{
// internal steal was a success, hand it back to the internal scheduler
target_state
->
lock_
.
unlock
();
return
true
;
}
// Dig down to our level
PROFILE_STEALING
(
"Go to our level"
)
abstract_task
*
current_task
=
target_state
->
root_task_
;
while
(
current_task
!=
nullptr
&&
current_task
->
depth
()
<
depth
())
{
current_task
=
current_task
->
child_task_
;
}
PROFILE_END_BLOCK
// No success, we need to steal work from a deeper level using 'top level task stealing'
current_task
=
current_task
->
child_task_
;
}
}
PROFILE_END_BLOCK
;
// Try to steal 'internal', e.g. for_join_sub_tasks in a fork_join_task constellation
PROFILE_STEALING
(
"Internal Steal"
)
if
(
current_task
!=
nullptr
)
{
// See if it equals our type and depth of task
if
(
current_task
->
unique_id_
==
unique_id_
&&
current_task
->
depth_
==
depth_
)
{
if
(
internal_stealing
(
current_task
))
{
// internal steal was a success, hand it back to the internal scheduler
target_state
->
lock_
.
unlock
();
return
true
;
}
// No success, we need to steal work from a deeper level using 'top level task stealing'
current_task
=
current_task
->
child_task_
;
}
}
PROFILE_END_BLOCK
;
// Execute 'top level task steal' if possible
// (only try deeper tasks to keep depth restricted stealing).
PROFILE_STEALING
(
"Top Level Steal"
)
while
(
current_task
!=
nullptr
)
{
auto
lock
=
&
target_state
->
lock_
;
if
(
current_task
->
split_task
(
lock
))
{
// internal steal was no success (we did a top level task steal)
return
false
;
}
current_task
=
current_task
->
child_task_
;
}
PROFILE_END_BLOCK
;
target_state
->
lock_
.
unlock
();
}
// Execute 'top level task steal' if possible
// (only try deeper tasks to keep depth restricted stealing).
PROFILE_STEALING
(
"Top Level Steal"
)
while
(
current_task
!=
nullptr
)
{
auto
lock
=
&
target_state
->
lock_
;
if
(
current_task
->
split_task
(
lock
))
{
// internal steal was no success (we did a top level task steal)
return
false
;
}
// internal steal was no success
return
false
;
};
}
current_task
=
current_task
->
child_task_
;
}
PROFILE_END_BLOCK
;
target_state
->
lock_
.
unlock
();
}
// internal steal was no success
return
false
;
}
}
}
}
lib/pls/src/internal/scheduling/fork_join_task.cpp
@@ -4,131 +4,133 @@
#include "pls/internal/scheduling/fork_join_task.h"
namespace
pls
{
namespace
internal
{
namespace
scheduling
{
fork_join_sub_task
::
fork_join_sub_task
()
:
data_structures
::
deque_item
{},
ref_count_
{
0
},
parent_
{
nullptr
},
tbb_task_
{
nullptr
},
stack_state_
{
nullptr
}
{}
fork_join_sub_task
::
fork_join_sub_task
(
const
fork_join_sub_task
&
other
)
:
data_structures
::
deque_item
(
other
),
ref_count_
{
0
},
parent_
{
nullptr
},
tbb_task_
{
nullptr
},
stack_state_
{
nullptr
}
{}
void
fork_join_sub_task
::
execute
()
{
PROFILE_WORK_BLOCK
(
"execute sub_task"
)
tbb_task_
->
currently_executing_
=
this
;
execute_internal
();
tbb_task_
->
currently_executing_
=
nullptr
;
PROFILE_END_BLOCK
wait_for_all
();
if
(
parent_
!=
nullptr
)
{
parent_
->
ref_count_
--
;
}
}
void
fork_join_sub_task
::
spawn_child_internal
(
fork_join_sub_task
*
sub_task
)
{
// Keep our refcount up to date
ref_count_
++
;
// Assign forced values
sub_task
->
parent_
=
this
;
sub_task
->
tbb_task_
=
tbb_task_
;
sub_task
->
stack_state_
=
tbb_task_
->
my_stack_
->
save_state
();
tbb_task_
->
deque_
.
push_tail
(
sub_task
);
}
void
fork_join_sub_task
::
wait_for_all
()
{
while
(
ref_count_
>
0
)
{
PROFILE_STEALING
(
"get local sub task"
)
fork_join_sub_task
*
local_task
=
tbb_task_
->
get_local_sub_task
();
PROFILE_END_BLOCK
if
(
local_task
!=
nullptr
)
{
local_task
->
execute
();
}
else
{
// Try to steal work.
// External steal will be executed implicitly if success
PROFILE_STEALING
(
"steal work"
)
bool
internal_steal_success
=
tbb_task_
->
steal_work
();
PROFILE_END_BLOCK
if
(
internal_steal_success
)
{
tbb_task_
->
last_stolen_
->
execute
();
}
}
}
tbb_task_
->
my_stack_
->
reset_state
(
stack_state_
);
}
fork_join_sub_task
*
fork_join_task
::
get_local_sub_task
()
{
return
deque_
.
pop_tail
();
}
fork_join_sub_task
*
fork_join_task
::
get_stolen_sub_task
()
{
return
deque_
.
pop_head
();
}
bool
fork_join_task
::
internal_stealing
(
abstract_task
*
other_task
)
{
PROFILE_STEALING
(
"fork_join_task::internal_stealin"
)
auto
cast_other_task
=
reinterpret_cast
<
fork_join_task
*>
(
other_task
);
auto
stolen_sub_task
=
cast_other_task
->
get_stolen_sub_task
();
if
(
stolen_sub_task
==
nullptr
)
{
return
false
;
}
else
{
// Make sub-task belong to our fork_join_task instance
stolen_sub_task
->
tbb_task_
=
this
;
stolen_sub_task
->
stack_state_
=
my_stack_
->
save_state
();
// We will execute this next without explicitly moving it onto our stack storage
last_stolen_
=
stolen_sub_task
;
return
true
;
}
}
bool
fork_join_task
::
split_task
(
base
::
spin_lock
*
lock
)
{
PROFILE_STEALING
(
"fork_join_task::split_task"
)
fork_join_sub_task
*
stolen_sub_task
=
get_stolen_sub_task
();
if
(
stolen_sub_task
==
nullptr
)
{
return
false
;
}
fork_join_task
task
{
stolen_sub_task
,
this
->
unique_id
()};
// In success case, unlock.
// TODO: this locking is complicated and error prone.
lock
->
unlock
();
scheduler
::
execute_task
(
task
,
depth
());
return
true
;
}
void
fork_join_task
::
execute
()
{
PROFILE_WORK_BLOCK
(
"execute fork_join_task"
);
// Bind this instance to our OS thread
my_stack_
=
base
::
this_thread
::
state
<
thread_state
>
()
->
task_stack_
;
root_task_
->
tbb_task_
=
this
;
root_task_
->
stack_state_
=
my_stack_
->
save_state
();
// Execute it on our OS thread until its finished
root_task_
->
execute
();
}
fork_join_sub_task
*
fork_join_task
::
currently_executing
()
const
{
return
currently_executing_
;
}
fork_join_task
::
fork_join_task
(
fork_join_sub_task
*
root_task
,
const
abstract_task
::
id
&
id
)
:
abstract_task
{
0
,
id
},
root_task_
{
root_task
},
currently_executing_
{
nullptr
},
my_stack_
{
nullptr
},
deque_
{},
last_stolen_
{
nullptr
}
{};
}
namespace
internal
{
namespace
scheduling
{
fork_join_sub_task
::
fork_join_sub_task
()
:
data_structures
::
deque_item
{},
ref_count_
{
0
},
parent_
{
nullptr
},
tbb_task_
{
nullptr
},
stack_state_
{
nullptr
}
{}
fork_join_sub_task
::
fork_join_sub_task
(
const
fork_join_sub_task
&
other
)
:
data_structures
::
deque_item
(
other
),
ref_count_
{
0
},
parent_
{
nullptr
},
tbb_task_
{
nullptr
},
stack_state_
{
nullptr
}
{}
void
fork_join_sub_task
::
execute
()
{
PROFILE_WORK_BLOCK
(
"execute sub_task"
)
tbb_task_
->
currently_executing_
=
this
;
execute_internal
();
tbb_task_
->
currently_executing_
=
nullptr
;
PROFILE_END_BLOCK
wait_for_all
();
if
(
parent_
!=
nullptr
)
{
parent_
->
ref_count_
--
;
}
}
void
fork_join_sub_task
::
spawn_child_internal
(
fork_join_sub_task
*
sub_task
)
{
// Keep our refcount up to date
ref_count_
++
;
// Assign forced values
sub_task
->
parent_
=
this
;
sub_task
->
tbb_task_
=
tbb_task_
;
sub_task
->
stack_state_
=
tbb_task_
->
my_stack_
->
save_state
();
tbb_task_
->
deque_
.
push_tail
(
sub_task
);
}
void
fork_join_sub_task
::
wait_for_all
()
{
while
(
ref_count_
>
0
)
{
PROFILE_STEALING
(
"get local sub task"
)
fork_join_sub_task
*
local_task
=
tbb_task_
->
get_local_sub_task
();
PROFILE_END_BLOCK
if
(
local_task
!=
nullptr
)
{
local_task
->
execute
();
}
else
{
// Try to steal work.
// External steal will be executed implicitly if success
PROFILE_STEALING
(
"steal work"
)
bool
internal_steal_success
=
tbb_task_
->
steal_work
();
PROFILE_END_BLOCK
if
(
internal_steal_success
)
{
tbb_task_
->
last_stolen_
->
execute
();
}
}
}
tbb_task_
->
my_stack_
->
reset_state
(
stack_state_
);
}
fork_join_sub_task
*
fork_join_task
::
get_local_sub_task
()
{
return
deque_
.
pop_tail
();
}
fork_join_sub_task
*
fork_join_task
::
get_stolen_sub_task
()
{
return
deque_
.
pop_head
();
}
bool
fork_join_task
::
internal_stealing
(
abstract_task
*
other_task
)
{
PROFILE_STEALING
(
"fork_join_task::internal_stealin"
)
auto
cast_other_task
=
reinterpret_cast
<
fork_join_task
*>
(
other_task
);
auto
stolen_sub_task
=
cast_other_task
->
get_stolen_sub_task
();
if
(
stolen_sub_task
==
nullptr
)
{
return
false
;
}
else
{
// Make sub-task belong to our fork_join_task instance
stolen_sub_task
->
tbb_task_
=
this
;
stolen_sub_task
->
stack_state_
=
my_stack_
->
save_state
();
// We will execute this next without explicitly moving it onto our stack storage
last_stolen_
=
stolen_sub_task
;
return
true
;
}
}
bool
fork_join_task
::
split_task
(
base
::
spin_lock
*
lock
)
{
PROFILE_STEALING
(
"fork_join_task::split_task"
)
fork_join_sub_task
*
stolen_sub_task
=
get_stolen_sub_task
();
if
(
stolen_sub_task
==
nullptr
)
{
return
false
;
}
fork_join_task
task
{
stolen_sub_task
,
this
->
unique_id
()};
// In success case, unlock.
// TODO: this locking is complicated and error prone.
lock
->
unlock
();
scheduler
::
execute_task
(
task
,
depth
());
return
true
;
}
void
fork_join_task
::
execute
()
{
PROFILE_WORK_BLOCK
(
"execute fork_join_task"
);
// Bind this instance to our OS thread
my_stack_
=
base
::
this_thread
::
state
<
thread_state
>
()
->
task_stack_
;
root_task_
->
tbb_task_
=
this
;
root_task_
->
stack_state_
=
my_stack_
->
save_state
();
// Execute it on our OS thread until its finished
root_task_
->
execute
();
}
fork_join_sub_task
*
fork_join_task
::
currently_executing
()
const
{
return
currently_executing_
;
}
fork_join_task
::
fork_join_task
(
fork_join_sub_task
*
root_task
,
const
abstract_task
::
id
&
id
)
:
abstract_task
{
0
,
id
},
root_task_
{
root_task
},
currently_executing_
{
nullptr
},
my_stack_
{
nullptr
},
deque_
{},
last_stolen_
{
nullptr
}
{}
}
}
}
lib/pls/src/internal/scheduling/root_task.cpp
#include "pls/internal/scheduling/root_task.h"
namespace
pls
{
namespace
internal
{
namespace
scheduling
{
namespace
internal
{
namespace
scheduling
{
}
}
}
}
}
lib/pls/src/internal/scheduling/run_on_n_threads_task.cpp
#include "pls/internal/scheduling/run_on_n_threads_task.h"
namespace
pls
{
namespace
internal
{
namespace
scheduling
{
namespace
internal
{
namespace
scheduling
{
}
}
}
}
}
lib/pls/src/internal/scheduling/scheduler.cpp
@@ -2,60 +2,63 @@
#include "pls/internal/base/error_handling.h"
namespace
pls
{
namespace
internal
{
namespace
scheduling
{
scheduler
::
scheduler
(
scheduler_memory
*
memory
,
const
unsigned
int
num_threads
)
:
num_threads_
{
num_threads
},
memory_
{
memory
},
sync_barrier_
{
num_threads
+
1
},
terminated_
{
false
}
{
if
(
num_threads_
>
memory_
->
max_threads
())
{
PLS_ERROR
(
"Tried to create scheduler with more OS threads than pre-allocated memory."
);
}
for
(
unsigned
int
i
=
0
;
i
<
num_threads_
;
i
++
)
{
// Placement new is required, as the memory of `memory_` is not required to be initialized.
new
((
void
*
)
memory_
->
thread_state_for
(
i
))
thread_state
{
this
,
memory_
->
task_stack_for
(
i
),
i
};
new
((
void
*
)
memory_
->
thread_for
(
i
))
base
::
thread
<
void
(
*
)(),
thread_state
>
(
&
worker_routine
,
memory_
->
thread_state_for
(
i
));
}
}
scheduler
::~
scheduler
()
{
terminate
();
}
void
worker_routine
()
{
auto
my_state
=
base
::
this_thread
::
state
<
thread_state
>
();
while
(
true
)
{
my_state
->
scheduler_
->
sync_barrier_
.
wait
();
if
(
my_state
->
scheduler_
->
terminated_
)
{
return
;
}
// The root task must only return when all work is done,
// because of this a simple call is enough to ensure the
// fork-join-section is done (logically joined back into our main thread).
my_state
->
root_task_
->
execute
();
my_state
->
scheduler_
->
sync_barrier_
.
wait
();
}
}
void
scheduler
::
terminate
(
bool
wait_for_workers
)
{
if
(
terminated_
)
{
return
;
}
terminated_
=
true
;
sync_barrier_
.
wait
();
if
(
wait_for_workers
)
{
for
(
unsigned
int
i
=
0
;
i
<
num_threads_
;
i
++
)
{
memory_
->
thread_for
(
i
)
->
join
();
}
}
}
}
namespace
internal
{
namespace
scheduling
{
scheduler
::
scheduler
(
scheduler_memory
*
memory
,
const
unsigned
int
num_threads
)
:
num_threads_
{
num_threads
},
memory_
{
memory
},
sync_barrier_
{
num_threads
+
1
},
terminated_
{
false
}
{
if
(
num_threads_
>
memory_
->
max_threads
())
{
PLS_ERROR
(
"Tried to create scheduler with more OS threads than pre-allocated memory."
);
}
for
(
unsigned
int
i
=
0
;
i
<
num_threads_
;
i
++
)
{
// Placement new is required, as the memory of `memory_` is not required to be initialized.
new
((
void
*
)
memory_
->
thread_state_for
(
i
))
thread_state
{
this
,
memory_
->
task_stack_for
(
i
),
i
};
new
((
void
*
)
memory_
->
thread_for
(
i
))
base
::
thread
<
void
(
*
)(),
thread_state
>
(
&
worker_routine
,
memory_
->
thread_state_for
(
i
));
}
}
scheduler
::~
scheduler
()
{
terminate
();
}
void
worker_routine
()
{
auto
my_state
=
base
::
this_thread
::
state
<
thread_state
>
();
while
(
true
)
{
my_state
->
scheduler_
->
sync_barrier_
.
wait
();
if
(
my_state
->
scheduler_
->
terminated_
)
{
return
;
}
// The root task must only return when all work is done,
// because of this a simple call is enough to ensure the
// fork-join-section is done (logically joined back into our main thread).
my_state
->
root_task_
->
execute
();
my_state
->
scheduler_
->
sync_barrier_
.
wait
();
}
}
void
scheduler
::
terminate
(
bool
wait_for_workers
)
{
if
(
terminated_
)
{
return
;
}
terminated_
=
true
;
sync_barrier_
.
wait
();
if
(
wait_for_workers
)
{
for
(
unsigned
int
i
=
0
;
i
<
num_threads_
;
i
++
)
{
memory_
->
thread_for
(
i
)
->
join
();
}
}
}
}
}
}
lib/pls/src/internal/scheduling/scheduler_memory.cpp
#include "pls/internal/scheduling/scheduler_memory.h"
namespace
pls
{
namespace
internal
{
namespace
scheduling
{
malloc_scheduler_memory
::
malloc_scheduler_memory
(
const
size_t
num_threads
,
const
size_t
memory_per_stack
)
:
num_threads_
{
num_threads
}
{
threads_
=
reinterpret_cast
<
aligned_thread
*>
(
base
::
alignment
::
allocate_aligned
(
num_threads
*
sizeof
(
aligned_thread
)));
thread_states_
=
reinterpret_cast
<
aligned_thread_state
*>
(
base
::
alignment
::
allocate_aligned
(
num_threads
*
sizeof
(
aligned_thread_state
)));
namespace
internal
{
namespace
scheduling
{
task_stacks_
=
reinterpret_cast
<
aligned_aligned_stack
*>
(
base
::
alignment
::
allocate_aligned
(
num_threads
*
sizeof
(
aligned_aligned_stack
)));
task_stacks_memory_
=
reinterpret_cast
<
char
**>
(
base
::
alignment
::
allocate_aligned
(
num_threads
*
sizeof
(
char
*
)));
for
(
size_t
i
=
0
;
i
<
num_threads_
;
i
++
)
{
task_stacks_memory_
[
i
]
=
reinterpret_cast
<
char
*>
(
base
::
alignment
::
allocate_aligned
(
memory_per_stack
));
new
((
void
*
)
task_stacks_
[
i
].
pointer
())
data_structures
::
aligned_stack
(
task_stacks_memory_
[
i
],
memory_per_stack
);
}
}
malloc_scheduler_memory
::
malloc_scheduler_memory
(
const
size_t
num_threads
,
const
size_t
memory_per_stack
)
:
num_threads_
{
num_threads
}
{
threads_
=
reinterpret_cast
<
aligned_thread
*>
(
base
::
alignment
::
allocate_aligned
(
num_threads
*
sizeof
(
aligned_thread
)));
thread_states_
=
reinterpret_cast
<
aligned_thread_state
*>
(
base
::
alignment
::
allocate_aligned
(
num_threads
*
sizeof
(
aligned_thread_state
)));
malloc_scheduler_memory
::~
malloc_scheduler_memory
()
{
free
(
threads_
);
free
(
thread_states_
);
task_stacks_
=
reinterpret_cast
<
aligned_aligned_stack
*>
(
base
::
alignment
::
allocate_aligned
(
num_threads
*
sizeof
(
aligned_aligned_stack
)));
task_stacks_memory_
=
reinterpret_cast
<
char
**>
(
base
::
alignment
::
allocate_aligned
(
num_threads
*
sizeof
(
char
*
)));
for
(
size_t
i
=
0
;
i
<
num_threads_
;
i
++
)
{
task_stacks_memory_
[
i
]
=
reinterpret_cast
<
char
*>
(
base
::
alignment
::
allocate_aligned
(
memory_per_stack
));
new
((
void
*
)
task_stacks_
[
i
].
pointer
())
data_structures
::
aligned_stack
(
task_stacks_memory_
[
i
],
memory_per_stack
);
}
}
malloc_scheduler_memory
::~
malloc_scheduler_memory
()
{
free
(
threads_
);
free
(
thread_states_
);
for
(
size_t
i
=
0
;
i
<
num_threads_
;
i
++
)
{
free
(
task_stacks_memory_
[
i
]);
}
free
(
task_stacks_
);
free
(
task_stacks_memory_
);
}
for
(
size_t
i
=
0
;
i
<
num_threads_
;
i
++
)
{
free
(
task_stacks_memory_
[
i
]);
}
free
(
task_stacks_
);
free
(
task_stacks_memory_
);
}
}
}
}
}
}
lib/pls/src/internal/scheduling/thread_state.cpp
#include "pls/internal/scheduling/thread_state.h"
namespace
pls
{
namespace
internal
{
namespace
scheduling
{
namespace
internal
{
namespace
scheduling
{
}
}
}
}
}
test/base_tests.cpp
@@ -13,73 +13,73 @@ static bool base_tests_visited;
static int base_tests_local_value_one;
static vector<int> base_tests_local_value_two;

TEST_CASE("thread creation and joining", "[internal/data_structures/thread.h]") {
  base_tests_visited = false;
  auto t1 = start_thread([]() { base_tests_visited = true; });
  t1.join();

  REQUIRE(base_tests_visited);
}

TEST_CASE("thread state", "[internal/data_structures/thread.h]") {
  int state_one = 1;
  vector<int> state_two{1, 2};

  auto t1 = start_thread([]() { base_tests_local_value_one = *this_thread::state<int>(); }, &state_one);
  auto t2 = start_thread([]() { base_tests_local_value_two = *this_thread::state<vector<int>>(); }, &state_two);
  t1.join();
  t2.join();

  REQUIRE(base_tests_local_value_one == 1);
  REQUIRE(base_tests_local_value_two == vector<int>{1, 2});
}

int base_tests_shared_counter;

TEST_CASE("spinlock protects concurrent counter", "[internal/data_structures/spinlock.h]") {
  constexpr int num_iterations = 1000000;
  base_tests_shared_counter = 0;
  spin_lock lock{};

  SECTION("lock can be used by itself") {
    auto t1 = start_thread([&]() {
      for (int i = 0; i < num_iterations; i++) {
        lock.lock();
        base_tests_shared_counter++;
        lock.unlock();
      }
    });
    auto t2 = start_thread([&]() {
      for (int i = 0; i < num_iterations; i++) {
        lock.lock();
        base_tests_shared_counter--;
        lock.unlock();
      }
    });

    t1.join();
    t2.join();

    REQUIRE(base_tests_shared_counter == 0);
  }

  SECTION("lock can be used with std::lock_guard") {
    auto t1 = start_thread([&]() {
      for (int i = 0; i < num_iterations; i++) {
        std::lock_guard<spin_lock> my_lock{lock};
        base_tests_shared_counter++;
      }
    });
    auto t2 = start_thread([&]() {
      for (int i = 0; i < num_iterations; i++) {
        std::lock_guard<spin_lock> my_lock{lock};
        base_tests_shared_counter--;
      }
    });

    t1.join();
    t2.join();

    REQUIRE(base_tests_shared_counter == 0);
  }
}
test/data_structures_test.cpp
@@ -12,122 +12,121 @@ using namespace pls::internal::data_structures;
using
namespace
pls
::
internal
::
base
;
using
namespace
std
;
TEST_CASE
(
"aligned stack stores objects correctly"
,
"[internal/data_structures/aligned_stack.h]"
)
{
constexpr
long
data_size
=
1024
;
char
data
[
data_size
];
aligned_stack
stack
{
data
,
data_size
};
SECTION
(
"stack correctly pushes sub linesize objects"
)
{
std
::
array
<
char
,
5
>
small_data_one
{
'a'
,
'b'
,
'c'
,
'd'
,
'e'
};
std
::
array
<
char
,
64
>
small_data_two
{};
std
::
array
<
char
,
1
>
small_data_three
{
'A'
};
auto
pointer_one
=
stack
.
push
(
small_data_one
);
auto
pointer_two
=
stack
.
push
(
small_data_two
);
auto
pointer_three
=
stack
.
push
(
small_data_three
);
REQUIRE
(
reinterpret_cast
<
std
::
uintptr_t
>
(
pointer_one
)
%
system_details
::
CACHE_LINE_SIZE
==
0
);
REQUIRE
(
reinterpret_cast
<
std
::
uintptr_t
>
(
pointer_two
)
%
system_details
::
CACHE_LINE_SIZE
==
0
);
REQUIRE
(
reinterpret_cast
<
std
::
uintptr_t
>
(
pointer_three
)
%
system_details
::
CACHE_LINE_SIZE
==
0
);
}
SECTION
(
"stack correctly pushes above linesize objects"
)
{
std
::
array
<
char
,
5
>
small_data_one
{
'a'
,
'b'
,
'c'
,
'd'
,
'e'
};
std
::
array
<
char
,
system_details
::
CACHE_LINE_SIZE
+
10
>
big_data_one
{};
auto
big_pointer_one
=
stack
.
push
(
big_data_one
);
auto
small_pointer_one
=
stack
.
push
(
small_data_one
);
REQUIRE
(
reinterpret_cast
<
std
::
uintptr_t
>
(
big_pointer_one
)
%
system_details
::
CACHE_LINE_SIZE
==
0
);
REQUIRE
(
reinterpret_cast
<
std
::
uintptr_t
>
(
small_pointer_one
)
%
system_details
::
CACHE_LINE_SIZE
==
0
);
}
SECTION
(
"stack correctly stores and retrieves objects"
)
{
std
::
array
<
char
,
5
>
data_one
{
'a'
,
'b'
,
'c'
,
'd'
,
'e'
};
stack
.
push
(
data_one
);
auto
retrieved_data
=
stack
.
pop
<
std
::
array
<
char
,
5
>>
();
REQUIRE
(
retrieved_data
==
std
::
array
<
char
,
5
>
{
'a'
,
'b'
,
'c'
,
'd'
,
'e'
});
}
SECTION
(
"stack can push and pop multiple times with correct alignment"
)
{
std
::
array
<
char
,
5
>
small_data_one
{
'a'
,
'b'
,
'c'
,
'd'
,
'e'
};
std
::
array
<
char
,
64
>
small_data_two
{};
std
::
array
<
char
,
1
>
small_data_three
{
'A'
};
auto
pointer_one
=
stack
.
push
(
small_data_one
);
auto
pointer_two
=
stack
.
push
(
small_data_two
);
auto
pointer_three
=
stack
.
push
(
small_data_three
);
stack
.
pop
<
typeof
(
small_data_three
)
>
();
stack
.
pop
<
typeof
(
small_data_two
)
>
();
auto
pointer_four
=
stack
.
push
(
small_data_two
);
auto
pointer_five
=
stack
.
push
(
small_data_three
);
REQUIRE
(
reinterpret_cast
<
std
::
uintptr_t
>
(
pointer_one
)
%
system_details
::
CACHE_LINE_SIZE
==
0
);
REQUIRE
(
reinterpret_cast
<
std
::
uintptr_t
>
(
pointer_two
)
%
system_details
::
CACHE_LINE_SIZE
==
0
);
REQUIRE
(
reinterpret_cast
<
std
::
uintptr_t
>
(
pointer_three
)
%
system_details
::
CACHE_LINE_SIZE
==
0
);
REQUIRE
(
reinterpret_cast
<
std
::
uintptr_t
>
(
pointer_four
)
%
system_details
::
CACHE_LINE_SIZE
==
0
);
REQUIRE
(
reinterpret_cast
<
std
::
uintptr_t
>
(
pointer_five
)
%
system_details
::
CACHE_LINE_SIZE
==
0
);
REQUIRE
(
pointer_four
==
pointer_two
);
REQUIRE
(
pointer_five
==
pointer_three
);
}
TEST_CASE("deque stores objects correctly", "[internal/data_structures/deque.h]") {
  class my_item : public deque_item {
  };

  deque<my_item> deque;
  my_item one, two, three;

  SECTION("add and remove items form the tail") {
    deque.push_tail(&one);
    deque.push_tail(&two);
    deque.push_tail(&three);

    REQUIRE(deque.pop_tail() == &three);
    REQUIRE(deque.pop_tail() == &two);
    REQUIRE(deque.pop_tail() == &one);
  }

  SECTION("handles getting empty by popping the tail correctly") {
    deque.push_tail(&one);
    REQUIRE(deque.pop_tail() == &one);

    deque.push_tail(&two);
    REQUIRE(deque.pop_tail() == &two);
  }

  SECTION("remove items form the head") {
    deque.push_tail(&one);
    deque.push_tail(&two);
    deque.push_tail(&three);

    REQUIRE(deque.pop_head() == &one);
    REQUIRE(deque.pop_head() == &two);
    REQUIRE(deque.pop_head() == &three);
  }

  SECTION("handles getting empty by popping the head correctly") {
    deque.push_tail(&one);
    REQUIRE(deque.pop_head() == &one);

    deque.push_tail(&two);
    REQUIRE(deque.pop_head() == &two);
  }

  SECTION("handles getting empty by popping the head and tail correctly") {
    deque.push_tail(&one);
    REQUIRE(deque.pop_tail() == &one);

    deque.push_tail(&two);
    REQUIRE(deque.pop_head() == &two);

    deque.push_tail(&three);
    REQUIRE(deque.pop_tail() == &three);
  }
}
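For context, my_item only needs to derive from deque_item because the container is intrusive: push_tail stores the raw pointer and links it into the list, so nothing is copied or allocated. The single-threaded sketch below is an assumed illustration of the push_tail/pop_tail/pop_head semantics these REQUIREs exercise, not the library's deque implementation.

#include <cassert>

// Rough illustration only (assumed structure, not the library's deque): an
// intrusive doubly-linked deque where elements inherit their link fields from
// deque_item, so pushing and popping never allocates.
namespace sketch {

struct deque_item {
  deque_item *prev_ = nullptr;
  deque_item *next_ = nullptr;
};

template<typename Item>
class deque {
  deque_item *head_ = nullptr;
  deque_item *tail_ = nullptr;

 public:
  void push_tail(Item *item) {
    item->prev_ = tail_;
    item->next_ = nullptr;
    if (tail_ != nullptr) tail_->next_ = item; else head_ = item;
    tail_ = item;
  }

  Item *pop_tail() {
    if (tail_ == nullptr) return nullptr;
    deque_item *result = tail_;
    tail_ = result->prev_;
    if (tail_ != nullptr) tail_->next_ = nullptr; else head_ = nullptr;
    return static_cast<Item *>(result);
  }

  Item *pop_head() {
    if (head_ == nullptr) return nullptr;
    deque_item *result = head_;
    head_ = result->next_;
    if (head_ != nullptr) head_->prev_ = nullptr; else tail_ = nullptr;
    return static_cast<Item *>(result);
  }
};

}  // namespace sketch

With this shape, the "getting empty" sections are really checking that head_ and tail_ are reset consistently when the last element leaves from either end, so the deque can be refilled afterwards.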
test/scheduling_tests.cpp
View file @
aa270645
...
...
@@ -4,76 +4,75 @@
using namespace pls;

class once_sub_task : public fork_join_sub_task {
  std::atomic<int> *counter_;
  int children_;

 protected:
  void execute_internal() override {
    (*counter_)++;
    for (int i = 0; i < children_; i++) {
      spawn_child(once_sub_task(counter_, children_ - 1));
    }
  }

 public:
  explicit once_sub_task(std::atomic<int> *counter, int children) :
      fork_join_sub_task(),
      counter_{counter},
      children_{children} {}
};

class force_steal_sub_task : public fork_join_sub_task {
  std::atomic<int> *parent_counter_;
  std::atomic<int> *overall_counter_;

 protected:
  void execute_internal() override {
    (*overall_counter_)--;
    if (overall_counter_->load() > 0) {
      std::atomic<int> counter{1};
      spawn_child(force_steal_sub_task(&counter, overall_counter_));
      while (counter.load() > 0); // Spin...
    }

    (*parent_counter_)--;
  }

 public:
  explicit force_steal_sub_task(std::atomic<int> *parent_counter, std::atomic<int> *overall_counter) :
      fork_join_sub_task(),
      parent_counter_{parent_counter},
      overall_counter_{overall_counter} {}
};

TEST_CASE("tbb task are scheduled correctly", "[internal/scheduling/fork_join_task.h]") {
  malloc_scheduler_memory my_scheduler_memory{8, 2 << 12};

  SECTION("tasks are executed exactly once") {
    scheduler my_scheduler{&my_scheduler_memory, 2};
    int start_counter = 4;
    int total_tasks = 1 + 4 + 4 * 3 + 4 * 3 * 2 + 4 * 3 * 2 * 1;
    std::atomic<int> counter{0};

    my_scheduler.perform_work([&]() {
      once_sub_task sub_task{&counter, start_counter};
      fork_join_task task{&sub_task, unique_id::create(42)};
      scheduler::execute_task(task);
    });

    REQUIRE(counter.load() == total_tasks);
    my_scheduler.terminate(true);
  }

  SECTION("tasks can be stolen") {
    scheduler my_scheduler{&my_scheduler_memory, 8};
    my_scheduler.perform_work([&]() {
      std::atomic<int> dummy_parent{1}, overall_counter{8};
      force_steal_sub_task sub_task{&dummy_parent, &overall_counter};
      fork_join_task task{&sub_task, unique_id::create(42)};
      scheduler::execute_task(task);
    });

    my_scheduler.terminate(true);
  }
}
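Two things are worth spelling out about these scheduler tests. First, the expected task count in the "executed exactly once" section follows directly from the recursion in once_sub_task: with start_counter = 4 the root runs once and spawns 4 children, which each spawn 3, then 2, then 1, so total_tasks = 1 + 4 + 4*3 + 4*3*2 + 4*3*2*1 = 65 executions of execute_internal, and the atomic counter must reach exactly that value. Second, in the stealing section each force_steal_sub_task spins on its own worker until the child it spawned has decremented the parent counter, so that child can only run if a different worker steals it; with 8 workers and overall_counter starting at 8, the test only terminates when stealing actually happens.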