Skip to content
Toggle navigation
P
Projects
G
Groups
S
Snippets
Help
las3_pub
/
predictable_parallel_patterns
This project
Loading...
Sign in
Toggle navigation
Go to a project
Project
Repository
Issues
0
Merge Requests
0
Pipelines
Wiki
Members
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Commit
aa270645
authored
Apr 17, 2019
by
FritzFlorian
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Reformate code to fit GNU code formating style.
parent
3ff10baa
Pipeline
#1157
passed with stages
in 3 minutes 36 seconds
Changes
46
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
46 changed files
with
1813 additions
and
1745 deletions
+1813
-1745
app/benchmark_fft/main.cpp
+51
-52
app/invoke_parallel/main.cpp
+34
-34
app/playground/main.cpp
+4
-3
app/test_for_new/main.cpp
+3
-4
lib/pls/include/pls/algorithms/invoke_parallel.h
+9
-7
lib/pls/include/pls/algorithms/invoke_parallel_impl.h
+52
-50
lib/pls/include/pls/internal/base/alignment.h
+17
-15
lib/pls/include/pls/internal/base/barrier.h
+23
-21
lib/pls/include/pls/internal/base/spin_lock.h
+8
-6
lib/pls/include/pls/internal/base/system_details.h
+23
-21
lib/pls/include/pls/internal/base/tas_spin_lock.h
+24
-24
lib/pls/include/pls/internal/base/thread.h
+102
-99
lib/pls/include/pls/internal/base/thread_impl.h
+63
-62
lib/pls/include/pls/internal/base/ttas_spin_lock.h
+22
-24
lib/pls/include/pls/internal/data_structures/aligned_stack.h
+42
-39
lib/pls/include/pls/internal/data_structures/aligned_stack_impl.h
+30
-28
lib/pls/include/pls/internal/data_structures/deque.h
+52
-50
lib/pls/include/pls/internal/helpers/mini_benchmark.h
+40
-38
lib/pls/include/pls/internal/helpers/prohibit_new.h
+3
-3
lib/pls/include/pls/internal/helpers/unique_id.h
+20
-18
lib/pls/include/pls/internal/scheduling/abstract_task.h
+34
-32
lib/pls/include/pls/internal/scheduling/fork_join_task.h
+83
-81
lib/pls/include/pls/internal/scheduling/root_task.h
+67
-65
lib/pls/include/pls/internal/scheduling/run_on_n_threads_task.h
+103
-100
lib/pls/include/pls/internal/scheduling/scheduler.h
+46
-44
lib/pls/include/pls/internal/scheduling/scheduler_impl.h
+66
-64
lib/pls/include/pls/internal/scheduling/scheduler_memory.h
+60
-57
lib/pls/include/pls/internal/scheduling/thread_state.h
+34
-32
lib/pls/include/pls/pls.h
+10
-8
lib/pls/src/internal/base/alignment.cpp
+22
-20
lib/pls/src/internal/base/barrier.cpp
+15
-13
lib/pls/src/internal/base/tas_spin_lock.cpp
+28
-26
lib/pls/src/internal/base/thread.cpp
+10
-8
lib/pls/src/internal/base/ttas_spin_lock.cpp
+43
-41
lib/pls/src/internal/data_structures/aligned_stack.cpp
+10
-8
lib/pls/src/internal/data_structures/deque.cpp
+54
-52
lib/pls/src/internal/scheduling/abstract_task.cpp
+60
-58
lib/pls/src/internal/scheduling/fork_join_task.cpp
+128
-126
lib/pls/src/internal/scheduling/root_task.cpp
+4
-4
lib/pls/src/internal/scheduling/run_on_n_threads_task.cpp
+4
-4
lib/pls/src/internal/scheduling/scheduler.cpp
+58
-55
lib/pls/src/internal/scheduling/scheduler_memory.cpp
+29
-24
lib/pls/src/internal/scheduling/thread_state.cpp
+4
-4
test/base_tests.cpp
+60
-60
test/data_structures_test.cpp
+102
-103
test/scheduling_tests.cpp
+57
-58
No files found.
app/benchmark_fft/main.cpp
View file @
aa270645
...
@@ -12,76 +12,75 @@ static constexpr int INPUT_SIZE = 2064;
...
@@ -12,76 +12,75 @@ static constexpr int INPUT_SIZE = 2064;
typedef
std
::
vector
<
std
::
complex
<
double
>>
complex_vector
;
typedef
std
::
vector
<
std
::
complex
<
double
>>
complex_vector
;
void
divide
(
complex_vector
::
iterator
data
,
int
n
)
{
void
divide
(
complex_vector
::
iterator
data
,
int
n
)
{
complex_vector
tmp_odd_elements
(
n
/
2
);
complex_vector
tmp_odd_elements
(
n
/
2
);
for
(
int
i
=
0
;
i
<
n
/
2
;
i
++
)
{
for
(
int
i
=
0
;
i
<
n
/
2
;
i
++
)
{
tmp_odd_elements
[
i
]
=
data
[
i
*
2
+
1
];
tmp_odd_elements
[
i
]
=
data
[
i
*
2
+
1
];
}
}
for
(
int
i
=
0
;
i
<
n
/
2
;
i
++
)
{
for
(
int
i
=
0
;
i
<
n
/
2
;
i
++
)
{
data
[
i
]
=
data
[
i
*
2
];
data
[
i
]
=
data
[
i
*
2
];
}
}
for
(
int
i
=
0
;
i
<
n
/
2
;
i
++
)
{
for
(
int
i
=
0
;
i
<
n
/
2
;
i
++
)
{
data
[
i
+
n
/
2
]
=
tmp_odd_elements
[
i
];
data
[
i
+
n
/
2
]
=
tmp_odd_elements
[
i
];
}
}
}
}
void
combine
(
complex_vector
::
iterator
data
,
int
n
)
{
void
combine
(
complex_vector
::
iterator
data
,
int
n
)
{
for
(
int
i
=
0
;
i
<
n
/
2
;
i
++
)
{
for
(
int
i
=
0
;
i
<
n
/
2
;
i
++
)
{
std
::
complex
<
double
>
even
=
data
[
i
];
std
::
complex
<
double
>
even
=
data
[
i
];
std
::
complex
<
double
>
odd
=
data
[
i
+
n
/
2
];
std
::
complex
<
double
>
odd
=
data
[
i
+
n
/
2
];
// w is the "twiddle-factor".
// w is the "twiddle-factor".
// this could be cached, but we run the same 'data_structures' algorithm parallel/serial,
// this could be cached, but we run the same 'data_structures' algorithm parallel/serial,
// so it won't impact the performance comparison.
// so it won't impact the performance comparison.
std
::
complex
<
double
>
w
=
exp
(
std
::
complex
<
double
>
(
0
,
-
2.
*
M_PI
*
i
/
n
));
std
::
complex
<
double
>
w
=
exp
(
std
::
complex
<
double
>
(
0
,
-
2.
*
M_PI
*
i
/
n
));
data
[
i
]
=
even
+
w
*
odd
;
data
[
i
]
=
even
+
w
*
odd
;
data
[
i
+
n
/
2
]
=
even
-
w
*
odd
;
data
[
i
+
n
/
2
]
=
even
-
w
*
odd
;
}
}
}
}
void
fft
(
complex_vector
::
iterator
data
,
int
n
)
{
void
fft
(
complex_vector
::
iterator
data
,
int
n
)
{
if
(
n
<
2
)
{
if
(
n
<
2
)
{
return
;
return
;
}
}
divide
(
data
,
n
);
divide
(
data
,
n
);
if
(
n
<=
CUTOFF
)
{
if
(
n
<=
CUTOFF
)
{
fft
(
data
,
n
/
2
);
fft
(
data
,
n
/
2
);
fft
(
data
+
n
/
2
,
n
/
2
);
fft
(
data
+
n
/
2
,
n
/
2
);
}
else
{
}
else
{
pls
::
invoke_parallel
(
pls
::
invoke_parallel
(
[
&
]
{
fft
(
data
,
n
/
2
);
},
[
&
]
{
fft
(
data
,
n
/
2
);
},
[
&
]
{
fft
(
data
+
n
/
2
,
n
/
2
);
}
[
&
]
{
fft
(
data
+
n
/
2
,
n
/
2
);
}
);
);
}
}
combine
(
data
,
n
);
combine
(
data
,
n
);
}
}
complex_vector
prepare_input
(
int
input_size
)
{
complex_vector
prepare_input
(
int
input_size
)
{
std
::
vector
<
double
>
known_frequencies
{
2
,
11
,
52
,
88
,
256
};
std
::
vector
<
double
>
known_frequencies
{
2
,
11
,
52
,
88
,
256
};
complex_vector
data
(
input_size
);
complex_vector
data
(
input_size
);
// Set our input data to match a time series of the known_frequencies.
// Set our input data to match a time series of the known_frequencies.
// When applying fft to this time-series we should find these frequencies.
// When applying fft to this time-series we should find these frequencies.
for
(
int
i
=
0
;
i
<
input_size
;
i
++
)
{
for
(
int
i
=
0
;
i
<
input_size
;
i
++
)
{
data
[
i
]
=
std
::
complex
<
double
>
(
0.0
,
0.0
);
data
[
i
]
=
std
::
complex
<
double
>
(
0.0
,
0.0
);
for
(
auto
frequencie
:
known_frequencies
)
{
for
(
auto
frequencie
:
known_frequencies
)
{
data
[
i
]
+=
sin
(
2
*
M_PI
*
frequencie
*
i
/
input_size
);
data
[
i
]
+=
sin
(
2
*
M_PI
*
frequencie
*
i
/
input_size
);
}
}
}
}
return
data
;
return
data
;
}
}
int
main
()
{
int
main
()
{
PROFILE_ENABLE
PROFILE_ENABLE
complex_vector
initial_input
=
prepare_input
(
INPUT_SIZE
);
complex_vector
initial_input
=
prepare_input
(
INPUT_SIZE
);
pls
::
internal
::
helpers
::
run_mini_benchmark
([
&
]
{
pls
::
internal
::
helpers
::
run_mini_benchmark
([
&
]
{
complex_vector
input
=
initial_input
;
complex_vector
input
=
initial_input
;
fft
(
input
.
begin
(),
input
.
size
());
fft
(
input
.
begin
(),
input
.
size
());
},
8
,
4000
);
},
8
,
4000
);
PROFILE_SAVE
(
"test_profile.prof"
)
PROFILE_SAVE
(
"test_profile.prof"
)
}
}
app/invoke_parallel/main.cpp
View file @
aa270645
...
@@ -8,44 +8,44 @@ static pls::static_scheduler_memory<8, 2 << 14> my_scheduler_memory;
...
@@ -8,44 +8,44 @@ static pls::static_scheduler_memory<8, 2 << 14> my_scheduler_memory;
static
constexpr
int
CUTOFF
=
10
;
static
constexpr
int
CUTOFF
=
10
;
long
fib_serial
(
long
n
)
{
long
fib_serial
(
long
n
)
{
if
(
n
==
0
)
{
if
(
n
==
0
)
{
return
0
;
return
0
;
}
}
if
(
n
==
1
)
{
if
(
n
==
1
)
{
return
1
;
return
1
;
}
}
return
fib_serial
(
n
-
1
)
+
fib_serial
(
n
-
2
);
return
fib_serial
(
n
-
1
)
+
fib_serial
(
n
-
2
);
}
}
long
fib
(
long
n
)
{
long
fib
(
long
n
)
{
if
(
n
<=
CUTOFF
)
{
if
(
n
<=
CUTOFF
)
{
return
fib_serial
(
n
);
return
fib_serial
(
n
);
}
}
// Actual 'invoke_parallel' logic/code
// Actual 'invoke_parallel' logic/code
int
left
,
right
;
int
left
,
right
;
pls
::
invoke_parallel
(
pls
::
invoke_parallel
(
[
&
]
{
left
=
fib
(
n
-
1
);
},
[
&
]
{
left
=
fib
(
n
-
1
);
},
[
&
]
{
right
=
fib
(
n
-
2
);
}
[
&
]
{
right
=
fib
(
n
-
2
);
}
);
);
return
left
+
right
;
return
left
+
right
;
}
}
int
main
()
{
int
main
()
{
PROFILE_ENABLE
PROFILE_ENABLE
pls
::
scheduler
scheduler
{
&
my_scheduler_memory
,
8
};
pls
::
scheduler
scheduler
{
&
my_scheduler_memory
,
8
};
long
result
;
long
result
;
scheduler
.
perform_work
([
&
]
{
scheduler
.
perform_work
([
&
]
{
PROFILE_MAIN_THREAD
PROFILE_MAIN_THREAD
// Call looks just the same, only requirement is
// Call looks just the same, only requirement is
// the enclosure in the perform_work lambda.
// the enclosure in the perform_work lambda.
for
(
int
i
=
0
;
i
<
10
;
i
++
)
{
for
(
int
i
=
0
;
i
<
10
;
i
++
)
{
result
=
fib
(
30
);
result
=
fib
(
30
);
std
::
cout
<<
"Fib(30)="
<<
result
<<
std
::
endl
;
std
::
cout
<<
"Fib(30)="
<<
result
<<
std
::
endl
;
}
}
});
});
PROFILE_SAVE
(
"test_profile.prof"
)
PROFILE_SAVE
(
"test_profile.prof"
)
}
}
app/playground/main.cpp
View file @
aa270645
...
@@ -10,8 +10,9 @@
...
@@ -10,8 +10,9 @@
#include <pls/internal/scheduling/root_task.h>
#include <pls/internal/scheduling/root_task.h>
#include <pls/internal/helpers/unique_id.h>
#include <pls/internal/helpers/unique_id.h>
int
main
()
{
int
main
()
{
std
::
cout
<<
pls
::
internal
::
scheduling
::
root_task
<
void
(
*
)
>::
create_id
().
type_
.
hash_code
()
<<
std
::
endl
;
std
::
cout
<<
pls
::
internal
::
scheduling
::
root_task
<
void
(
*
)
>::
create_id
().
type_
.
hash_code
()
<<
std
::
endl
;
std
::
cout
<<
pls
::
internal
::
helpers
::
unique_id
::
create
<
pls
::
internal
::
scheduling
::
root_task
<
void
(
*
)
>>
().
type_
.
hash_code
()
<<
std
::
endl
;
std
::
cout
<<
pls
::
internal
::
helpers
::
unique_id
::
create
<
pls
::
internal
::
scheduling
::
root_task
<
void
(
*
)
>>
().
type_
.
hash_code
()
<<
std
::
endl
;
}
}
app/test_for_new/main.cpp
View file @
aa270645
...
@@ -5,9 +5,8 @@ using namespace pls::internal::base;
...
@@ -5,9 +5,8 @@ using namespace pls::internal::base;
int
global
=
0
;
int
global
=
0
;
int
main
()
{
int
main
()
{
// Try to use every feature, to trigger the prohibited use of new if found somewhere
// Try to use every feature, to trigger the prohibited use of new if found somewhere
auto
t1
=
start_thread
([]
()
{});
auto
t1
=
start_thread
([]()
{});
t1
.
join
();
t1
.
join
();
}
}
lib/pls/include/pls/algorithms/invoke_parallel.h
View file @
aa270645
...
@@ -6,15 +6,17 @@
...
@@ -6,15 +6,17 @@
#include "pls/internal/scheduling/scheduler.h"
#include "pls/internal/scheduling/scheduler.h"
namespace
pls
{
namespace
pls
{
namespace
algorithm
{
namespace
algorithm
{
template
<
typename
Function1
,
typename
Function2
>
void
invoke_parallel
(
const
Function1
&
function1
,
const
Function2
&
function2
);
template
<
typename
Function1
,
typename
Function2
,
typename
Function3
>
template
<
typename
Function1
,
typename
Function2
>
void
invoke_parallel
(
const
Function1
&
function1
,
const
Function2
&
function2
,
const
Function3
&
function3
);
void
invoke_parallel
(
const
Function1
&
function1
,
const
Function2
&
function2
);
// ...and so on, add more if we decide to keep this design
template
<
typename
Function1
,
typename
Function2
,
typename
Function3
>
}
void
invoke_parallel
(
const
Function1
&
function1
,
const
Function2
&
function2
,
const
Function3
&
function3
);
// ...and so on, add more if we decide to keep this design
}
}
}
#include "invoke_parallel_impl.h"
#include "invoke_parallel_impl.h"
...
...
lib/pls/include/pls/algorithms/invoke_parallel_impl.h
View file @
aa270645
...
@@ -7,65 +7,67 @@
...
@@ -7,65 +7,67 @@
#include "pls/internal/helpers/unique_id.h"
#include "pls/internal/helpers/unique_id.h"
namespace
pls
{
namespace
pls
{
namespace
algorithm
{
namespace
algorithm
{
namespace
internal
{
namespace
internal
{
using
namespace
::
pls
::
internal
::
scheduling
;
template
<
typename
Body
>
using
namespace
::
pls
::
internal
::
scheduling
;
inline
void
run_body
(
const
Body
&
internal_body
,
const
abstract_task
::
id
&
id
)
{
// Make sure we are in the context of this invoke_parallel instance,
// if not we will spawn it as a new 'fork-join-style' task.
auto
current_task
=
scheduler
::
current_task
();
if
(
current_task
->
unique_id
()
==
id
)
{
auto
current_sub_task
=
reinterpret_cast
<
fork_join_task
*>
(
current_task
)
->
currently_executing
();
internal_body
(
current_sub_task
);
}
else
{
fork_join_lambda
<
Body
>
root_body
(
&
internal_body
);
fork_join_task
root_task
{
&
root_body
,
id
};
scheduler
::
execute_task
(
root_task
);
}
}
}
template
<
typename
Function1
,
typename
Function2
>
template
<
typename
Body
>
void
invoke_parallel
(
const
Function1
&
function1
,
const
Function2
&
function2
)
{
inline
void
run_body
(
const
Body
&
internal_body
,
const
abstract_task
::
id
&
id
)
{
using
namespace
::
pls
::
internal
::
scheduling
;
// Make sure we are in the context of this invoke_parallel instance,
using
namespace
::
pls
::
internal
::
helpers
;
// if not we will spawn it as a new 'fork-join-style' task.
static
abstract_task
::
id
id
=
unique_id
::
create
<
Function1
,
Function2
>
();
auto
current_task
=
scheduler
::
current_task
();
if
(
current_task
->
unique_id
()
==
id
)
{
auto
current_sub_task
=
reinterpret_cast
<
fork_join_task
*>
(
current_task
)
->
currently_executing
();
internal_body
(
current_sub_task
);
}
else
{
fork_join_lambda
<
Body
>
root_body
(
&
internal_body
);
fork_join_task
root_task
{
&
root_body
,
id
};
scheduler
::
execute_task
(
root_task
);
}
}
}
auto
internal_body
=
[
&
]
(
fork_join_sub_task
*
this_task
){
template
<
typename
Function1
,
typename
Function2
>
auto
sub_task_body_1
=
[
&
]
(
fork_join_sub_task
*
){
function1
();
};
void
invoke_parallel
(
const
Function1
&
function1
,
const
Function2
&
function2
)
{
auto
sub_task_1
=
fork_join_lambda
<
decltype
(
sub_task_body_1
)
>
(
&
sub_task_body_1
);
using
namespace
::
pls
::
internal
::
scheduling
;
using
namespace
::
pls
::
internal
::
helpers
;
static
abstract_task
::
id
id
=
unique_id
::
create
<
Function1
,
Function2
>
();
this_task
->
spawn_child
(
sub_task_1
);
auto
internal_body
=
[
&
](
fork_join_sub_task
*
this_task
)
{
function2
();
// Execute last function 'inline' without spawning a sub_task object
auto
sub_task_body_1
=
[
&
](
fork_join_sub_task
*
)
{
function1
();
};
this_task
->
wait_for_all
();
auto
sub_task_1
=
fork_join_lambda
<
decltype
(
sub_task_body_1
)
>
(
&
sub_task_body_1
);
};
internal
::
run_body
(
internal_body
,
id
);
this_task
->
spawn_child
(
sub_task_1
);
}
function2
();
// Execute last function 'inline' without spawning a sub_task object
this_task
->
wait_for_all
();
};
internal
::
run_body
(
internal_body
,
id
);
}
template
<
typename
Function1
,
typename
Function2
,
typename
Function3
>
template
<
typename
Function1
,
typename
Function2
,
typename
Function3
>
void
invoke_parallel
(
const
Function1
&
function1
,
const
Function2
&
function2
,
const
Function3
&
function3
)
{
void
invoke_parallel
(
const
Function1
&
function1
,
const
Function2
&
function2
,
const
Function3
&
function3
)
{
using
namespace
::
pls
::
internal
::
scheduling
;
using
namespace
::
pls
::
internal
::
scheduling
;
using
namespace
::
pls
::
internal
::
helpers
;
using
namespace
::
pls
::
internal
::
helpers
;
static
abstract_task
::
id
id
=
unique_id
::
create
<
Function1
,
Function2
,
Function3
>
();
static
abstract_task
::
id
id
=
unique_id
::
create
<
Function1
,
Function2
,
Function3
>
();
auto
internal_body
=
[
&
]
(
fork_join_sub_task
*
this_task
)
{
auto
internal_body
=
[
&
](
fork_join_sub_task
*
this_task
)
{
auto
sub_task_body_1
=
[
&
]
(
fork_join_sub_task
*
)
{
function1
();
};
auto
sub_task_body_1
=
[
&
](
fork_join_sub_task
*
)
{
function1
();
};
auto
sub_task_1
=
fork_join_lambda
<
decltype
(
sub_task_body_1
)
>
(
&
sub_task_body_1
);
auto
sub_task_1
=
fork_join_lambda
<
decltype
(
sub_task_body_1
)
>
(
&
sub_task_body_1
);
auto
sub_task_body_2
=
[
&
]
(
fork_join_sub_task
*
)
{
function2
();
};
auto
sub_task_body_2
=
[
&
](
fork_join_sub_task
*
)
{
function2
();
};
auto
sub_task_2
=
fork_join_lambda
<
decltype
(
sub_task_body_2
)
>
(
&
sub_task_body_2
);
auto
sub_task_2
=
fork_join_lambda
<
decltype
(
sub_task_body_2
)
>
(
&
sub_task_body_2
);
this_task
->
spawn_child
(
sub_task_1
);
this_task
->
spawn_child
(
sub_task_1
);
this_task
->
spawn_child
(
sub_task_2
);
this_task
->
spawn_child
(
sub_task_2
);
function3
();
// Execute last function 'inline' without spawning a sub_task object
function3
();
// Execute last function 'inline' without spawning a sub_task object
this_task
->
wait_for_all
();
this_task
->
wait_for_all
();
};
};
internal
::
run_body
(
internal_body
,
id
);
internal
::
run_body
(
internal_body
,
id
);
}
}
}
}
}
}
#endif //PLS_INVOKE_PARALLEL_IMPL_H
#endif //PLS_INVOKE_PARALLEL_IMPL_H
lib/pls/include/pls/internal/base/alignment.h
View file @
aa270645
...
@@ -8,21 +8,23 @@
...
@@ -8,21 +8,23 @@
#include "system_details.h"
#include "system_details.h"
namespace
pls
{
namespace
pls
{
namespace
internal
{
namespace
internal
{
namespace
base
{
namespace
base
{
namespace
alignment
{
namespace
alignment
{
template
<
typename
T
>
struct
aligned_wrapper
{
template
<
typename
T
>
alignas
(
system_details
::
CACHE_LINE_SIZE
)
unsigned
char
data
[
sizeof
(
T
)];
struct
aligned_wrapper
{
T
*
pointer
()
{
return
reinterpret_cast
<
T
*>
(
data
);
}
alignas
(
system_details
::
CACHE_LINE_SIZE
)
unsigned
char
data
[
sizeof
(
T
)];
};
T
*
pointer
()
{
return
reinterpret_cast
<
T
*>
(
data
);
}
void
*
allocate_aligned
(
size_t
size
);
};
void
*
allocate_aligned
(
size_t
size
);
std
::
uintptr_t
next_alignment
(
std
::
uintptr_t
size
);
char
*
next_alignment
(
char
*
pointer
);
std
::
uintptr_t
next_alignment
(
std
::
uintptr_t
size
);
}
char
*
next_alignment
(
char
*
pointer
);
}
}
}
}
}
}
}
#endif //PLS_ALIGNMENT_H
#endif //PLS_ALIGNMENT_H
lib/pls/include/pls/internal/base/barrier.h
View file @
aa270645
...
@@ -5,27 +5,29 @@
...
@@ -5,27 +5,29 @@
#include <pthread.h>
#include <pthread.h>
namespace
pls
{
namespace
pls
{
namespace
internal
{
namespace
internal
{
namespace
base
{
namespace
base
{
/**
* Provides standard barrier behaviour.
/**
* `count` threads have to call `wait()` before any of the `wait()` calls returns,
* Provides standard barrier behaviour.
* thus blocking all threads until everyone reached the barrier.
* `count` threads have to call `wait()` before any of the `wait()` calls returns,
*
* thus blocking all threads until everyone reached the barrier.
* PORTABILITY:
*
* Current implementation is based on pthreads.
* PORTABILITY:
*/
* Current implementation is based on pthreads.
class
barrier
{
*/
pthread_barrier_t
barrier_
;
class
barrier
{
pthread_barrier_t
barrier_
;
public
:
explicit
barrier
(
unsigned
int
count
);
public
:
~
barrier
();
explicit
barrier
(
unsigned
int
count
);
~
barrier
();
void
wait
();
};
void
wait
();
}
};
}
}
}
}
}
#endif //PLS_BARRIER_H
#endif //PLS_BARRIER_H
lib/pls/include/pls/internal/base/spin_lock.h
View file @
aa270645
...
@@ -6,12 +6,14 @@
...
@@ -6,12 +6,14 @@
#include "ttas_spin_lock.h"
#include "ttas_spin_lock.h"
namespace
pls
{
namespace
pls
{
namespace
internal
{
namespace
internal
{
namespace
base
{
namespace
base
{
// Default Spin-Lock implementation for this project.
using
spin_lock
=
tas_spin_lock
;
// Default Spin-Lock implementation for this project.
}
using
spin_lock
=
tas_spin_lock
;
}
}
}
}
}
#endif //PLS_SPINLOCK_H
#endif //PLS_SPINLOCK_H
lib/pls/include/pls/internal/base/system_details.h
View file @
aa270645
...
@@ -5,29 +5,31 @@
...
@@ -5,29 +5,31 @@
#include <cstdint>
#include <cstdint>
namespace
pls
{
namespace
pls
{
namespace
internal
{
namespace
internal
{
namespace
base
{
namespace
base
{
/**
* Collection of system details, e.g. hardware cache line size.
*
* PORTABILITY:
* Currently sane default values for x86.
*/
namespace
system_details
{
/**
* Most processors have 64 byte cache lines
*/
constexpr
std
::
uintptr_t
CACHE_LINE_SIZE
=
64
;
/**
/**
* Choose one of the following ways to store thread specific data.
* Collection of system details, e.g. hardware cache line size.
* Try to choose the fastest available on this processor/system.
*
*/
* PORTABILITY:
* Currently sane default values for x86.
*/
namespace
system_details
{
/**
* Most processors have 64 byte cache lines
*/
constexpr
std
::
uintptr_t
CACHE_LINE_SIZE
=
64
;
/**
* Choose one of the following ways to store thread specific data.
* Try to choose the fastest available on this processor/system.
*/
// #define PLS_THREAD_SPECIFIC_PTHREAD
// #define PLS_THREAD_SPECIFIC_PTHREAD
#define PLS_THREAD_SPECIFIC_COMPILER
#define PLS_THREAD_SPECIFIC_COMPILER
}
}
}
}
}
}
}
}
#endif //PLS_SYSTEM_DETAILS_H
#endif //PLS_SYSTEM_DETAILS_H
lib/pls/include/pls/internal/base/tas_spin_lock.h
View file @
aa270645
...
@@ -10,30 +10,30 @@
...
@@ -10,30 +10,30 @@
#include "pls/internal/base/thread.h"
#include "pls/internal/base/thread.h"
namespace
pls
{
namespace
pls
{
namespace
internal
{
namespace
internal
{
namespace
base
{
namespace
base
{
/**
* A simple set and test_and_set based spin lock implementation.
/**
*
* A simple set and test_and_set based spin lock implementation.
* PORTABILITY:
*
* Current implementation is based on C++ 11 atomic_flag.
* PORTABILITY:
*/
* Current implementation is based on C++ 11 atomic_flag.
class
tas_spin_lock
{
*/
std
::
atomic_flag
flag_
;
class
tas_spin_lock
{
unsigned
int
yield_at_tries_
;
std
::
atomic_flag
flag_
;
unsigned
int
yield_at_tries_
;
public
:
public
:
tas_spin_lock
()
:
flag_
{
ATOMIC_FLAG_INIT
},
yield_at_tries_
{
1024
}
{};
tas_spin_lock
()
:
flag_
{
ATOMIC_FLAG_INIT
},
yield_at_tries_
{
1024
}
{};
tas_spin_lock
(
const
tas_spin_lock
&
other
)
:
flag_
{
ATOMIC_FLAG_INIT
},
yield_at_tries_
{
other
.
yield_at_tries_
}
{}
tas_spin_lock
(
const
tas_spin_lock
&
other
)
:
flag_
{
ATOMIC_FLAG_INIT
},
yield_at_tries_
{
other
.
yield_at_tries_
}
{}
void
lock
();
void
lock
();
bool
try_lock
(
unsigned
int
num_tries
=
1
);
bool
try_lock
(
unsigned
int
num_tries
=
1
);
void
unlock
();
void
unlock
();
};
};
}
}
}
}
}
}
#endif //PLS_TAS_SPIN_LOCK_H
#endif //PLS_TAS_SPIN_LOCK_H
lib/pls/include/pls/internal/base/thread.h
View file @
aa270645
...
@@ -13,109 +13,112 @@
...
@@ -13,109 +13,112 @@
#include "system_details.h"
#include "system_details.h"
namespace
pls
{
namespace
pls
{
namespace
internal
{
namespace
internal
{
namespace
base
{
namespace
base
{
using
thread_entrypoint
=
void
();
using
thread_entrypoint
=
void
();
/**
* Static methods than can be performed on the current thread.
/**
*
* Static methods than can be performed on the current thread.
* usage:
*
* this_thread::yield();
* usage:
* T* state = this_thread::state<T>();
* this_thread::yield();
*
* T* state = this_thread::state<T>();
* PORTABILITY:
*
* Current implementation is based on pthreads.
* PORTABILITY:
*/
* Current implementation is based on pthreads.
class
this_thread
{
*/
template
<
typename
Function
,
typename
State
>
class
this_thread
{
friend
class
thread
;
template
<
typename
Function
,
typename
State
>
friend
class
thread
;
#ifdef PLS_THREAD_SPECIFIC_PTHREAD
#ifdef PLS_THREAD_SPECIFIC_PTHREAD
static
pthread_key_t
local_storage_key_
;
static
pthread_key_t
local_storage_key_
;
static
bool
local_storage_key_initialized_
;
static
bool
local_storage_key_initialized_
;
#endif
#endif
#ifdef PLS_THREAD_SPECIFIC_COMPILER
#ifdef PLS_THREAD_SPECIFIC_COMPILER
static
__thread
void
*
local_state_
;
static
__thread
void
*
local_state_
;
#endif
#endif
public
:
public
:
static
void
yield
()
{
static
void
yield
()
{
pthread_yield
();
pthread_yield
();
}
}
/**
/**
* Retrieves the local state pointer.
* Retrieves the local state pointer.
*
*
* @tparam T The type of the state that is stored.
* @tparam T The type of the state that is stored.
* @return The state pointer hold for this thread.
* @return The state pointer hold for this thread.
*/
*/
template
<
typename
T
>
template
<
typename
T
>
static
T
*
state
();
static
T
*
state
();
/**
/**
* Stores a pointer to the thread local state object.
* Stores a pointer to the thread local state object.
* The memory management for this has to be done by the user,
* The memory management for this has to be done by the user,
* we only keep the pointer.
* we only keep the pointer.
*
*
* @tparam T The type of the state that is stored.
* @tparam T The type of the state that is stored.
* @param state_pointer A pointer to the threads state object.
* @param state_pointer A pointer to the threads state object.
*/
*/
template
<
typename
T
>
template
<
typename
T
>
static
void
set_state
(
T
*
state_pointer
);
static
void
set_state
(
T
*
state_pointer
);
};
};
/**
/**
* Abstraction for starting a function in a separate thread.
* Abstraction for starting a function in a separate thread.
*
*
* @tparam Function Lambda being started on the new thread.
* @tparam Function Lambda being started on the new thread.
* @tparam State State type held for this thread.
* @tparam State State type held for this thread.
*
*
* usage:
* usage:
* T* state;
* T* state;
* auto thread = start_thread([] {
* auto thread = start_thread([] {
* // Run on new thread
* // Run on new thread
* }, state);
* }, state);
* thread.join(); // Wait for it to finish
* thread.join(); // Wait for it to finish
*
*
* PORTABILITY:
* PORTABILITY:
* Current implementation is based on pthreads.
* Current implementation is based on pthreads.
*/
*/
template
<
typename
Function
,
typename
State
>
template
<
typename
Function
,
typename
State
>
class
thread
{
class
thread
{
friend
class
this_thread
;
friend
class
this_thread
;
// Keep a copy of the function (lambda) in this object to make sure it is valid when called!
// Keep a copy of the function (lambda) in this object to make sure it is valid when called!
Function
function_
;
Function
function_
;
State
*
state_pointer_
;
State
*
state_pointer_
;
// Wee need to wait for the started function to read
// Wee need to wait for the started function to read
// the function_ and state_pointer_ property before returning
// the function_ and state_pointer_ property before returning
// from the constructor, as the object might be moved after this.
// from the constructor, as the object might be moved after this.
std
::
atomic_flag
*
startup_flag_
;
std
::
atomic_flag
*
startup_flag_
;
// Keep handle to native implementation
// Keep handle to native implementation
pthread_t
pthread_thread_
;
pthread_t
pthread_thread_
;
static
void
*
start_pthread_internal
(
void
*
thread_pointer
);
static
void
*
start_pthread_internal
(
void
*
thread_pointer
);
public
:
public
:
explicit
thread
(
const
Function
&
function
,
State
*
state_pointer
);
explicit
thread
(
const
Function
&
function
,
State
*
state_pointer
);
public
:
public
:
void
join
();
void
join
();
// make object move only
// make object move only
thread
(
thread
&&
)
noexcept
=
default
;
thread
(
thread
&&
)
noexcept
=
default
;
thread
&
operator
=
(
thread
&&
)
noexcept
=
default
;
thread
&
operator
=
(
thread
&&
)
noexcept
=
default
;
thread
(
const
thread
&
)
=
delete
;
thread
(
const
thread
&
)
=
delete
;
thread
&
operator
=
(
const
thread
&
)
=
delete
;
thread
&
operator
=
(
const
thread
&
)
=
delete
;
};
};
template
<
typename
Function
,
typename
State
>
template
<
typename
Function
,
typename
State
>
thread
<
Function
,
State
>
start_thread
(
const
Function
&
function
,
State
*
state_pointer
);
thread
<
Function
,
State
>
start_thread
(
const
Function
&
function
,
State
*
state_pointer
);
template
<
typename
Function
>
template
<
typename
Function
>
thread
<
Function
,
void
>
start_thread
(
const
Function
&
function
);
thread
<
Function
,
void
>
start_thread
(
const
Function
&
function
);
}
}
}
}
}
}
#include "thread_impl.h"
#include "thread_impl.h"
...
...
lib/pls/include/pls/internal/base/thread_impl.h
View file @
aa270645
...
@@ -3,86 +3,87 @@
...
@@ -3,86 +3,87 @@
#define PLS_THREAD_IMPL_H
#define PLS_THREAD_IMPL_H
namespace
pls
{
namespace
pls
{
namespace
internal
{
namespace
internal
{
namespace
base
{
namespace
base
{
template
<
typename
T
>
T
*
this_thread
::
state
()
{
template
<
typename
T
>
T
*
this_thread
::
state
()
{
#ifdef PLS_THREAD_SPECIFIC_PTHREAD
#ifdef PLS_THREAD_SPECIFIC_PTHREAD
return
reinterpret_cast
<
T
*>
(
pthread_getspecific
(
local_storage_key_
));
return
reinterpret_cast
<
T
*>
(
pthread_getspecific
(
local_storage_key_
));
#endif
#endif
#ifdef PLS_THREAD_SPECIFIC_COMPILER
#ifdef PLS_THREAD_SPECIFIC_COMPILER
return
reinterpret_cast
<
T
*>
(
local_state_
);
return
reinterpret_cast
<
T
*>
(
local_state_
);
#endif
#endif
}
}
template
<
typename
T
>
template
<
typename
T
>
void
this_thread
::
set_state
(
T
*
state_pointer
)
{
void
this_thread
::
set_state
(
T
*
state_pointer
)
{
#ifdef PLS_THREAD_SPECIFIC_PTHREAD
#ifdef PLS_THREAD_SPECIFIC_PTHREAD
pthread_setspecific
(
this_thread
::
local_storage_key_
,
(
void
*
)
state_pointer
);
pthread_setspecific
(
this_thread
::
local_storage_key_
,
(
void
*
)
state_pointer
);
#endif
#endif
#ifdef PLS_THREAD_SPECIFIC_COMPILER
#ifdef PLS_THREAD_SPECIFIC_COMPILER
local_state_
=
state_pointer
;
local_state_
=
state_pointer
;
#endif
#endif
}
}
template
<
typename
Function
,
typename
State
>
template
<
typename
Function
,
typename
State
>
void
*
thread
<
Function
,
State
>::
start_pthread_internal
(
void
*
thread_pointer
)
{
void
*
thread
<
Function
,
State
>::
start_pthread_internal
(
void
*
thread_pointer
)
{
auto
my_thread
=
reinterpret_cast
<
thread
*>
(
thread_pointer
);
auto
my_thread
=
reinterpret_cast
<
thread
*>
(
thread_pointer
);
Function
my_function_copy
=
my_thread
->
function_
;
Function
my_function_copy
=
my_thread
->
function_
;
State
*
my_state_pointer_copy
=
my_thread
->
state_pointer_
;
State
*
my_state_pointer_copy
=
my_thread
->
state_pointer_
;
// Now we have copies of everything we need on the stack.
// Now we have copies of everything we need on the stack.
// The original thread object can be moved freely (no more
// The original thread object can be moved freely (no more
// references to its memory location).
// references to its memory location).
my_thread
->
startup_flag_
->
clear
();
my_thread
->
startup_flag_
->
clear
();
this_thread
::
set_state
(
my_state_pointer_copy
);
this_thread
::
set_state
(
my_state_pointer_copy
);
my_function_copy
();
my_function_copy
();
// Finished executing the user function
// Finished executing the user function
pthread_exit
(
nullptr
);
pthread_exit
(
nullptr
);
}
}
template<typename Function, typename State>
thread<Function, State>::thread(const Function &function, State *state_pointer) :
    function_{function},
    state_pointer_{state_pointer},
    startup_flag_{nullptr},
    pthread_thread_{} {

#ifdef PLS_THREAD_SPECIFIC_PTHREAD
  // Lazily create the process-wide pthread TLS key on first thread creation.
  // NOTE(review): this check-then-create is not itself synchronized — presumably
  // the first thread is always created from a single thread; verify against callers.
  if (!this_thread::local_storage_key_initialized_) {
    pthread_key_create(&this_thread::local_storage_key_, nullptr);
    this_thread::local_storage_key_initialized_ = true;
  }
#endif

  // We only need this during startup, will be destroyed when out of scope.
  // The new thread copies function_/state_pointer_ onto its own stack and then
  // clears this flag, signalling that this object may be moved/destroyed.
  std::atomic_flag startup_flag{ATOMIC_FLAG_INIT};
  startup_flag_ = &startup_flag;

  startup_flag.test_and_set(); // Set the flag, pthread will clear it when it is safe to return
  pthread_create(&pthread_thread_, nullptr, start_pthread_internal, (void *) (this));
  while (startup_flag.test_and_set()); // Busy waiting for the starting flag to clear
}
// Blocks the calling thread until this thread's function has returned.
template<typename Function, typename State>
void thread<Function, State>::join() {
  pthread_join(pthread_thread_, nullptr);
}
/**
 * Convenience factory: spawns a new thread executing the given function
 * with access to the given per-thread state object.
 *
 * @param function      callable copied into the new thread
 * @param state_pointer state made available to the thread via this_thread::state()
 * @return the running thread handle; join() it before destruction
 */
template<typename Function, typename State>
thread<Function, State> start_thread(const Function &function, State *state_pointer) {
  return thread<Function, State>{function, state_pointer};
}
/**
 * Convenience factory: spawns a new thread executing the given function
 * without any associated per-thread state (state type is void).
 *
 * @param function callable copied into the new thread
 * @return the running thread handle; join() it before destruction
 */
template<typename Function>
thread<Function, void> start_thread(const Function &function) {
  return thread<Function, void>{function, nullptr};
}
}
}
}
}
}
}
#endif //PLS_THREAD_IMPL_H
#endif //PLS_THREAD_IMPL_H
lib/pls/include/pls/internal/base/ttas_spin_lock.h
View file @
aa270645
...
@@ -8,30 +8,28 @@
...
@@ -8,30 +8,28 @@
#include "pls/internal/base/thread.h"
#include "pls/internal/base/thread.h"
namespace
pls
{
namespace
pls
{
namespace
internal
{
namespace
internal
{
namespace
base
{
namespace
base
{
/**
 * A simple test-and-test-and-set based spin lock implementation.
 *
 * PORTABILITY:
 * Current implementation is based on C++ 11 std::atomic<int>.
 */
class ttas_spin_lock {
  std::atomic<int> flag_;              // presumably 0 == unlocked — confirm against lock()/unlock() in the .cpp
  const unsigned int yield_at_tries_;  // spin attempts before yielding the CPU

 public:
  ttas_spin_lock() : flag_{0}, yield_at_tries_{1024} {};
  // Copying a lock yields a fresh, unlocked lock; only the tuning parameter is carried over.
  ttas_spin_lock(const ttas_spin_lock &other) : flag_{0}, yield_at_tries_{other.yield_at_tries_} {}

  void lock();
  bool try_lock(unsigned int num_tries = 1);
  void unlock();
};
}
}
}
}
}
}
#endif //PLS_TTAS_SPIN_LOCK_H
#endif //PLS_TTAS_SPIN_LOCK_H
lib/pls/include/pls/internal/data_structures/aligned_stack.h
View file @
aa270645
...
@@ -9,45 +9,48 @@
...
@@ -9,45 +9,48 @@
#include "pls/internal/base/alignment.h"
#include "pls/internal/base/alignment.h"
namespace
pls
{
namespace
pls
{
namespace
internal
{
namespace
internal
{
namespace
data_structures
{
namespace
data_structures
{
/**
 * Generic stack-like data structure that allows to allocate arbitrary objects in a given memory region.
 * The objects will be stored aligned in the stack, making the storage cache friendly and very fast
 * (as long as one can live with the stack restrictions).
 *
 * IMPORTANT: Does not call destructors on stored objects! Do not allocate resources in the objects!
 *
 * Usage:
 * aligned_stack stack{pointer_to_memory, size_of_memory};
 * T* pointer = stack.push(some_object); // Copy-Construct the object on top of stack
 * stack.pop<T>(); // Deconstruct the top object of type T
 */
class aligned_stack {
  // Keep bounds of our memory block
  char *memory_start_;
  char *memory_end_;

  // Current head will always be aligned to cache lines
  char *head_;
 public:
  // Opaque snapshot of the stack position; see save_state()/reset_state().
  typedef char *state;

  aligned_stack() : memory_start_{nullptr}, memory_end_{nullptr}, head_{nullptr} {};
  aligned_stack(char *memory_region, std::size_t size);

  // Copy-construct an object on top of the stack; returns its placement address.
  template<typename T>
  T *push(const T &object);
  // Reserve raw, aligned space for a T without constructing it.
  template<typename T>
  void *push();
  // Remove the top object of type T and return it by value (no destructor call).
  template<typename T>
  T pop();

  state save_state() const { return head_; }
  void reset_state(state new_state) { head_ = new_state; }
};
}
}
}
}
}
#include "aligned_stack_impl.h"
#include "aligned_stack_impl.h"
...
...
lib/pls/include/pls/internal/data_structures/aligned_stack_impl.h
View file @
aa270645
...
@@ -3,34 +3,36 @@
...
@@ -3,34 +3,36 @@
#define PLS_ALIGNED_STACK_IMPL_H
#define PLS_ALIGNED_STACK_IMPL_H
namespace
pls
{
namespace
pls
{
namespace
internal
{
namespace
internal
{
namespace
data_structures
{
namespace
data_structures
{
// Copy-construct the given object into freshly reserved, aligned stack space.
template<typename T>
T *aligned_stack::push(const T &object) {
  // Copy-Construct
  return new ((void *) push<T>()) T(object);
}

// Reserve raw space for one T; returns the (cache-line aligned) slot address.
template<typename T>
void *aligned_stack::push() {
  void *result = reinterpret_cast<T *>(head_);

  // Move head to next aligned position after new object
  head_ = base::alignment::next_alignment(head_ + sizeof(T));
  if (head_ >= memory_end_) {
    PLS_ERROR("Tried to allocate object on alligned_stack without sufficient memory!");
  }

  return result;
}

// Remove the top T and return it by value. NOTE(review): this subtracts
// next_alignment(sizeof(T)), which only mirrors push<T>()'s advance if head_
// is always kept aligned (the class invariant) — verify next_alignment's contract.
template<typename T>
T aligned_stack::pop() {
  head_ = head_ - base::alignment::next_alignment(sizeof(T));
  return *reinterpret_cast<T *>(head_);
}
}
}
}
}
}
}
#endif //PLS_ALIGNED_STACK_IMPL_H
#endif //PLS_ALIGNED_STACK_IMPL_H
lib/pls/include/pls/internal/data_structures/deque.h
View file @
aa270645
...
@@ -5,56 +5,58 @@
...
@@ -5,56 +5,58 @@
#include "pls/internal/base/spin_lock.h"
#include "pls/internal/base/spin_lock.h"
namespace
pls
{
namespace
pls
{
namespace
internal
{
namespace
internal
{
namespace
data_structures
{
namespace
data_structures
{
/**
 * Turns any object into deque item when inheriting from this.
 */
class deque_item {
  friend class deque_internal;

  deque_item *prev_;
  deque_item *next_;
};

// Non-template core holding the doubly-linked list and its lock;
// the typed deque<Item> below only adds the static_casts.
class deque_internal {
 protected:
  deque_item *head_;
  deque_item *tail_;

  base::spin_lock lock_;  // guards head_/tail_ — all *_internal ops are defined in the .cpp

  deque_item *pop_head_internal();
  deque_item *pop_tail_internal();
  void push_tail_internal(deque_item *new_item);
};

/**
 * A double linked list based deque.
 * Storage is therefore only needed for the individual items.
 *
 * @tparam Item The type of items stored in this deque
 */
template<typename Item>
class deque : deque_internal {
 public:
  explicit deque() : deque_internal{} {}

  // Each accessor downcasts back to the concrete Item type; Item must inherit deque_item.
  inline Item *pop_head() {
    return static_cast<Item *>(pop_head_internal());
  }

  inline Item *pop_tail() {
    return static_cast<Item *>(pop_tail_internal());
  }

  inline void push_tail(Item *new_item) {
    push_tail_internal(new_item);
  }
};
}
}
}
}
}
#endif //PLS_DEQUE_H
#endif //PLS_DEQUE_H
lib/pls/include/pls/internal/helpers/mini_benchmark.h
View file @
aa270645
...
@@ -9,45 +9,47 @@
...
@@ -9,45 +9,47 @@
#include <iostream>
#include <iostream>
namespace
pls
{
namespace
pls
{
namespace
internal
{
namespace
internal
{
namespace
helpers
{
namespace
helpers
{
// TODO: Clean up (separate into small functions and .cpp file)
// Runs the given lambda repeatedly under the scheduler for 1..max_threads threads,
// printing the average microseconds per iteration as a comma-separated row.
template<typename Function>
void run_mini_benchmark(const Function &lambda, size_t max_threads, unsigned long max_runtime_ms = 1000) {
  using namespace std;
  using namespace pls::internal::scheduling;

  malloc_scheduler_memory scheduler_memory{max_threads};
  for (unsigned int num_threads = 1; num_threads <= max_threads; num_threads++) {
    scheduler local_scheduler{&scheduler_memory, num_threads};

    chrono::high_resolution_clock::time_point start_time;
    chrono::high_resolution_clock::time_point end_time;
    unsigned long iterations = 0;
    local_scheduler.perform_work([&] {
      start_time = chrono::high_resolution_clock::now();
      end_time = start_time;
      chrono::high_resolution_clock::time_point planned_end_time = start_time + chrono::milliseconds(max_runtime_ms);

      // Keep invoking the workload until the time budget is exhausted.
      while (end_time < planned_end_time) {
        lambda();
        end_time = chrono::high_resolution_clock::now();
        iterations++;
      }
    });

    long time = chrono::duration_cast<chrono::microseconds>(end_time - start_time).count();
    // NOTE(review): if lambda() outlives max_runtime_ms on its first call pattern such
    // that iterations stays 0, this divides by zero — confirm intended usage.
    double time_per_iteration = (double) time / iterations;

    std::cout << time_per_iteration;
    if (num_threads < max_threads) {
      std::cout << ",";
    }
  }
  std::cout << std::endl;
}
}
}
}
#endif //PLS_MINI_BENCHMARK_H
#endif //PLS_MINI_BENCHMARK_H
lib/pls/include/pls/internal/helpers/prohibit_new.h
View file @
aa270645
...
@@ -15,9 +15,9 @@
...
@@ -15,9 +15,9 @@
#ifdef NEW_LINK_ERROR
#ifdef NEW_LINK_ERROR
// This will cause a linker error if new is used in the code.
// We also exit if it is somehow still called.
// (The extern function is deliberately never defined anywhere, so any
// translation unit that reaches this operator fails at link time.)
inline void *operator new(std::size_t) {
  extern int bare_new_erroneously_called();
  exit(bare_new_erroneously_called() | 1);
}
#else
#else
// Use this + debugging point to find out where we use a new
// Use this + debugging point to find out where we use a new
...
...
lib/pls/include/pls/internal/helpers/unique_id.h
View file @
aa270645
...
@@ -7,25 +7,27 @@
...
@@ -7,25 +7,27 @@
#include <stdint.h>
#include <stdint.h>
namespace
pls
{
namespace
pls
{
namespace
internal
{
namespace
internal
{
namespace
helpers
{
namespace
helpers
{
/**
 * Small value type identifying a task kind.
 *
 * An id pairs a numeric tag with a type_info reference; two ids compare
 * equal only when both the number and the type match. Instances can only
 * be obtained through the static create() factories.
 */
struct unique_id {
  const uint32_t id_;
  const std::type_info &type_;

  /// Equal iff both the numeric id and the associated type agree.
  bool operator==(const unique_id &other) const {
    if (id_ != other.id_) {
      return false;
    }
    return type_ == other.type_;
  }

  /// Creates an id from an explicit number (typed as plain void).
  static constexpr unique_id create(const uint32_t id) {
    return unique_id(id, typeid(void));
  }

  /// Creates an id distinguished purely by the given type pack.
  template<typename ...T>
  static constexpr unique_id create() {
    return unique_id(UINT32_MAX, typeid(std::tuple<T...>));
  }
 private:
  explicit constexpr unique_id(const uint32_t id, const std::type_info &type) : id_{id}, type_{type} {};
};
}
}
}
}
#endif //PLS_UNIQUE_ID_H
#endif //PLS_UNIQUE_ID_H
lib/pls/include/pls/internal/scheduling/abstract_task.h
View file @
aa270645
...
@@ -6,38 +6,40 @@
...
@@ -6,38 +6,40 @@
#include "pls/internal/helpers/unique_id.h"
#include "pls/internal/helpers/unique_id.h"
namespace
pls
{
namespace
pls
{
namespace
internal
{
namespace
internal
{
namespace
scheduling
{
namespace
scheduling
{
class
abstract_task
{
public
:
class
abstract_task
{
using
id
=
helpers
::
unique_id
;
public
:
using
id
=
helpers
::
unique_id
;
private
:
unsigned
int
depth_
;
private
:
abstract_task
::
id
unique_id_
;
unsigned
int
depth_
;
abstract_task
*
child_task_
;
abstract_task
::
id
unique_id_
;
abstract_task
*
child_task_
;
public
:
abstract_task
(
const
unsigned
int
depth
,
const
abstract_task
::
id
&
unique_id
)
:
public
:
depth_
{
depth
},
abstract_task
(
const
unsigned
int
depth
,
const
abstract_task
::
id
&
unique_id
)
:
unique_id_
{
unique_id
},
depth_
{
depth
},
child_task_
{
nullptr
}
{}
unique_id_
{
unique_id
},
child_task_
{
nullptr
}
{}
virtual
void
execute
()
=
0
;
void
set_child
(
abstract_task
*
child_task
)
{
child_task_
=
child_task
;
}
virtual
void
execute
()
=
0
;
abstract_task
*
child
()
{
return
child_task_
;
}
void
set_child
(
abstract_task
*
child_task
)
{
child_task_
=
child_task
;
}
abstract_task
*
child
()
{
return
child_task_
;
}
void
set_depth
(
unsigned
int
depth
)
{
depth_
=
depth
;
}
unsigned
int
depth
()
const
{
return
depth_
;
}
void
set_depth
(
unsigned
int
depth
)
{
depth_
=
depth
;
}
id
unique_id
()
const
{
return
unique_id_
;
}
unsigned
int
depth
()
const
{
return
depth_
;
}
protected
:
id
unique_id
()
const
{
return
unique_id_
;
}
virtual
bool
internal_stealing
(
abstract_task
*
other_task
)
=
0
;
protected
:
virtual
bool
split_task
(
base
::
spin_lock
*
lock
)
=
0
;
virtual
bool
internal_stealing
(
abstract_task
*
other_task
)
=
0
;
virtual
bool
split_task
(
base
::
spin_lock
*
lock
)
=
0
;
bool
steal_work
();
};
bool
steal_work
();
}
};
}
}
}
}
}
#endif //PLS_ABSTRACT_TASK_H
#endif //PLS_ABSTRACT_TASK_H
lib/pls/include/pls/internal/scheduling/fork_join_task.h
View file @
aa270645
...
@@ -11,87 +11,89 @@
...
@@ -11,87 +11,89 @@
#include "thread_state.h"
#include "thread_state.h"
namespace
pls
{
namespace
pls
{
namespace
internal
{
namespace
internal
{
namespace
scheduling
{
namespace
scheduling
{
class fork_join_task;

// One node in a fork/join computation; lives inside a fork_join_task's aligned_stack.
class fork_join_sub_task : public data_structures::deque_item {
  friend class fork_join_task;

  // Coordinate finishing of sub_tasks
  std::atomic_uint32_t ref_count_;
  fork_join_sub_task *parent_;

  // Access to TBB scheduling environment
  fork_join_task *tbb_task_;

  // Stack Management (reset stack pointer after wait_for_all() calls)
  data_structures::aligned_stack::state stack_state_;
 protected:
  explicit fork_join_sub_task();
  fork_join_sub_task(const fork_join_sub_task &other);

  // Overwritten with behaviour of child tasks
  virtual void execute_internal() = 0;
 public:
  // Only use them when actually executing this sub_task (only public for simpler API design)
  template<typename T>
  void spawn_child(const T &sub_task);
  void wait_for_all();

 private:
  void spawn_child_internal(fork_join_sub_task *sub_task);
  void execute();
};
// Adapter turning a lambda/functor into a fork_join_sub_task.
// Stores only a pointer — the callable must outlive the sub_task.
template<typename Function>
class fork_join_lambda : public fork_join_sub_task {
  const Function *function_;

 public:
  explicit fork_join_lambda(const Function *function) : function_{function} {};

 protected:
  void execute_internal() override {
    // The callable receives this sub_task so it can spawn further children.
    (*function_)(this);
  }
};
// Top-level task running a tree of fork_join_sub_tasks (TBB-style work stealing).
class fork_join_task : public abstract_task {
  friend class fork_join_sub_task;

  fork_join_sub_task *root_task_;
  fork_join_sub_task *currently_executing_;
  data_structures::aligned_stack *my_stack_;  // storage for spawned sub_tasks

  // Double-Ended Queue management
  data_structures::deque<fork_join_sub_task> deque_;

  // Steal Management
  fork_join_sub_task *last_stolen_;

  fork_join_sub_task *get_local_sub_task();
  fork_join_sub_task *get_stolen_sub_task();

  bool internal_stealing(abstract_task *other_task) override;
  bool split_task(base::spin_lock * /*lock*/) override;

 public:
  explicit fork_join_task(fork_join_sub_task *root_task, const abstract_task::id &id);
  void execute() override;
  fork_join_sub_task *currently_executing() const;
};
// Copies `task` onto the owning fork_join_task's aligned_stack and
// registers it as a child of this sub_task.
template<typename T>
void fork_join_sub_task::spawn_child(const T &task) {
  PROFILE_FORK_JOIN_STEALING("spawn_child")
  static_assert(std::is_base_of<fork_join_sub_task, T>::value, "Only pass fork_join_sub_task subclasses!");

  T *new_task = tbb_task_->my_stack_->push(task);
  spawn_child_internal(new_task);
}
}
}
}
}
}
}
#endif //PLS_TBB_LIKE_TASK_H
#endif //PLS_TBB_LIKE_TASK_H
lib/pls/include/pls/internal/scheduling/root_task.h
View file @
aa270645
...
@@ -10,71 +10,73 @@
...
@@ -10,71 +10,73 @@
#include "abstract_task.h"
#include "abstract_task.h"
namespace
pls
{
namespace
pls
{
namespace
internal
{
namespace
internal
{
namespace
scheduling
{
namespace
scheduling
{
// The master task wrapping the user's work section; worker threads poll
// finished_ (via root_worker_task) until the function has completed.
template<typename Function>
class root_task : public abstract_task {
  Function function_;
  std::atomic_uint8_t finished_;  // 0 while running, set to 1 after function_ returns
 public:
  static constexpr auto create_id = helpers::unique_id::create<root_task<Function>>;

  explicit root_task(Function function) :
      abstract_task{0, create_id()},
      function_{function},
      finished_{0} {}

  root_task(const root_task &other) :
      abstract_task{0, create_id()},
      function_{other.function_},
      finished_{0} {}

  bool finished() {
    return finished_;
  }

  void execute() override {
    PROFILE_WORK_BLOCK("execute root_task");
    function_();
    finished_ = 1;
  }

  // The root task is never stolen from or split.
  bool internal_stealing(abstract_task * /*other_task*/) override {
    return false;
  }

  bool split_task(base::spin_lock * /*lock*/) override {
    return false;
  }
};
// Companion task run by the non-master threads: keeps stealing work
// until the master root_task reports that it has finished.
template<typename Function>
class root_worker_task : public abstract_task {
  root_task<Function> *master_task_;
 public:
  static constexpr auto create_id = root_task<Function>::create_id;

  explicit root_worker_task(root_task<Function> *master_task) :
      abstract_task{0, create_id()},
      master_task_{master_task} {}

  void execute() override {
    PROFILE_WORK_BLOCK("execute root_task");
    do {
      steal_work();
    } while (!master_task_->finished());
  }

  bool internal_stealing(abstract_task * /*other_task*/) override {
    return false;
  }

  bool split_task(base::spin_lock * /*lock*/) override {
    return false;
  }
};
}
}
}
}
}
#endif //PLS_ROOT_MASTER_TASK_H
#endif //PLS_ROOT_MASTER_TASK_H
lib/pls/include/pls/internal/scheduling/run_on_n_threads_task.h
View file @
aa270645
...
@@ -12,107 +12,110 @@
...
@@ -12,107 +12,110 @@
#include "scheduler.h"
#include "scheduler.h"
namespace
pls
{
namespace
pls
{
namespace
internal
{
namespace
internal
{
namespace
scheduling
{
namespace
scheduling
{
// Runs the given function on exactly num_threads threads: the master runs it
// once itself and hands out worker copies via split_task until the counter hits 0.
template<typename Function>
class run_on_n_threads_task : public abstract_task {
  template<typename F>
  friend
  class run_on_n_threads_task_worker;

  Function function_;

  // Improvement: Remove lock and replace by atomic variable (performance)
  int counter;  // remaining worker slots (num_threads - 1 at start)
  base::spin_lock counter_lock_;

  // Decrements and returns the remaining slot count under the lock.
  int decrement_counter() {
    std::lock_guard<base::spin_lock> lock{counter_lock_};
    counter--;
    return counter;
  }

  int get_counter() {
    std::lock_guard<base::spin_lock> lock{counter_lock_};
    return counter;
  }
 public:
  static constexpr auto create_id = helpers::unique_id::create<run_on_n_threads_task<Function>>;

  run_on_n_threads_task(Function function, int num_threads) :
      abstract_task{0, create_id()},
      function_{function},
      counter{num_threads - 1} {}

  void execute() override {
    // Execute our function ONCE
    function_();

    // Steal until we are finished (other threads executed)
    do {
      steal_work();
    } while (get_counter() > 0);

    std::cout << "Finished Master!" << std::endl;
  }

  bool internal_stealing(abstract_task * /*other_task*/) override {
    return false;
  }

  bool split_task(base::spin_lock *lock) override;
};
// Worker copy of run_on_n_threads_task: claims one slot from the shared
// counter and runs the function, or abandons if all slots are taken.
template<typename Function>
class run_on_n_threads_task_worker : public abstract_task {
  Function function_;
  run_on_n_threads_task<Function> *root_;
 public:
  static constexpr auto create_id = helpers::unique_id::create<run_on_n_threads_task_worker<Function>>;

  run_on_n_threads_task_worker(Function function, run_on_n_threads_task<Function> *root) :
      abstract_task{0, create_id()},
      function_{function},
      root_{root} {}

  void execute() override {
    if (root_->decrement_counter() >= 0) {
      function_();
      std::cout << "Finished Worker!" << std::endl;
    } else {
      std::cout << "Abandoned Worker!" << std::endl;
    }
  }

  bool internal_stealing(abstract_task * /*other_task*/) override {
    return false;
  }

  bool split_task(base::spin_lock * /*lock*/) override {
    return false;
  }
};
// Called by a stealing thread: if worker slots remain, hand that thread
// a run_on_n_threads_task_worker copy of our function.
template<typename Function>
bool run_on_n_threads_task<Function>::split_task(base::spin_lock *lock) {
  if (get_counter() <= 0) {
    return false;
  }
  // In success case, unlock.
  // TODO: this locking is complicated and error prone.
  lock->unlock();

  auto scheduler = base::this_thread::state<thread_state>()->scheduler_;
  auto task = run_on_n_threads_task_worker<Function>{function_, this};
  scheduler->execute_task(task, depth());
  return true;
}
// Convenience factory for a run_on_n_threads_task.
template<typename Function>
run_on_n_threads_task<Function> create_run_on_n_threads_task(Function function, int num_threads) {
  return run_on_n_threads_task<Function>{function, num_threads};
}
}
}
}
}
#endif //PLS_RUN_ON_N_THREADS_TASK_H
#endif //PLS_RUN_ON_N_THREADS_TASK_H
lib/pls/include/pls/internal/scheduling/scheduler.h
View file @
aa270645
...
@@ -17,50 +17,52 @@
...
@@ -17,50 +17,52 @@
#include "scheduler_memory.h"
#include "scheduler_memory.h"
namespace
pls
{
namespace
pls
{
namespace
internal
{
namespace
internal
{
namespace
scheduling
{
namespace
scheduling
{
void worker_routine();
using scheduler_thread = base::thread<decltype(&worker_routine), thread_state>;

// Owns the worker thread pool; memory for threads/states comes from the
// caller-provided scheduler_memory.
class scheduler {
  friend void worker_routine();

  const unsigned int num_threads_;
  scheduler_memory *memory_;

  base::barrier sync_barrier_;  // rendezvous between master and workers
  bool terminated_;
 public:
  explicit scheduler(scheduler_memory *memory, unsigned int num_threads);
  ~scheduler();

  /**
   * Wakes up the thread pool.
   * Code inside the Function lambda can invoke all parallel APIs.
   *
   * @param work_section generic function or lambda to be executed in the scheduler's context.
   */
  template<typename Function>
  void perform_work(Function work_section);

  /**
   * Executes a top-level-task (children of abstract_task) on this thread.
   *
   * @param task The task to be executed.
   * @param depth Optional: depth of the new task, otherwise set implicitly.
   */
  template<typename Task>
  static void execute_task(Task &task, int depth = -1);

  static abstract_task *current_task() { return base::this_thread::state<thread_state>()->current_task_; }

  void terminate(bool wait_for_workers = true);

  unsigned int num_threads() const { return num_threads_; }
  thread_state *thread_state_for(size_t id) { return memory_->thread_state_for(id); }
};
}
}
}
}
}
#include "scheduler_impl.h"
#include "scheduler_impl.h"
...
...
lib/pls/include/pls/internal/scheduling/scheduler_impl.h
View file @
aa270645
...
@@ -3,70 +3,72 @@
...
@@ -3,70 +3,72 @@
#define PLS_SCHEDULER_IMPL_H
#define PLS_SCHEDULER_IMPL_H
namespace
pls
{
namespace
pls
{
namespace
internal
{
namespace
internal
{
namespace
scheduling
{
namespace
scheduling
{
template<typename Function>
void scheduler::perform_work(Function work_section) {
  PROFILE_WORK_BLOCK("scheduler::perform_work")
  // Wrap the user's work section into the root task executed by thread 0.
  root_task<Function> master{work_section};

  // Push root task on stacks
  auto new_master = memory_->task_stack_for(0)->push(master);
  memory_->thread_state_for(0)->root_task_ = new_master;
  memory_->thread_state_for(0)->current_task_ = new_master;
  for (unsigned int i = 1; i < num_threads_; i++) {
    // Every other worker gets a helper task that joins in on the master's work.
    root_worker_task<Function> worker{new_master};
    // NOTE(review): the worker task is pushed on stack 0, not stack i — looks
    // suspicious; confirm all root worker tasks are meant to live on stack 0.
    auto new_worker = memory_->task_stack_for(0)->push(worker);
    memory_->thread_state_for(i)->root_task_ = new_worker;
    memory_->thread_state_for(i)->current_task_ = new_worker;
  }

  // Perform and wait for work
  sync_barrier_.wait(); // Trigger threads to wake up
  sync_barrier_.wait(); // Wait for threads to finish

  // Clean up stack (pop in reverse of the pushes above; typeof is a GNU extension)
  memory_->task_stack_for(0)->pop<typeof(master)>();
  for (unsigned int i = 1; i < num_threads_; i++) {
    // Constructed only so typeof(worker) names the pushed type for pop.
    root_worker_task<Function> worker{new_master};
    memory_->task_stack_for(0)->pop<typeof(worker)>();
  }
}
template<typename Task>
void scheduler::execute_task(Task &task, int depth) {
  static_assert(std::is_base_of<abstract_task, Task>::value, "Only pass abstract_task subclasses!");

  auto my_state = base::this_thread::state<thread_state>();
  abstract_task *old_task;
  abstract_task *new_task;

  // Init Task: copy it onto this thread's task stack and link it below the
  // currently running task. Guarded by the per-thread lock because stealing
  // threads walk this task chain concurrently (see abstract_task::steal_work).
  {
    std::lock_guard<base::spin_lock> lock{my_state->lock_};
    old_task = my_state->current_task_;
    new_task = my_state->task_stack_->push(task);

    // Negative depth means "derive implicitly": one level below the parent.
    new_task->set_depth(depth >= 0 ? depth : old_task->depth() + 1);
    my_state->current_task_ = new_task;
    old_task->set_child(new_task);
  }

  // Run Task
  new_task->execute();

  // Teardown state back to before the task was executed
  {
    std::lock_guard<base::spin_lock> lock{my_state->lock_};
    old_task->set_child(nullptr);
    my_state->current_task_ = old_task;
    my_state->task_stack_->pop<Task>();
  }
}
}
}
}
}
}
}
#endif //PLS_SCHEDULER_IMPL_H
#endif //PLS_SCHEDULER_IMPL_H
lib/pls/include/pls/internal/scheduling/scheduler_memory.h
View file @
aa270645
...
@@ -7,72 +7,75 @@
...
@@ -7,72 +7,75 @@
#define PLS_SCHEDULER_MEMORY_H
#define PLS_SCHEDULER_MEMORY_H
namespace
pls
{
namespace
pls
{
namespace
internal
{
namespace
internal
{
namespace
scheduling
{
namespace
scheduling
{
void
worker_routine
();
using
scheduler_thread
=
base
::
thread
<
decltype
(
&
worker_routine
),
thread_state
>
;
/**
 * Abstract provider of all per-thread storage the scheduler needs:
 * thread handles, thread states and task stacks, addressed by thread id.
 */
class scheduler_memory {
 public:
  virtual size_t max_threads() const = 0;
  virtual thread_state *thread_state_for(size_t id) = 0;
  virtual scheduler_thread *thread_for(size_t id) = 0;
  virtual data_structures::aligned_stack *task_stack_for(size_t id) = 0;
};

/**
 * Statically sized scheduler storage: everything lives in member arrays,
 * so no heap allocation is needed.
 */
template<size_t MAX_THREADS, size_t TASK_STACK_SIZE>
class static_scheduler_memory : public scheduler_memory {
  // Everyone of these types has to live on its own cache line,
  // as each thread uses one of them independently.
  // Therefore it would be a major performance hit if we shared cache lines on these.
  using aligned_thread = base::alignment::aligned_wrapper<scheduler_thread>;
  using aligned_thread_state = base::alignment::aligned_wrapper<thread_state>;
  using aligned_thread_stack = base::alignment::aligned_wrapper<std::array<char, TASK_STACK_SIZE>>;
  using aligned_aligned_stack = base::alignment::aligned_wrapper<data_structures::aligned_stack>;

  std::array<aligned_thread, MAX_THREADS> threads_;
  std::array<aligned_thread_state, MAX_THREADS> thread_states_;
  std::array<aligned_thread_stack, MAX_THREADS> task_stacks_memory_;
  std::array<aligned_aligned_stack, MAX_THREADS> task_stacks_;

 public:
  static_scheduler_memory() {
    // Placement-construct each aligned_stack over its raw backing array.
    for (size_t i = 0; i < MAX_THREADS; i++) {
      new ((void *) task_stacks_[i].pointer()) data_structures::aligned_stack(
          task_stacks_memory_[i].pointer()->data(), TASK_STACK_SIZE);
    }
  }

  size_t max_threads() const override { return MAX_THREADS; }
  thread_state *thread_state_for(size_t id) override { return thread_states_[id].pointer(); }
  scheduler_thread *thread_for(size_t id) override { return threads_[id].pointer(); }
  data_structures::aligned_stack *task_stack_for(size_t id) override { return task_stacks_[id].pointer(); }
};

/**
 * Heap-backed scheduler storage sized at runtime; allocation/teardown is done
 * in the constructor/destructor defined in the .cpp file.
 */
class malloc_scheduler_memory : public scheduler_memory {
  // Everyone of these types has to live on its own cache line,
  // as each thread uses one of them independently.
  // Therefore it would be a major performance hit if we shared cache lines on these.
  using aligned_thread = base::alignment::aligned_wrapper<scheduler_thread>;
  using aligned_thread_state = base::alignment::aligned_wrapper<thread_state>;
  using aligned_aligned_stack = base::alignment::aligned_wrapper<data_structures::aligned_stack>;

  const size_t num_threads_;

  aligned_thread *threads_;
  aligned_thread_state *thread_states_;
  char **task_stacks_memory_;
  aligned_aligned_stack *task_stacks_;

 public:
  explicit malloc_scheduler_memory(size_t num_threads, size_t memory_per_stack = 2 << 16);
  ~malloc_scheduler_memory();

  size_t max_threads() const override { return num_threads_; }
  thread_state *thread_state_for(size_t id) override { return thread_states_[id].pointer(); }
  scheduler_thread *thread_for(size_t id) override { return threads_[id].pointer(); }
  data_structures::aligned_stack *task_stack_for(size_t id) override { return task_stacks_[id].pointer(); }
};
}
}
}
}
#endif //PLS_SCHEDULER_MEMORY_H
#endif //PLS_SCHEDULER_MEMORY_H
lib/pls/include/pls/internal/scheduling/thread_state.h
View file @
aa270645
...
@@ -8,38 +8,40 @@
...
@@ -8,38 +8,40 @@
#include "abstract_task.h"
#include "abstract_task.h"
namespace
pls
{
namespace
pls
{
namespace
internal
{
namespace
internal
{
namespace
scheduling
{
namespace
scheduling
{
// forward declaration
class
scheduler
;
// forward declaration
class
scheduler
;
/**
 * Per-worker-thread bookkeeping accessed through thread-local storage:
 * the owning scheduler, the root/current task chain, the task stack,
 * the thread id, a lock guarding the task chain, and a PRNG used to pick
 * steal victims (seeded with the thread id for determinism per thread).
 */
struct thread_state {
  scheduler *scheduler_;
  abstract_task *root_task_;
  abstract_task *current_task_;
  data_structures::aligned_stack *task_stack_;
  size_t id_;
  base::spin_lock lock_;
  std::minstd_rand random_;

  // Default: everything null, id 0.
  thread_state() :
      scheduler_{nullptr},
      root_task_{nullptr},
      current_task_{nullptr},
      task_stack_{nullptr},
      id_{0},
      random_{id_} {};

  // Bind this state to a scheduler, a task stack and a worker id.
  thread_state(scheduler *scheduler, data_structures::aligned_stack *task_stack, unsigned int id) :
      scheduler_{scheduler},
      root_task_{nullptr},
      current_task_{nullptr},
      task_stack_{task_stack},
      id_{id},
      random_{id_} {}
};
}
}
}
}
}
#endif //PLS_THREAD_STATE_H
#endif //PLS_THREAD_STATE_H
lib/pls/include/pls/pls.h
View file @
aa270645
...
@@ -8,18 +8,20 @@
...
@@ -8,18 +8,20 @@
#include "pls/internal/helpers/unique_id.h"
#include "pls/internal/helpers/unique_id.h"
namespace
pls
{
namespace
pls
{
using
internal
::
scheduling
::
static_scheduler_memory
;
using
internal
::
scheduling
::
malloc_scheduler_memory
;
using
internal
::
scheduling
::
scheduler
;
using
internal
::
scheduling
::
static_scheduler_memory
;
using
task_id
=
internal
::
scheduling
::
abstract_task
::
id
;
using
internal
::
scheduling
::
malloc_scheduler_memory
;
using
unique_id
=
internal
::
helpers
::
unique_id
;
using
internal
::
scheduling
::
scheduler
;
using
task_id
=
internal
::
scheduling
::
abstract_task
::
id
;
using
internal
::
scheduling
::
fork_join_sub_task
;
using
unique_id
=
internal
::
helpers
::
unique_id
;
using
internal
::
scheduling
::
fork_join_task
;
using
internal
::
scheduling
::
fork_join_sub_task
;
using
internal
::
scheduling
::
fork_join_task
;
using
algorithm
::
invoke_parallel
;
using
algorithm
::
invoke_parallel
;
}
}
#endif
#endif
lib/pls/src/internal/base/alignment.cpp
View file @
aa270645
...
@@ -2,26 +2,28 @@
...
@@ -2,26 +2,28 @@
#include "pls/internal/base/system_details.h"
#include "pls/internal/base/system_details.h"
namespace
pls
{
namespace
pls
{
namespace
internal
{
namespace
internal
{
namespace
base
{
namespace
base
{
namespace
alignment
{
namespace
alignment
{
// Allocates `size` bytes aligned to the cache line size.
void *allocate_aligned(size_t size) {
  return aligned_alloc(system_details::CACHE_LINE_SIZE, size);
}

// Rounds `size` up to the next multiple of the cache line size
// (returns `size` unchanged if it is already aligned).
std::uintptr_t next_alignment(std::uintptr_t size) {
  std::uintptr_t miss_alignment = size % base::system_details::CACHE_LINE_SIZE;
  if (miss_alignment == 0) {
    return size;
  } else {
    return size + (base::system_details::CACHE_LINE_SIZE - miss_alignment);
  }
}

// Pointer overload: rounds `pointer` up to the next cache-line boundary.
char *next_alignment(char *pointer) {
  return reinterpret_cast<char *>(next_alignment(reinterpret_cast<std::uintptr_t>(pointer)));
}
}
}
}
}
}
}
}
lib/pls/src/internal/base/barrier.cpp
View file @
aa270645
#include "pls/internal/base/barrier.h"
#include "pls/internal/base/barrier.h"
namespace
pls
{
namespace
pls
{
namespace
internal
{
namespace
internal
{
namespace
base
{
namespace
base
{
// Thin RAII wrapper around a POSIX pthread barrier for `count` participants.
barrier::barrier(const unsigned int count) : barrier_{} {
  pthread_barrier_init(&barrier_, nullptr, count);
}

barrier::~barrier() {
  pthread_barrier_destroy(&barrier_);
}

// Blocks until `count` threads have called wait().
void barrier::wait() {
  pthread_barrier_wait(&barrier_);
}
}
}
}
}
}
}
lib/pls/src/internal/base/tas_spin_lock.cpp
View file @
aa270645
...
@@ -2,33 +2,35 @@
...
@@ -2,33 +2,35 @@
#include "pls/internal/base/tas_spin_lock.h"
#include "pls/internal/base/tas_spin_lock.h"
namespace
pls
{
namespace
pls
{
namespace
internal
{
namespace
internal
{
namespace
base
{
namespace
base
{
// Test-and-set spin lock: spins on atomic_flag::test_and_set (acquire order)
// and yields the CPU every yield_at_tries_ failed attempts so other threads
// can make progress.
void tas_spin_lock::lock() {
  PROFILE_LOCK("Acquire Lock")
  int tries = 0;
  while (flag_.test_and_set(std::memory_order_acquire)) {
    tries++;
    if (tries % yield_at_tries_ == 0) {
      this_thread::yield();
    }
  }
}

// Tries to acquire the lock, giving up after `num_tries` failed attempts.
// Returns true on success, false once the attempt budget is exhausted.
//
// Fix: the previous version decremented the unsigned counter before testing
// `num_tries <= 0`, so calling try_lock(0) wrapped the counter to UINT_MAX
// and spun ~4 billion times instead of failing immediately.
bool tas_spin_lock::try_lock(unsigned int num_tries) {
  PROFILE_LOCK("Try Acquire Lock")
  while (flag_.test_and_set(std::memory_order_acquire)) {
    // Test before decrementing: num_tries is unsigned, `--` on 0 would wrap.
    if (num_tries == 0 || --num_tries == 0) {
      return false;
    }
  }
  return true;
}

// Releases the lock; release ordering publishes all writes made inside the
// critical section to the next acquirer.
void tas_spin_lock::unlock() {
  flag_.clear(std::memory_order_release);
}
}
}
}
}
lib/pls/src/internal/base/thread.cpp
View file @
aa270645
#include "pls/internal/base/thread.h"
#include "pls/internal/base/thread.h"
namespace
pls
{
namespace
pls
{
namespace
internal
{
namespace
internal
{
namespace
base
{
namespace
base
{
#ifdef PLS_THREAD_SPECIFIC_PTHREAD
#ifdef PLS_THREAD_SPECIFIC_PTHREAD
pthread_key_t
this_thread
::
local_storage_key_
=
false
;
pthread_key_t
this_thread
::
local_storage_key_
=
false
;
bool
this_thread
::
local_storage_key_initialized_
;
bool
this_thread
::
local_storage_key_initialized_
;
#endif
#endif
#ifdef PLS_THREAD_SPECIFIC_COMPILER
#ifdef PLS_THREAD_SPECIFIC_COMPILER
__thread
void
*
this_thread
::
local_state_
;
__thread
void
*
this_thread
::
local_state_
;
#endif
#endif
// implementation in header (C++ templating)
// implementation in header (C++ templating)
}
}
}
}
}
}
lib/pls/src/internal/base/ttas_spin_lock.cpp
View file @
aa270645
...
@@ -2,46 +2,48 @@
...
@@ -2,46 +2,48 @@
#include "pls/internal/base/ttas_spin_lock.h"
#include "pls/internal/base/ttas_spin_lock.h"
namespace
pls
{
namespace
pls
{
namespace
internal
{
namespace
internal
{
namespace
base
{
namespace
base
{
void
ttas_spin_lock
::
lock
()
{
PROFILE_LOCK
(
"Acquire Lock"
)
void
ttas_spin_lock
::
lock
()
{
int
tries
=
0
;
PROFILE_LOCK
(
"Acquire Lock"
)
int
expected
=
0
;
int
tries
=
0
;
int
expected
=
0
;
do
{
while
(
flag_
.
load
(
std
::
memory_order_relaxed
)
==
1
)
{
do
{
tries
++
;
while
(
flag_
.
load
(
std
::
memory_order_relaxed
)
==
1
)
{
if
(
tries
%
yield_at_tries_
==
0
)
{
tries
++
;
this_thread
::
yield
();
if
(
tries
%
yield_at_tries_
==
0
)
{
}
this_thread
::
yield
();
}
}
expected
=
0
;
}
while
(
!
flag_
.
compare_exchange_weak
(
expected
,
1
,
std
::
memory_order_acquire
));
}
bool
ttas_spin_lock
::
try_lock
(
unsigned
int
num_tries
)
{
PROFILE_LOCK
(
"Try Acquire Lock"
)
int
expected
=
0
;
do
{
while
(
flag_
.
load
(
std
::
memory_order_relaxed
)
==
1
)
{
num_tries
--
;
if
(
num_tries
<=
0
)
{
return
false
;
}
}
expected
=
0
;
}
while
(
!
flag_
.
compare_exchange_weak
(
expected
,
1
,
std
::
memory_order_acquire
));
return
true
;
}
void
ttas_spin_lock
::
unlock
()
{
flag_
.
store
(
0
,
std
::
memory_order_release
);
}
}
}
}
expected
=
0
;
}
while
(
!
flag_
.
compare_exchange_weak
(
expected
,
1
,
std
::
memory_order_acquire
));
}
bool
ttas_spin_lock
::
try_lock
(
unsigned
int
num_tries
)
{
PROFILE_LOCK
(
"Try Acquire Lock"
)
int
expected
=
0
;
do
{
while
(
flag_
.
load
(
std
::
memory_order_relaxed
)
==
1
)
{
num_tries
--
;
if
(
num_tries
<=
0
)
{
return
false
;
}
}
expected
=
0
;
}
while
(
!
flag_
.
compare_exchange_weak
(
expected
,
1
,
std
::
memory_order_acquire
));
return
true
;
}
void
ttas_spin_lock
::
unlock
()
{
flag_
.
store
(
0
,
std
::
memory_order_release
);
}
}
}
}
}
lib/pls/src/internal/data_structures/aligned_stack.cpp
View file @
aa270645
...
@@ -2,12 +2,14 @@
...
@@ -2,12 +2,14 @@
#include "pls/internal/base/system_details.h"
#include "pls/internal/base/system_details.h"
namespace
pls
{
namespace
pls
{
namespace
internal
{
namespace
internal
{
namespace
data_structures
{
namespace
data_structures
{
// Builds a stack over an externally owned memory region of `size` bytes.
// The head starts at the first cache-line-aligned address inside the region.
aligned_stack::aligned_stack(char *memory_region, const std::size_t size) :
    memory_start_{memory_region},
    memory_end_{memory_region + size},
    head_{base::alignment::next_alignment(memory_start_)} {}
}
}
}
}
}
lib/pls/src/internal/data_structures/deque.cpp
View file @
aa270645
...
@@ -3,56 +3,58 @@
...
@@ -3,56 +3,58 @@
#include "pls/internal/data_structures/deque.h"
#include "pls/internal/data_structures/deque.h"
namespace
pls
{
namespace
pls
{
namespace
internal
{
namespace
internal
{
namespace
data_structures
{
namespace
data_structures
{
// Removes and returns the head item, or nullptr if the deque is empty.
// The whole operation is serialized by the deque's spin lock.
deque_item *deque_internal::pop_head_internal() {
  std::lock_guard<base::spin_lock> lock{lock_};

  if (head_ == nullptr) {
    return nullptr;
  }

  deque_item *result = head_;
  head_ = head_->prev_;
  if (head_ == nullptr) {
    // Removed the last item — the deque is now empty.
    tail_ = nullptr;
  } else {
    head_->next_ = nullptr;
  }

  return result;
}

// Removes and returns the tail item, or nullptr if the deque is empty.
deque_item *deque_internal::pop_tail_internal() {
  std::lock_guard<base::spin_lock> lock{lock_};

  if (tail_ == nullptr) {
    return nullptr;
  }

  deque_item *result = tail_;
  tail_ = tail_->next_;
  if (tail_ == nullptr) {
    // Removed the last item — the deque is now empty.
    head_ = nullptr;
  } else {
    tail_->prev_ = nullptr;
  }

  return result;
}

// Appends `new_item` at the tail (head is also updated when the deque was empty).
void deque_internal::push_tail_internal(deque_item *new_item) {
  std::lock_guard<base::spin_lock> lock{lock_};

  if (tail_ != nullptr) {
    tail_->prev_ = new_item;
  } else {
    head_ = new_item;
  }
  new_item->next_ = tail_;
  new_item->prev_ = nullptr;
  tail_ = new_item;
}
}
}
}
}
}
}
lib/pls/src/internal/scheduling/abstract_task.cpp
View file @
aa270645
...
@@ -5,72 +5,74 @@
...
@@ -5,72 +5,74 @@
#include "pls/internal/scheduling/scheduler.h"
#include "pls/internal/scheduling/scheduler.h"
namespace
pls
{
namespace
pls
{
namespace
internal
{
namespace
internal
{
namespace
scheduling
{
namespace
scheduling
{
// Attempts to steal work from another worker thread.
// Returns true if an 'internal' steal succeeded (work was handed back to the
// internal scheduler); false otherwise (including after a successful
// 'top level' steal, which executes the stolen task itself).
bool abstract_task::steal_work() {
  PROFILE_STEALING("abstract_task::steal_work")
  const auto my_state = base::this_thread::state<thread_state>();
  const auto my_scheduler = my_state->scheduler_;

  const size_t my_id = my_state->id_;
  // Start probing victims at a random offset so threads do not all hammer
  // the same victim.
  const size_t offset = my_state->random_() % my_scheduler->num_threads();
  const size_t max_tries = 1; // my_scheduler->num_threads(); TODO: Tune this value
  for (size_t i = 0; i < max_tries; i++) {
    size_t target = (offset + i) % my_scheduler->num_threads();
    if (target == my_id) {
      continue;
    }
    auto target_state = my_scheduler->thread_state_for(target);

    // TODO: Cleaner Locking Using std::guarded_lock
    target_state->lock_.lock();

    // Dig down to our level
    PROFILE_STEALING("Go to our level")
    abstract_task *current_task = target_state->root_task_;
    while (current_task != nullptr && current_task->depth() < depth()) {
      current_task = current_task->child_task_;
    }
    PROFILE_END_BLOCK

    // Try to steal 'internal', e.g. for_join_sub_tasks in a fork_join_task constellation
    PROFILE_STEALING("Internal Steal")
    if (current_task != nullptr) {
      // See if it equals our type and depth of task
      if (current_task->unique_id_ == unique_id_ &&
          current_task->depth_ == depth_) {
        if (internal_stealing(current_task)) {
          // internal steal was a success, hand it back to the internal scheduler
          target_state->lock_.unlock();
          return true;
        }

        // No success, we need to steal work from a deeper level using 'top level task stealing'
        current_task = current_task->child_task_;
      }
    }
    PROFILE_END_BLOCK;

    // Execute 'top level task steal' if possible
    // (only try deeper tasks to keep depth restricted stealing).
    PROFILE_STEALING("Top Level Steal")
    while (current_task != nullptr) {
      auto lock = &target_state->lock_;
      // split_task unlocks `lock` itself on success — see fork_join_task::split_task.
      if (current_task->split_task(lock)) {
        // internal steal was no success (we did a top level task steal)
        return false;
      }
      current_task = current_task->child_task_;
    }
    PROFILE_END_BLOCK;
    target_state->lock_.unlock();
  }

  // internal steal was no success
  return false;
}
}
}
}
}
lib/pls/src/internal/scheduling/fork_join_task.cpp
View file @
aa270645
...
@@ -4,131 +4,133 @@
...
@@ -4,131 +4,133 @@
#include "pls/internal/scheduling/fork_join_task.h"
#include "pls/internal/scheduling/fork_join_task.h"
namespace
pls
{
namespace
pls
{
namespace
internal
{
namespace
internal
{
namespace
scheduling
{
namespace
scheduling
{
// Default: unattached sub-task with no pending children.
fork_join_sub_task::fork_join_sub_task() :
    data_structures::deque_item{},
    ref_count_{0},
    parent_{nullptr},
    tbb_task_{nullptr},
    stack_state_{nullptr} {}

// Copy: copies the deque linkage but resets all scheduling state — a copied
// sub-task starts with no parent, no owning task and no pending children.
fork_join_sub_task::fork_join_sub_task(const fork_join_sub_task &other) :
    data_structures::deque_item(other),
    ref_count_{0},
    parent_{nullptr},
    tbb_task_{nullptr},
    stack_state_{nullptr} {}

// Runs the user body, then helps with / waits for all spawned children,
// finally notifying the parent that this child finished.
void fork_join_sub_task::execute() {
  PROFILE_WORK_BLOCK("execute sub_task")
  tbb_task_->currently_executing_ = this;
  execute_internal();
  tbb_task_->currently_executing_ = nullptr;
  PROFILE_END_BLOCK
  wait_for_all();

  if (parent_ != nullptr) {
    // NOTE(review): this decrement looks unsynchronized — confirm ref_count_
    // is atomic or otherwise protected against concurrent stealers.
    parent_->ref_count_--;
  }
}

// Registers `sub_task` as our child and publishes it on the owning task's
// deque so it can be executed locally or stolen.
void fork_join_sub_task::spawn_child_internal(fork_join_sub_task *sub_task) {
  // Keep our refcount up to date
  ref_count_++;

  // Assign forced values
  sub_task->parent_ = this;
  sub_task->tbb_task_ = tbb_task_;
  sub_task->stack_state_ = tbb_task_->my_stack_->save_state();

  tbb_task_->deque_.push_tail(sub_task);
}

// Work-helping join: while children are pending, execute local sub-tasks;
// if none are available, try to steal. Restores the stack state afterwards.
void fork_join_sub_task::wait_for_all() {
  while (ref_count_ > 0) {
    PROFILE_STEALING("get local sub task")
    fork_join_sub_task *local_task = tbb_task_->get_local_sub_task();
    PROFILE_END_BLOCK
    if (local_task != nullptr) {
      local_task->execute();
    } else {
      // Try to steal work.
      // External steal will be executed implicitly if success
      PROFILE_STEALING("steal work")
      bool internal_steal_success = tbb_task_->steal_work();
      PROFILE_END_BLOCK
      if (internal_steal_success) {
        tbb_task_->last_stolen_->execute();
      }
    }
  }
  tbb_task_->my_stack_->reset_state(stack_state_);
}
// Fetch work from our own end of the deque (LIFO order, best cache locality).
// Returns nullptr when the deque holds no local work.
fork_join_sub_task* fork_join_task::get_local_sub_task() {
  return deque_.pop_tail();
}
// Fetch work from the opposite (head) end of the deque, as a thief would
// (FIFO order). Returns nullptr when there is nothing to steal.
fork_join_sub_task* fork_join_task::get_stolen_sub_task() {
  return deque_.pop_head();
}
/**
 * Try to steal a single sub-task from another fork_join_task.
 *
 * @param other_task The victim task to steal from. Must actually be a
 *                   fork_join_task (callers dispatch by task type).
 * @return true if a sub-task was stolen and stored in last_stolen_ for
 *         immediate execution, false if the victim's deque was empty.
 */
bool fork_join_task::internal_stealing(abstract_task* other_task) {
  // Fixed label typo ("internal_stealin" -> "internal_stealing") so the
  // profiler reports the correct scope name.
  PROFILE_STEALING("fork_join_task::internal_stealing")

  // static_cast is the correct named cast for a known downcast within a
  // class hierarchy; reinterpret_cast would silently produce a wrong
  // pointer if any base adjustment were required.
  auto cast_other_task = static_cast<fork_join_task*>(other_task);

  auto stolen_sub_task = cast_other_task->get_stolen_sub_task();
  if (stolen_sub_task == nullptr) {
    return false;
  } else {
    // Make sub-task belong to our fork_join_task instance
    stolen_sub_task->tbb_task_ = this;
    stolen_sub_task->stack_state_ = my_stack_->save_state();

    // We will execute this next without explicitly moving it onto our stack storage
    last_stolen_ = stolen_sub_task;
    return true;
  }
}
// Try to split off one stolen sub-task into a fresh fork_join_task and run
// it to completion on the calling thread.
//
// NOTE(review): `lock` appears to be held by the caller on entry. It is
// released here only on the success path (before executing the task); on
// the failure path (empty deque) it stays locked — presumably the caller
// unlocks it then. Confirm against the call site; see the TODO below.
bool fork_join_task::split_task(base::spin_lock* lock) {
  PROFILE_STEALING("fork_join_task::split_task")
  fork_join_sub_task* stolen_sub_task = get_stolen_sub_task();
  if (stolen_sub_task == nullptr) {
    return false;
  }
  // Wrap the stolen sub-task in a task of its own, reusing our unique id.
  fork_join_task task{stolen_sub_task, this->unique_id()};

  // In success case, unlock.
  // TODO: this locking is complicated and error prone.
  lock->unlock();
  // Run the wrapped task synchronously at the same scheduling depth.
  scheduler::execute_task(task, depth());
  return true;
}
void
fork_join_task
::
execute
()
{
PROFILE_WORK_BLOCK
(
"execute fork_join_task"
);
// Bind this instance to our OS thread
my_stack_
=
base
::
this_thread
::
state
<
thread_state
>
()
->
task_stack_
;
root_task_
->
tbb_task_
=
this
;
root_task_
->
stack_state_
=
my_stack_
->
save_state
();
// Execute it on our OS thread until its finished
root_task_
->
execute
();
}
// Read-only accessor: the sub-task currently running on this task, or
// nullptr if none is executing.
fork_join_sub_task* fork_join_task::currently_executing() const {
  return currently_executing_;
}
/**
 * Construct a fork/join task around a pre-built root sub-task.
 *
 * @param root_task The root of the sub-task tree; executed by execute().
 * @param id        Unique id forwarded to the abstract_task base (depth 0).
 *
 * my_stack_ and currently_executing_ stay null until execute() binds the
 * task to an OS thread.
 */
fork_join_task::fork_join_task(fork_join_sub_task* root_task,
                               const abstract_task::id& id)
    : abstract_task{0, id},
      root_task_{root_task},
      currently_executing_{nullptr},
      my_stack_{nullptr},
      deque_{},
      last_stolen_{nullptr} {}
// Fixed: dropped the spurious trailing ';' after the constructor body
// (an empty declaration that trips -Wextra-semi / -pedantic).
}
}
}
}
tbb_task_
->
my_stack_
->
reset_state
(
stack_state_
);
}
fork_join_sub_task
*
fork_join_task
::
get_local_sub_task
()
{
return
deque_
.
pop_tail
();
}
fork_join_sub_task
*
fork_join_task
::
get_stolen_sub_task
()
{
return
deque_
.
pop_head
();
}
bool
fork_join_task
::
internal_stealing
(
abstract_task
*
other_task
)
{
PROFILE_STEALING
(
"fork_join_task::internal_stealin"
)
auto
cast_other_task
=
reinterpret_cast
<
fork_join_task
*>
(
other_task
);
auto
stolen_sub_task
=
cast_other_task
->
get_stolen_sub_task
();
if
(
stolen_sub_task
==
nullptr
)
{
return
false
;
}
else
{
// Make sub-task belong to our fork_join_task instance
stolen_sub_task
->
tbb_task_
=
this
;
stolen_sub_task
->
stack_state_
=
my_stack_
->
save_state
();
// We will execute this next without explicitly moving it onto our stack storage
last_stolen_
=
stolen_sub_task
;
return
true
;
}
}
bool
fork_join_task
::
split_task
(
base
::
spin_lock
*
lock
)
{
PROFILE_STEALING
(
"fork_join_task::split_task"
)
fork_join_sub_task
*
stolen_sub_task
=
get_stolen_sub_task
();
if
(
stolen_sub_task
==
nullptr
)
{
return
false
;
}
fork_join_task
task
{
stolen_sub_task
,
this
->
unique_id
()};
// In success case, unlock.
// TODO: this locking is complicated and error prone.
lock
->
unlock
();
scheduler
::
execute_task
(
task
,
depth
());
return
true
;
}
void
fork_join_task
::
execute
()
{
PROFILE_WORK_BLOCK
(
"execute fork_join_task"
);
// Bind this instance to our OS thread
my_stack_
=
base
::
this_thread
::
state
<
thread_state
>
()
->
task_stack_
;
root_task_
->
tbb_task_
=
this
;
root_task_
->
stack_state_
=
my_stack_
->
save_state
();
// Execute it on our OS thread until its finished
root_task_
->
execute
();
}
fork_join_sub_task
*
fork_join_task
::
currently_executing
()
const
{
return
currently_executing_
;
}
fork_join_task
::
fork_join_task
(
fork_join_sub_task
*
root_task
,
const
abstract_task
::
id
&
id
)
:
abstract_task
{
0
,
id
},
root_task_
{
root_task
},
currently_executing_
{
nullptr
},
my_stack_
{
nullptr
},
deque_
{},
last_stolen_
{
nullptr
}
{}
}
}
}
}
lib/pls/src/internal/scheduling/root_task.cpp
View file @
aa270645
#include "pls/internal/scheduling/root_task.h"
#include "pls/internal/scheduling/root_task.h"
namespace
pls
{
namespace
pls
{
namespace
internal
{
namespace
internal
{
namespace
scheduling
{
namespace
scheduling
{
}
}
}
}
}
}
lib/pls/src/internal/scheduling/run_on_n_threads_task.cpp
View file @
aa270645
#include "pls/internal/scheduling/run_on_n_threads_task.h"
#include "pls/internal/scheduling/run_on_n_threads_task.h"
namespace
pls
{
namespace
pls
{
namespace
internal
{
namespace
internal
{
namespace
scheduling
{
namespace
scheduling
{
}
}
}
}
}
}
lib/pls/src/internal/scheduling/scheduler.cpp
View file @
aa270645
...
@@ -2,60 +2,63 @@
...
@@ -2,60 +2,63 @@
#include "pls/internal/base/error_handling.h"
#include "pls/internal/base/error_handling.h"
namespace
pls
{
namespace
pls
{
namespace
internal
{
namespace
internal
{
namespace
scheduling
{
namespace
scheduling
{
scheduler
::
scheduler
(
scheduler_memory
*
memory
,
const
unsigned
int
num_threads
)
:
num_threads_
{
num_threads
},
scheduler
::
scheduler
(
scheduler_memory
*
memory
,
const
unsigned
int
num_threads
)
:
memory_
{
memory
},
num_threads_
{
num_threads
},
sync_barrier_
{
num_threads
+
1
},
memory_
{
memory
},
terminated_
{
false
}
{
sync_barrier_
{
num_threads
+
1
},
if
(
num_threads_
>
memory_
->
max_threads
())
{
terminated_
{
false
}
{
PLS_ERROR
(
"Tried to create scheduler with more OS threads than pre-allocated memory."
);
if
(
num_threads_
>
memory_
->
max_threads
())
{
}
PLS_ERROR
(
"Tried to create scheduler with more OS threads than pre-allocated memory."
);
}
for
(
unsigned
int
i
=
0
;
i
<
num_threads_
;
i
++
)
{
// Placement new is required, as the memory of `memory_` is not required to be initialized.
for
(
unsigned
int
i
=
0
;
i
<
num_threads_
;
i
++
)
{
new
((
void
*
)
memory_
->
thread_state_for
(
i
))
thread_state
{
this
,
memory_
->
task_stack_for
(
i
),
i
};
// Placement new is required, as the memory of `memory_` is not required to be initialized.
new
((
void
*
)
memory_
->
thread_for
(
i
))
base
::
thread
<
void
(
*
)(),
thread_state
>
(
&
worker_routine
,
memory_
->
thread_state_for
(
i
));
new
((
void
*
)
memory_
->
thread_state_for
(
i
))
thread_state
{
this
,
memory_
->
task_stack_for
(
i
),
i
};
}
new
((
void
*
)
memory_
->
thread_for
(
i
))
base
::
thread
<
void
(
*
)(),
thread_state
>
(
&
worker_routine
,
}
memory_
->
thread_state_for
(
i
));
}
scheduler
::~
scheduler
()
{
}
terminate
();
}
scheduler
::~
scheduler
()
{
terminate
();
void
worker_routine
()
{
}
auto
my_state
=
base
::
this_thread
::
state
<
thread_state
>
();
void
worker_routine
()
{
while
(
true
)
{
auto
my_state
=
base
::
this_thread
::
state
<
thread_state
>
();
my_state
->
scheduler_
->
sync_barrier_
.
wait
();
if
(
my_state
->
scheduler_
->
terminated_
)
{
while
(
true
)
{
return
;
my_state
->
scheduler_
->
sync_barrier_
.
wait
();
}
if
(
my_state
->
scheduler_
->
terminated_
)
{
return
;
// The root task must only return when all work is done,
}
// because of this a simple call is enough to ensure the
// fork-join-section is done (logically joined back into our main thread).
// The root task must only return when all work is done,
my_state
->
root_task_
->
execute
();
// because of this a simple call is enough to ensure the
// fork-join-section is done (logically joined back into our main thread).
my_state
->
scheduler_
->
sync_barrier_
.
wait
();
my_state
->
root_task_
->
execute
();
}
}
my_state
->
scheduler_
->
sync_barrier_
.
wait
();
}
void
scheduler
::
terminate
(
bool
wait_for_workers
)
{
}
if
(
terminated_
)
{
return
;
void
scheduler
::
terminate
(
bool
wait_for_workers
)
{
}
if
(
terminated_
)
{
return
;
terminated_
=
true
;
}
sync_barrier_
.
wait
();
terminated_
=
true
;
if
(
wait_for_workers
)
{
sync_barrier_
.
wait
();
for
(
unsigned
int
i
=
0
;
i
<
num_threads_
;
i
++
)
{
memory_
->
thread_for
(
i
)
->
join
();
if
(
wait_for_workers
)
{
}
for
(
unsigned
int
i
=
0
;
i
<
num_threads_
;
i
++
)
{
}
memory_
->
thread_for
(
i
)
->
join
();
}
}
}
}
}
}
}
}
}
}
lib/pls/src/internal/scheduling/scheduler_memory.cpp
View file @
aa270645
#include "pls/internal/scheduling/scheduler_memory.h"
#include "pls/internal/scheduling/scheduler_memory.h"
namespace
pls
{
namespace
pls
{
namespace
internal
{
namespace
internal
{
namespace
scheduling
{
namespace
scheduling
{
malloc_scheduler_memory
::
malloc_scheduler_memory
(
const
size_t
num_threads
,
const
size_t
memory_per_stack
)
:
num_threads_
{
num_threads
}
{
threads_
=
reinterpret_cast
<
aligned_thread
*>
(
base
::
alignment
::
allocate_aligned
(
num_threads
*
sizeof
(
aligned_thread
)));
thread_states_
=
reinterpret_cast
<
aligned_thread_state
*>
(
base
::
alignment
::
allocate_aligned
(
num_threads
*
sizeof
(
aligned_thread_state
)));
task_stacks_
=
reinterpret_cast
<
aligned_aligned_stack
*>
(
base
::
alignment
::
allocate_aligned
(
num_threads
*
sizeof
(
aligned_aligned_stack
)));
malloc_scheduler_memory
::
malloc_scheduler_memory
(
const
size_t
num_threads
,
const
size_t
memory_per_stack
)
:
task_stacks_memory_
=
reinterpret_cast
<
char
**>
(
base
::
alignment
::
allocate_aligned
(
num_threads
*
sizeof
(
char
*
)));
num_threads_
{
num_threads
}
{
for
(
size_t
i
=
0
;
i
<
num_threads_
;
i
++
)
{
threads_
=
task_stacks_memory_
[
i
]
=
reinterpret_cast
<
char
*>
(
base
::
alignment
::
allocate_aligned
(
memory_per_stack
));
reinterpret_cast
<
aligned_thread
*>
(
base
::
alignment
::
allocate_aligned
(
num_threads
*
sizeof
(
aligned_thread
)));
new
((
void
*
)
task_stacks_
[
i
].
pointer
())
data_structures
::
aligned_stack
(
task_stacks_memory_
[
i
],
memory_per_stack
);
thread_states_
=
reinterpret_cast
<
aligned_thread_state
*>
(
base
::
alignment
::
allocate_aligned
(
}
num_threads
*
sizeof
(
aligned_thread_state
)));
}
malloc_scheduler_memory
::~
malloc_scheduler_memory
()
{
task_stacks_
=
reinterpret_cast
<
aligned_aligned_stack
*>
(
base
::
alignment
::
allocate_aligned
(
free
(
threads_
);
num_threads
*
sizeof
(
aligned_aligned_stack
)));
free
(
thread_states_
);
task_stacks_memory_
=
reinterpret_cast
<
char
**>
(
base
::
alignment
::
allocate_aligned
(
num_threads
*
sizeof
(
char
*
)));
for
(
size_t
i
=
0
;
i
<
num_threads_
;
i
++
)
{
task_stacks_memory_
[
i
]
=
reinterpret_cast
<
char
*>
(
base
::
alignment
::
allocate_aligned
(
memory_per_stack
));
new
((
void
*
)
task_stacks_
[
i
].
pointer
())
data_structures
::
aligned_stack
(
task_stacks_memory_
[
i
],
memory_per_stack
);
}
}
// Release all memory acquired in the constructor.
//
// NOTE(review): the aligned_stack objects placement-new'ed into
// task_stacks_ (and the thread/thread_state objects) are freed without
// their destructors being called — safe only if those destructors are
// trivial; confirm.
// NOTE(review): assumes buffers from base::alignment::allocate_aligned
// may be released with plain free(); verify against its implementation.
malloc_scheduler_memory::~malloc_scheduler_memory() {
  free(threads_);
  free(thread_states_);

  // Each per-thread stack buffer was allocated individually.
  for (size_t i = 0; i < num_threads_; i++) {
    free(task_stacks_memory_[i]);
  }
  // Then the arrays holding the stack objects and buffer pointers.
  free(task_stacks_);
  free(task_stacks_memory_);
}
for
(
size_t
i
=
0
;
i
<
num_threads_
;
i
++
)
{
}
free
(
task_stacks_memory_
[
i
]);
}
}
free
(
task_stacks_
);
free
(
task_stacks_memory_
);
}
}
}
}
}
lib/pls/src/internal/scheduling/thread_state.cpp
View file @
aa270645
#include "pls/internal/scheduling/thread_state.h"
#include "pls/internal/scheduling/thread_state.h"
namespace
pls
{
namespace
pls
{
namespace
internal
{
namespace
internal
{
namespace
scheduling
{
namespace
scheduling
{
}
}
}
}
}
}
test/base_tests.cpp
View file @
aa270645
...
@@ -13,73 +13,73 @@ static bool base_tests_visited;
...
@@ -13,73 +13,73 @@ static bool base_tests_visited;
static
int
base_tests_local_value_one
;
static
int
base_tests_local_value_one
;
static
vector
<
int
>
base_tests_local_value_two
;
static
vector
<
int
>
base_tests_local_value_two
;
TEST_CASE
(
"thread creation and joining"
,
"[internal/data_structures/thread.h]"
)
{
TEST_CASE
(
"thread creation and joining"
,
"[internal/data_structures/thread.h]"
)
{
base_tests_visited
=
false
;
base_tests_visited
=
false
;
auto
t1
=
start_thread
([]()
{
base_tests_visited
=
true
;
});
auto
t1
=
start_thread
([]()
{
base_tests_visited
=
true
;
});
t1
.
join
();
t1
.
join
();
REQUIRE
(
base_tests_visited
);
REQUIRE
(
base_tests_visited
);
}
}
TEST_CASE
(
"thread state"
,
"[internal/data_structures/thread.h]"
)
{
TEST_CASE
(
"thread state"
,
"[internal/data_structures/thread.h]"
)
{
int
state_one
=
1
;
int
state_one
=
1
;
vector
<
int
>
state_two
{
1
,
2
};
vector
<
int
>
state_two
{
1
,
2
};
auto
t1
=
start_thread
([]()
{
base_tests_local_value_one
=
*
this_thread
::
state
<
int
>
();
},
&
state_one
);
auto
t1
=
start_thread
([]()
{
base_tests_local_value_one
=
*
this_thread
::
state
<
int
>
();
},
&
state_one
);
auto
t2
=
start_thread
([]()
{
base_tests_local_value_two
=
*
this_thread
::
state
<
vector
<
int
>>
();
},
&
state_two
);
auto
t2
=
start_thread
([]()
{
base_tests_local_value_two
=
*
this_thread
::
state
<
vector
<
int
>>
();
},
&
state_two
);
t1
.
join
();
t1
.
join
();
t2
.
join
();
t2
.
join
();
REQUIRE
(
base_tests_local_value_one
==
1
);
REQUIRE
(
base_tests_local_value_one
==
1
);
REQUIRE
(
base_tests_local_value_two
==
vector
<
int
>
{
1
,
2
});
REQUIRE
(
base_tests_local_value_two
==
vector
<
int
>
{
1
,
2
});
}
}
int
base_tests_shared_counter
;
int
base_tests_shared_counter
;
TEST_CASE
(
"spinlock protects concurrent counter"
,
"[internal/data_structures/spinlock.h]"
)
{
TEST_CASE
(
"spinlock protects concurrent counter"
,
"[internal/data_structures/spinlock.h]"
)
{
constexpr
int
num_iterations
=
1000000
;
constexpr
int
num_iterations
=
1000000
;
base_tests_shared_counter
=
0
;
base_tests_shared_counter
=
0
;
spin_lock
lock
{};
spin_lock
lock
{};
SECTION
(
"lock can be used by itself"
)
{
SECTION
(
"lock can be used by itself"
)
{
auto
t1
=
start_thread
([
&
]()
{
auto
t1
=
start_thread
([
&
]()
{
for
(
int
i
=
0
;
i
<
num_iterations
;
i
++
)
{
for
(
int
i
=
0
;
i
<
num_iterations
;
i
++
)
{
lock
.
lock
();
lock
.
lock
();
base_tests_shared_counter
++
;
base_tests_shared_counter
++
;
lock
.
unlock
();
lock
.
unlock
();
}
}
});
});
auto
t2
=
start_thread
([
&
]()
{
auto
t2
=
start_thread
([
&
]()
{
for
(
int
i
=
0
;
i
<
num_iterations
;
i
++
)
{
for
(
int
i
=
0
;
i
<
num_iterations
;
i
++
)
{
lock
.
lock
();
lock
.
lock
();
base_tests_shared_counter
--
;
base_tests_shared_counter
--
;
lock
.
unlock
();
lock
.
unlock
();
}
}
});
});
t1
.
join
();
t1
.
join
();
t2
.
join
();
t2
.
join
();
REQUIRE
(
base_tests_shared_counter
==
0
);
REQUIRE
(
base_tests_shared_counter
==
0
);
}
}
SECTION
(
"lock can be used with std::lock_guard"
)
{
SECTION
(
"lock can be used with std::lock_guard"
)
{
auto
t1
=
start_thread
([
&
]()
{
auto
t1
=
start_thread
([
&
]()
{
for
(
int
i
=
0
;
i
<
num_iterations
;
i
++
)
{
for
(
int
i
=
0
;
i
<
num_iterations
;
i
++
)
{
std
::
lock_guard
<
spin_lock
>
my_lock
{
lock
};
std
::
lock_guard
<
spin_lock
>
my_lock
{
lock
};
base_tests_shared_counter
++
;
base_tests_shared_counter
++
;
}
}
});
});
auto
t2
=
start_thread
([
&
]()
{
auto
t2
=
start_thread
([
&
]()
{
for
(
int
i
=
0
;
i
<
num_iterations
;
i
++
)
{
for
(
int
i
=
0
;
i
<
num_iterations
;
i
++
)
{
std
::
lock_guard
<
spin_lock
>
my_lock
{
lock
};
std
::
lock_guard
<
spin_lock
>
my_lock
{
lock
};
base_tests_shared_counter
--
;
base_tests_shared_counter
--
;
}
}
});
});
t1
.
join
();
t1
.
join
();
t2
.
join
();
t2
.
join
();
REQUIRE
(
base_tests_shared_counter
==
0
);
REQUIRE
(
base_tests_shared_counter
==
0
);
}
}
}
}
test/data_structures_test.cpp
View file @
aa270645
...
@@ -12,122 +12,121 @@ using namespace pls::internal::data_structures;
...
@@ -12,122 +12,121 @@ using namespace pls::internal::data_structures;
using
namespace
pls
::
internal
::
base
;
using
namespace
pls
::
internal
::
base
;
using
namespace
std
;
using
namespace
std
;
TEST_CASE
(
"aligned stack stores objects correctly"
,
"[internal/data_structures/aligned_stack.h]"
)
{
TEST_CASE
(
"aligned stack stores objects correctly"
,
"[internal/data_structures/aligned_stack.h]"
)
{
constexpr
long
data_size
=
1024
;
constexpr
long
data_size
=
1024
;
char
data
[
data_size
];
char
data
[
data_size
];
aligned_stack
stack
{
data
,
data_size
};
aligned_stack
stack
{
data
,
data_size
};
SECTION
(
"stack correctly pushes sub linesize objects"
)
{
SECTION
(
"stack correctly pushes sub linesize objects"
)
{
std
::
array
<
char
,
5
>
small_data_one
{
'a'
,
'b'
,
'c'
,
'd'
,
'e'
};
std
::
array
<
char
,
5
>
small_data_one
{
'a'
,
'b'
,
'c'
,
'd'
,
'e'
};
std
::
array
<
char
,
64
>
small_data_two
{};
std
::
array
<
char
,
64
>
small_data_two
{};
std
::
array
<
char
,
1
>
small_data_three
{
'A'
};
std
::
array
<
char
,
1
>
small_data_three
{
'A'
};
auto
pointer_one
=
stack
.
push
(
small_data_one
);
auto
pointer_one
=
stack
.
push
(
small_data_one
);
auto
pointer_two
=
stack
.
push
(
small_data_two
);
auto
pointer_two
=
stack
.
push
(
small_data_two
);
auto
pointer_three
=
stack
.
push
(
small_data_three
);
auto
pointer_three
=
stack
.
push
(
small_data_three
);
REQUIRE
(
reinterpret_cast
<
std
::
uintptr_t
>
(
pointer_one
)
%
system_details
::
CACHE_LINE_SIZE
==
0
);
REQUIRE
(
reinterpret_cast
<
std
::
uintptr_t
>
(
pointer_one
)
%
system_details
::
CACHE_LINE_SIZE
==
0
);
REQUIRE
(
reinterpret_cast
<
std
::
uintptr_t
>
(
pointer_two
)
%
system_details
::
CACHE_LINE_SIZE
==
0
);
REQUIRE
(
reinterpret_cast
<
std
::
uintptr_t
>
(
pointer_two
)
%
system_details
::
CACHE_LINE_SIZE
==
0
);
REQUIRE
(
reinterpret_cast
<
std
::
uintptr_t
>
(
pointer_three
)
%
system_details
::
CACHE_LINE_SIZE
==
0
);
REQUIRE
(
reinterpret_cast
<
std
::
uintptr_t
>
(
pointer_three
)
%
system_details
::
CACHE_LINE_SIZE
==
0
);
}
}
SECTION
(
"stack correctly pushes above linesize objects"
)
{
SECTION
(
"stack correctly pushes above linesize objects"
)
{
std
::
array
<
char
,
5
>
small_data_one
{
'a'
,
'b'
,
'c'
,
'd'
,
'e'
};
std
::
array
<
char
,
5
>
small_data_one
{
'a'
,
'b'
,
'c'
,
'd'
,
'e'
};
std
::
array
<
char
,
system_details
::
CACHE_LINE_SIZE
+
10
>
big_data_one
{};
std
::
array
<
char
,
system_details
::
CACHE_LINE_SIZE
+
10
>
big_data_one
{};
auto
big_pointer_one
=
stack
.
push
(
big_data_one
);
auto
big_pointer_one
=
stack
.
push
(
big_data_one
);
auto
small_pointer_one
=
stack
.
push
(
small_data_one
);
auto
small_pointer_one
=
stack
.
push
(
small_data_one
);
REQUIRE
(
reinterpret_cast
<
std
::
uintptr_t
>
(
big_pointer_one
)
%
system_details
::
CACHE_LINE_SIZE
==
0
);
REQUIRE
(
reinterpret_cast
<
std
::
uintptr_t
>
(
big_pointer_one
)
%
system_details
::
CACHE_LINE_SIZE
==
0
);
REQUIRE
(
reinterpret_cast
<
std
::
uintptr_t
>
(
small_pointer_one
)
%
system_details
::
CACHE_LINE_SIZE
==
0
);
REQUIRE
(
reinterpret_cast
<
std
::
uintptr_t
>
(
small_pointer_one
)
%
system_details
::
CACHE_LINE_SIZE
==
0
);
}
}
SECTION
(
"stack correctly stores and retrieves objects"
)
{
SECTION
(
"stack correctly stores and retrieves objects"
)
{
std
::
array
<
char
,
5
>
data_one
{
'a'
,
'b'
,
'c'
,
'd'
,
'e'
};
std
::
array
<
char
,
5
>
data_one
{
'a'
,
'b'
,
'c'
,
'd'
,
'e'
};
stack
.
push
(
data_one
);
stack
.
push
(
data_one
);
auto
retrieved_data
=
stack
.
pop
<
std
::
array
<
char
,
5
>>
();
auto
retrieved_data
=
stack
.
pop
<
std
::
array
<
char
,
5
>>
();
REQUIRE
(
retrieved_data
==
std
::
array
<
char
,
5
>
{
'a'
,
'b'
,
'c'
,
'd'
,
'e'
});
REQUIRE
(
retrieved_data
==
std
::
array
<
char
,
5
>
{
'a'
,
'b'
,
'c'
,
'd'
,
'e'
});
}
}
SECTION
(
"stack can push and pop multiple times with correct alignment"
)
{
SECTION
(
"stack can push and pop multiple times with correct alignment"
)
{
std
::
array
<
char
,
5
>
small_data_one
{
'a'
,
'b'
,
'c'
,
'd'
,
'e'
};
std
::
array
<
char
,
5
>
small_data_one
{
'a'
,
'b'
,
'c'
,
'd'
,
'e'
};
std
::
array
<
char
,
64
>
small_data_two
{};
std
::
array
<
char
,
64
>
small_data_two
{};
std
::
array
<
char
,
1
>
small_data_three
{
'A'
};
std
::
array
<
char
,
1
>
small_data_three
{
'A'
};
auto
pointer_one
=
stack
.
push
(
small_data_one
);
auto
pointer_one
=
stack
.
push
(
small_data_one
);
auto
pointer_two
=
stack
.
push
(
small_data_two
);
auto
pointer_two
=
stack
.
push
(
small_data_two
);
auto
pointer_three
=
stack
.
push
(
small_data_three
);
auto
pointer_three
=
stack
.
push
(
small_data_three
);
stack
.
pop
<
typeof
(
small_data_three
)
>
();
stack
.
pop
<
typeof
(
small_data_three
)
>
();
stack
.
pop
<
typeof
(
small_data_two
)
>
();
stack
.
pop
<
typeof
(
small_data_two
)
>
();
auto
pointer_four
=
stack
.
push
(
small_data_two
);
auto
pointer_four
=
stack
.
push
(
small_data_two
);
auto
pointer_five
=
stack
.
push
(
small_data_three
);
auto
pointer_five
=
stack
.
push
(
small_data_three
);
REQUIRE
(
reinterpret_cast
<
std
::
uintptr_t
>
(
pointer_one
)
%
system_details
::
CACHE_LINE_SIZE
==
0
);
REQUIRE
(
reinterpret_cast
<
std
::
uintptr_t
>
(
pointer_one
)
%
system_details
::
CACHE_LINE_SIZE
==
0
);
REQUIRE
(
reinterpret_cast
<
std
::
uintptr_t
>
(
pointer_two
)
%
system_details
::
CACHE_LINE_SIZE
==
0
);
REQUIRE
(
reinterpret_cast
<
std
::
uintptr_t
>
(
pointer_two
)
%
system_details
::
CACHE_LINE_SIZE
==
0
);
REQUIRE
(
reinterpret_cast
<
std
::
uintptr_t
>
(
pointer_three
)
%
system_details
::
CACHE_LINE_SIZE
==
0
);
REQUIRE
(
reinterpret_cast
<
std
::
uintptr_t
>
(
pointer_three
)
%
system_details
::
CACHE_LINE_SIZE
==
0
);
REQUIRE
(
reinterpret_cast
<
std
::
uintptr_t
>
(
pointer_four
)
%
system_details
::
CACHE_LINE_SIZE
==
0
);
REQUIRE
(
reinterpret_cast
<
std
::
uintptr_t
>
(
pointer_four
)
%
system_details
::
CACHE_LINE_SIZE
==
0
);
REQUIRE
(
reinterpret_cast
<
std
::
uintptr_t
>
(
pointer_five
)
%
system_details
::
CACHE_LINE_SIZE
==
0
);
REQUIRE
(
reinterpret_cast
<
std
::
uintptr_t
>
(
pointer_five
)
%
system_details
::
CACHE_LINE_SIZE
==
0
);
REQUIRE
(
pointer_four
==
pointer_two
);
REQUIRE
(
pointer_four
==
pointer_two
);
REQUIRE
(
pointer_five
==
pointer_three
);
REQUIRE
(
pointer_five
==
pointer_three
);
}
}
}
}
TEST_CASE
(
"deque stores objects correctly"
,
"[internal/data_structures/deque.h]"
)
{
TEST_CASE
(
"deque stores objects correctly"
,
"[internal/data_structures/deque.h]"
)
{
class
my_item
:
public
deque_item
{
class
my_item
:
public
deque_item
{
};
};
deque
<
my_item
>
deque
;
deque
<
my_item
>
deque
;
my_item
one
,
two
,
three
;
my_item
one
,
two
,
three
;
SECTION
(
"add and remove items form the tail"
)
{
SECTION
(
"add and remove items form the tail"
)
{
deque
.
push_tail
(
&
one
);
deque
.
push_tail
(
&
one
);
deque
.
push_tail
(
&
two
);
deque
.
push_tail
(
&
two
);
deque
.
push_tail
(
&
three
);
deque
.
push_tail
(
&
three
);
REQUIRE
(
deque
.
pop_tail
()
==
&
three
);
REQUIRE
(
deque
.
pop_tail
()
==
&
three
);
REQUIRE
(
deque
.
pop_tail
()
==
&
two
);
REQUIRE
(
deque
.
pop_tail
()
==
&
two
);
REQUIRE
(
deque
.
pop_tail
()
==
&
one
);
REQUIRE
(
deque
.
pop_tail
()
==
&
one
);
}
}
SECTION
(
"handles getting empty by popping the tail correctly"
)
{
SECTION
(
"handles getting empty by popping the tail correctly"
)
{
deque
.
push_tail
(
&
one
);
deque
.
push_tail
(
&
one
);
REQUIRE
(
deque
.
pop_tail
()
==
&
one
);
REQUIRE
(
deque
.
pop_tail
()
==
&
one
);
deque
.
push_tail
(
&
two
);
deque
.
push_tail
(
&
two
);
REQUIRE
(
deque
.
pop_tail
()
==
&
two
);
REQUIRE
(
deque
.
pop_tail
()
==
&
two
);
}
}
SECTION
(
"remove items form the head"
)
{
SECTION
(
"remove items form the head"
)
{
deque
.
push_tail
(
&
one
);
deque
.
push_tail
(
&
one
);
deque
.
push_tail
(
&
two
);
deque
.
push_tail
(
&
two
);
deque
.
push_tail
(
&
three
);
deque
.
push_tail
(
&
three
);
REQUIRE
(
deque
.
pop_head
()
==
&
one
);
REQUIRE
(
deque
.
pop_head
()
==
&
one
);
REQUIRE
(
deque
.
pop_head
()
==
&
two
);
REQUIRE
(
deque
.
pop_head
()
==
&
two
);
REQUIRE
(
deque
.
pop_head
()
==
&
three
);
REQUIRE
(
deque
.
pop_head
()
==
&
three
);
}
}
SECTION
(
"handles getting empty by popping the head correctly"
)
{
SECTION
(
"handles getting empty by popping the head correctly"
)
{
deque
.
push_tail
(
&
one
);
deque
.
push_tail
(
&
one
);
REQUIRE
(
deque
.
pop_head
()
==
&
one
);
REQUIRE
(
deque
.
pop_head
()
==
&
one
);
deque
.
push_tail
(
&
two
);
deque
.
push_tail
(
&
two
);
REQUIRE
(
deque
.
pop_head
()
==
&
two
);
REQUIRE
(
deque
.
pop_head
()
==
&
two
);
}
}
SECTION
(
"handles getting empty by popping the head and tail correctly"
)
{
SECTION
(
"handles getting empty by popping the head and tail correctly"
)
{
deque
.
push_tail
(
&
one
);
deque
.
push_tail
(
&
one
);
REQUIRE
(
deque
.
pop_tail
()
==
&
one
);
REQUIRE
(
deque
.
pop_tail
()
==
&
one
);
deque
.
push_tail
(
&
two
);
deque
.
push_tail
(
&
two
);
REQUIRE
(
deque
.
pop_head
()
==
&
two
);
REQUIRE
(
deque
.
pop_head
()
==
&
two
);
deque
.
push_tail
(
&
three
);
deque
.
push_tail
(
&
three
);
REQUIRE
(
deque
.
pop_tail
()
==
&
three
);
REQUIRE
(
deque
.
pop_tail
()
==
&
three
);
}
}
}
}
test/scheduling_tests.cpp
View file @
aa270645
...
@@ -4,76 +4,75 @@
...
@@ -4,76 +4,75 @@
using
namespace
pls
;
using
namespace
pls
;
class
once_sub_task
:
public
fork_join_sub_task
{
class
once_sub_task
:
public
fork_join_sub_task
{
std
::
atomic
<
int
>*
counter_
;
std
::
atomic
<
int
>
*
counter_
;
int
children_
;
int
children_
;
protected
:
protected
:
void
execute_internal
()
override
{
void
execute_internal
()
override
{
(
*
counter_
)
++
;
(
*
counter_
)
++
;
for
(
int
i
=
0
;
i
<
children_
;
i
++
)
{
for
(
int
i
=
0
;
i
<
children_
;
i
++
)
{
spawn_child
(
once_sub_task
(
counter_
,
children_
-
1
));
spawn_child
(
once_sub_task
(
counter_
,
children_
-
1
));
}
}
}
}
public
:
public
:
explicit
once_sub_task
(
std
::
atomic
<
int
>*
counter
,
int
children
)
:
explicit
once_sub_task
(
std
::
atomic
<
int
>
*
counter
,
int
children
)
:
fork_join_sub_task
(),
fork_join_sub_task
(),
counter_
{
counter
},
counter_
{
counter
},
children_
{
children
}
{}
children_
{
children
}
{}
};
};
class
force_steal_sub_task
:
public
fork_join_sub_task
{
class
force_steal_sub_task
:
public
fork_join_sub_task
{
std
::
atomic
<
int
>*
parent_counter_
;
std
::
atomic
<
int
>
*
parent_counter_
;
std
::
atomic
<
int
>*
overall_counter_
;
std
::
atomic
<
int
>
*
overall_counter_
;
protected
:
protected
:
void
execute_internal
()
override
{
void
execute_internal
()
override
{
(
*
overall_counter_
)
--
;
(
*
overall_counter_
)
--
;
if
(
overall_counter_
->
load
()
>
0
)
{
if
(
overall_counter_
->
load
()
>
0
)
{
std
::
atomic
<
int
>
counter
{
1
};
std
::
atomic
<
int
>
counter
{
1
};
spawn_child
(
force_steal_sub_task
(
&
counter
,
overall_counter_
));
spawn_child
(
force_steal_sub_task
(
&
counter
,
overall_counter_
));
while
(
counter
.
load
()
>
0
)
while
(
counter
.
load
()
>
0
);
// Spin...
;
// Spin...
}
(
*
parent_counter_
)
--
;
}
}
public
:
(
*
parent_counter_
)
--
;
explicit
force_steal_sub_task
(
std
::
atomic
<
int
>*
parent_counter
,
std
::
atomic
<
int
>*
overall_counter
)
:
}
fork_join_sub_task
(),
parent_counter_
{
parent_counter
},
public
:
overall_counter_
{
overall_counter
}
{}
explicit
force_steal_sub_task
(
std
::
atomic
<
int
>
*
parent_counter
,
std
::
atomic
<
int
>
*
overall_counter
)
:
fork_join_sub_task
(),
parent_counter_
{
parent_counter
},
overall_counter_
{
overall_counter
}
{}
};
};
TEST_CASE
(
"tbb task are scheduled correctly"
,
"[internal/scheduling/fork_join_task.h]"
)
{
TEST_CASE
(
"tbb task are scheduled correctly"
,
"[internal/scheduling/fork_join_task.h]"
)
{
malloc_scheduler_memory
my_scheduler_memory
{
8
,
2
<<
12
};
malloc_scheduler_memory
my_scheduler_memory
{
8
,
2
<<
12
};
SECTION
(
"tasks are executed exactly once"
)
{
SECTION
(
"tasks are executed exactly once"
)
{
scheduler
my_scheduler
{
&
my_scheduler_memory
,
2
};
scheduler
my_scheduler
{
&
my_scheduler_memory
,
2
};
int
start_counter
=
4
;
int
start_counter
=
4
;
int
total_tasks
=
1
+
4
+
4
*
3
+
4
*
3
*
2
+
4
*
3
*
2
*
1
;
int
total_tasks
=
1
+
4
+
4
*
3
+
4
*
3
*
2
+
4
*
3
*
2
*
1
;
std
::
atomic
<
int
>
counter
{
0
};
std
::
atomic
<
int
>
counter
{
0
};
my_scheduler
.
perform_work
([
&
]
()
{
my_scheduler
.
perform_work
([
&
]()
{
once_sub_task
sub_task
{
&
counter
,
start_counter
};
once_sub_task
sub_task
{
&
counter
,
start_counter
};
fork_join_task
task
{
&
sub_task
,
unique_id
::
create
(
42
)};
fork_join_task
task
{
&
sub_task
,
unique_id
::
create
(
42
)};
scheduler
::
execute_task
(
task
);
scheduler
::
execute_task
(
task
);
});
});
REQUIRE
(
counter
.
load
()
==
total_tasks
);
REQUIRE
(
counter
.
load
()
==
total_tasks
);
my_scheduler
.
terminate
(
true
);
my_scheduler
.
terminate
(
true
);
}
}
SECTION
(
"tasks can be stolen"
)
{
SECTION
(
"tasks can be stolen"
)
{
scheduler
my_scheduler
{
&
my_scheduler_memory
,
8
};
scheduler
my_scheduler
{
&
my_scheduler_memory
,
8
};
my_scheduler
.
perform_work
([
&
]
()
{
my_scheduler
.
perform_work
([
&
]()
{
std
::
atomic
<
int
>
dummy_parent
{
1
},
overall_counter
{
8
};
std
::
atomic
<
int
>
dummy_parent
{
1
},
overall_counter
{
8
};
force_steal_sub_task
sub_task
{
&
dummy_parent
,
&
overall_counter
};
force_steal_sub_task
sub_task
{
&
dummy_parent
,
&
overall_counter
};
fork_join_task
task
{
&
sub_task
,
unique_id
::
create
(
42
)};
fork_join_task
task
{
&
sub_task
,
unique_id
::
create
(
42
)};
scheduler
::
execute_task
(
task
);
scheduler
::
execute_task
(
task
);
});
});
my_scheduler
.
terminate
(
true
);
my_scheduler
.
terminate
(
true
);
}
}
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment