las3_pub / predictable_parallel_patterns
Commit e34ea267, authored Dec 04, 2019 by FritzFlorian

Working version of our trading-deque

parent 4cf3848f
Pipeline #1363 failed with stages in 38 seconds
Changes: 15
Showing 15 changed files with 304 additions and 148 deletions
app/benchmark_fft/main.cpp                                             +10  -12
app/playground/main.cpp                                                 +9   -5
lib/pls/include/pls/internal/base/error_handling.h                      +3   -1
lib/pls/include/pls/internal/data_structures/bounded_trading_deque.h  +101  -30
lib/pls/include/pls/internal/data_structures/delayed_initialization.h  +37   -4
lib/pls/include/pls/internal/data_structures/optional.h                +33   -0
lib/pls/include/pls/internal/scheduling/cont.h                          +8   -6
lib/pls/include/pls/internal/scheduling/cont_manager.h                 +28   -8
lib/pls/include/pls/internal/scheduling/memory_block.h                 +22  -27
lib/pls/include/pls/internal/scheduling/scheduler_impl.h               +17   -9
lib/pls/include/pls/internal/scheduling/scheduler_memory.h              +3   -3
lib/pls/include/pls/internal/scheduling/task_manager.h                 +28  -40
lib/pls/include/pls/internal/scheduling/thread_state_static.h           +2   -2
lib/pls/src/internal/scheduling/scheduler.cpp                           +2   -1
test/data_structures_test.cpp                                           +1   -0
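For orientation, the core of this commit is the owner/thief protocol of the bounded trading deque: the owner publishes a task's memory block at the bottom, a thief trades its own memory block for the top entry, and the owner later finds either its task or the traded-in block when popping. The following sketch shows the intended call sequence, assuming the repository headers from this commit are on the include path; the parameter names are made up for illustration and the scheduler normally drives these calls.

#include "pls/internal/data_structures/bounded_trading_deque.h"
#include "pls/internal/scheduling/memory_block.h"

using namespace pls::internal;

void owner_and_thief_sketch(scheduling::memory_block *task_block,
                            scheduling::memory_block *thief_offer,
                            data_structures::bounded_trading_deque<scheduling::memory_block,
                                                                   scheduling::memory_block> &deque) {
  // Owner: publish a task (its memory block) for stealing.
  deque.push_bot(task_block);

  // Thief: inspect the top entry without committing to the steal.
  auto peek = deque.peek_top();
  if (std::get<0>(peek)) {
    // Trade our own memory block for the task; this only succeeds if the
    // stamped top we peeked at is still current.
    auto stolen = deque.pop_top(thief_offer, std::get<1>(peek));
  }

  // Owner: pop_bot() either returns the task itself (no steal happened)
  // or the memory block the thief traded in (steal succeeded).
  auto result = deque.pop_bot();
}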
app/benchmark_fft/main.cpp
@@ -90,11 +90,10 @@ complex_vector prepare_input(int input_size) {
   return data;
 }
 
-static constexpr int NUM_ITERATIONS = 500;
+static constexpr int NUM_ITERATIONS = 1000;
 
-constexpr size_t NUM_THREADS = 8;
+constexpr size_t NUM_THREADS = 5;
 constexpr size_t NUM_TASKS = 128;
-constexpr size_t MAX_TASK_STACK_SIZE = 0;
 constexpr size_t NUM_CONTS = 128;
 constexpr size_t MAX_CONT_SIZE = 512;

@@ -104,7 +103,6 @@ int main() {
 static_scheduler_memory<NUM_THREADS,
                         NUM_TASKS,
-                        MAX_TASK_STACK_SIZE,
                         NUM_CONTS,
                         MAX_CONT_SIZE> static_scheduler_memory;

@@ -127,14 +125,14 @@ int main() {
   std::cout << "Framework: " << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count()
             << std::endl;
 
-//  start = std::chrono::steady_clock::now();
-//  for (int i = 0; i < NUM_ITERATIONS; i++) {
-//    complex_vector input_1(initial_input);
-//    fft_normal(input_1.begin(), INPUT_SIZE);
-//  }
-//  end = std::chrono::steady_clock::now();
-//  std::cout << "Normal: " << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count()
-//            << std::endl;
+  start = std::chrono::steady_clock::now();
+  for (int i = 0; i < NUM_ITERATIONS; i++) {
+    complex_vector input_1(initial_input);
+    fft_normal(input_1.begin(), INPUT_SIZE);
+  }
+  end = std::chrono::steady_clock::now();
+  std::cout << "Normal: " << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count()
+            << std::endl;
 
   return 0;
 }
app/playground/main.cpp
@@ -4,14 +4,14 @@
 #include "pls/internal/scheduling/scheduler.h"
 #include "pls/internal/scheduling/parallel_result.h"
 #include "pls/internal/scheduling/scheduler_memory.h"
-#include "pls/internal/base/thread.h"
+#include "pls/internal/data_structures/bounded_trading_deque.h"
 
 using namespace pls::internal;
 
 constexpr size_t NUM_THREADS = 1;
 constexpr size_t NUM_TASKS = 128;
-constexpr size_t MAX_TASK_STACK_SIZE = 0;
+static constexpr int NUM_ITERATIONS = 100;
 constexpr size_t NUM_CONTS = 128;
 constexpr size_t MAX_CONT_SIZE = 256;

@@ -45,33 +45,37 @@ scheduling::parallel_result<int> fib(int n) {
   });
 }
 
+static volatile int result;
 int main() {
   scheduling::static_scheduler_memory<NUM_THREADS,
                                       NUM_TASKS,
-                                      MAX_TASK_STACK_SIZE,
                                       NUM_CONTS,
                                       MAX_CONT_SIZE> static_scheduler_memory;
 
   scheduling::scheduler scheduler{static_scheduler_memory, NUM_THREADS};
 
   auto start = std::chrono::steady_clock::now();
-  std::cout << "fib = " << fib_normal(30) << std::endl;
+  for (int i = 0; i < NUM_ITERATIONS; i++) {
+    result = fib_normal(30);
+  }
   auto end = std::chrono::steady_clock::now();
   std::cout << "Normal: " << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count()
             << std::endl;
 
   start = std::chrono::steady_clock::now();
+  for (int i = 0; i < NUM_ITERATIONS; i++) {
   scheduler.perform_work([]() {
     return scheduling::scheduler::par([]() {
       return scheduling::parallel_result<int>(0);
     }, []() {
       return fib(30);
     }).then([](int, int b) {
-      std::cout << "fib = " << b << std::endl;
+      result = b;
       return scheduling::parallel_result<int>{0};
     });
   });
+  }
   end = std::chrono::steady_clock::now();
   std::cout << "Framework: " << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count()
             << std::endl;
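The playground now writes each iteration's result into the new static volatile int instead of printing it, so the compiler cannot elide the repeated fib calls being timed. A standalone illustration of that benchmark pattern, with fib_normal as a stand-in for the benchmarked call:

#include <chrono>
#include <iostream>

static int fib_normal(int n) {
  return n <= 1 ? n : fib_normal(n - 1) + fib_normal(n - 2);
}

static volatile int result;  // volatile: every store must actually happen

int main() {
  constexpr int NUM_ITERATIONS = 100;
  auto start = std::chrono::steady_clock::now();
  for (int i = 0; i < NUM_ITERATIONS; i++) {
    result = fib_normal(30);  // the store keeps the call alive
  }
  auto end = std::chrono::steady_clock::now();
  std::cout << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count()
            << " ms" << std::endl;
  return 0;
}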
lib/pls/include/pls/internal/base/error_handling.h
@@ -15,6 +15,8 @@
 void pls_error(const char *msg);
 
-#define PLS_ASSERT(cond, msg) if (!(cond)) { pls_error(msg); }
+// TODO: Distinguish between debug/internal asserts and production asserts.
+// TODO: Re-Enable Asserts
+#define PLS_ASSERT(cond, msg) //if (!(cond)) { pls_error(msg); }
 
 #endif //PLS_ERROR_HANDLING_H
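The TODO above asks for a split between debug/internal asserts and production asserts; one possible shape for that split is sketched below. This is a hypothetical sketch, not part of the commit: the macro names PLS_PROD_ASSERT and PLS_INTERNAL_ASSERT and the PLS_DEBUG switch are invented for illustration.

// Hypothetical sketch: cheap production asserts stay on, expensive internal
// ones compile out unless PLS_DEBUG is defined.
#include <cstdio>
#include <cstdlib>

inline void pls_error(const char *msg) {
  std::fprintf(stderr, "PLS error: %s\n", msg);
  std::abort();
}

#define PLS_PROD_ASSERT(cond, msg) \
  do { if (!(cond)) { pls_error(msg); } } while (0)

#ifdef PLS_DEBUG
#define PLS_INTERNAL_ASSERT(cond, msg) PLS_PROD_ASSERT(cond, msg)
#else
#define PLS_INTERNAL_ASSERT(cond, msg) do { } while (0)
#endif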
lib/pls/include/pls/internal/data_structures/bounded_trading_deque.h
@@ -3,7 +3,7 @@
 #define PLS_INTERNAL_DATA_STRUCTURES_BOUNDED_TRADING_DEQUE_H_
 
 #include <atomic>
-#include <pls/internal/base/system_details.h>
+#include <tuple>
 
 #include "pls/internal/base/error_handling.h"
 #include "pls/internal/base/system_details.h"

@@ -17,31 +17,45 @@ namespace data_structures {
 template<typename TradedType>
 class traded_field {
+  static_assert(base::system_details::CACHE_LINE_SIZE >= 4,
+                "Traded objects must not use their last address bits, as we use them for status flags."
+                "As traded objects are usually cache aligned, we need big enough cache lines.");
   // TODO: Replace unsigned long with a portable sized integer
   // (some systems might have different pointer sizes to long sizes).
+  static constexpr unsigned long SHIFT = 0x2lu;
+  static constexpr unsigned long TAG_BITS = 0x3lu;
+  static constexpr unsigned long RELEVANT_BITS = ~TAG_BITS;
+  static constexpr unsigned long EMPTY_TAG = 0x0lu;
+  static constexpr unsigned long STAMP_TAG = 0x1lu;
+  static constexpr unsigned long TRADE_TAG = 0x2lu;
 
+ public:
-  void fill_with_tag(unsigned long tag) {
-    pointer_ = (void *) ((tag << 1lu) | 0x1lu);
+  void fill_with_stamp(unsigned long stamp) {
+    pointer_ = (void *) ((stamp << SHIFT) | STAMP_TAG);
   }
-  unsigned long get_tag() {
+  unsigned long get_stamp() {
     PLS_ASSERT(is_filled_with_tag(), "Must only read out the tag when the traded field contains one.");
-    return ((unsigned long) (pointer_)) >> 1lu;
+    return ((unsigned long) pointer_) >> SHIFT;
   }
   bool is_filled_with_tag() {
-    return ((unsigned long) (pointer_)) & 0x1lu;
+    return (((unsigned long) pointer_) & TAG_BITS) == STAMP_TAG;
   }
 
-  void fill_with_object(TradedType *object) {
-    PLS_ASSERT((object & 0x1lu) == 0,
+  void fill_with_trade_object(TradedType *trade_object) {
+    PLS_ASSERT(((((unsigned long) trade_object) & TAG_BITS) == 0),
               "Must only store aligned objects in this data structure (last bits are needed for tag bit)");
-    pointer_ = object;
+    pointer_ = reinterpret_cast<void *>(((unsigned long) trade_object) | TRADE_TAG);
   }
-  TradedType *get_object() {
+  TradedType *get_trade_object() {
     PLS_ASSERT(is_filled_with_object(), "Must only read out the object when the traded field contains one.");
-    return pointer_;
+    return reinterpret_cast<TradedType *>(((unsigned long) pointer_) & RELEVANT_BITS);
   }
   bool is_filled_with_object() {
-    return !is_filled_with_tag() && pointer_ != nullptr;
+    return (((unsigned long) pointer_) & TAG_BITS) == TRADE_TAG;
   }
+  bool is_empty() {
+    return (((unsigned long) pointer_) & TAG_BITS) == EMPTY_TAG;
+  }
 
 private:
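traded_field packs a status code into the low two bits of a cache-aligned pointer: 00 means empty, 01 marks a shifted stamp, 10 marks a traded object whose alignment guarantees those bits are free. A standalone demo of this encoding, mirroring the constants above (the payload struct is my own stand-in):

#include <cassert>

constexpr unsigned long SHIFT = 0x2lu;
constexpr unsigned long TAG_BITS = 0x3lu;
constexpr unsigned long RELEVANT_BITS = ~TAG_BITS;
constexpr unsigned long STAMP_TAG = 0x1lu;
constexpr unsigned long TRADE_TAG = 0x2lu;

struct alignas(64) payload { int value; };  // cache aligned => low bits zero

int main() {
  static payload object{42};

  // Encode a stamp: shift it above the tag bits, mark it with STAMP_TAG.
  void *field = (void *) ((123lu << SHIFT) | STAMP_TAG);
  assert((((unsigned long) field) & TAG_BITS) == STAMP_TAG);
  assert((((unsigned long) field) >> SHIFT) == 123lu);

  // Encode a traded object: alignment guarantees the tag bits start as zero.
  assert((((unsigned long) &object) & TAG_BITS) == 0);
  field = (void *) (((unsigned long) &object) | TRADE_TAG);
  auto *decoded = (payload *) (((unsigned long) field) & RELEVANT_BITS);
  assert(decoded->value == 42);
  return 0;
}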
@@ -50,19 +64,21 @@ class traded_field {
 template<typename EntryType, typename TradedType>
 class alignas(base::system_details::CACHE_LINE_SIZE) trading_deque_entry {
+ public:
  /*
   * Fill the slot with its initial values, making it ready for being stolen.
   * Performs no synchronization/memory ordering constraints.
   *
   * Method is called to init a field on pushBot.
   */
-  void fill_slots(EntryType *entry_item, unsigned long tag) {
+  void fill_slots(EntryType *entry_item, unsigned long expected_stamp) {
     entry_slot_.store(entry_item, std::memory_order_relaxed);
+    forwarding_stamp_.store(expected_stamp, std::memory_order_relaxed);
 
     // Relaxed is fine for this, as adding elements is synced over the bot pointer
     auto old = trade_slot_.load(std::memory_order_relaxed);
-    old.fill_with_tag(tag);
-    trade_slot_.store(std::memory_order_relaxed);
+    old.fill_with_stamp(expected_stamp);
+    trade_slot_.store(old, std::memory_order_relaxed);
   }
 
 /**

@@ -78,7 +94,7 @@ class alignas(base::system_details::CACHE_LINE_SIZE) trading_deque_entry {
     if (old_field_value.is_filled_with_tag()) {
       return optional<TradedType *>();
     } else {
-      return optional<TradedType *>(old_field_value.get_object());
+      return optional<TradedType *>(old_field_value.get_trade_object());
     }
   }

@@ -86,25 +102,34 @@ class alignas(base::system_details::CACHE_LINE_SIZE) trading_deque_entry {
     return entry_slot_;
   }
 
+  bool is_empty() {
+    return trade_slot_.load(std::memory_order_seq_cst).is_empty();
+  }
 
-  optional<EntryType *> trade_object(TradedType *offered_object, unsigned long expected_tag) {
+  optional<EntryType *> trade_object(TradedType *offered_object, unsigned long &expected_stamp) {
     // Read our potential result
     EntryType *result = entry_slot_.load(std::memory_order_relaxed);
+    unsigned long forwarding_stamp = forwarding_stamp_.load(std::memory_order_relaxed);
 
     // Try to get it by CAS with the expected field entry, giving up our offered_object for it
     traded_field<TradedType> expected_field;
-    expected_field.fill_with_tag(expected_tag);
+    expected_field.fill_with_stamp(expected_stamp);
     traded_field<TradedType> offered_field;
-    offered_field.fill_with_object(offered_object);
+    offered_field.fill_with_trade_object(offered_object);
 
     if (trade_slot_.compare_exchange_strong(expected_field, offered_field, std::memory_order_acq_rel)) {
-      return optional<EntryType *>(result);
+      return optional<EntryType *>{result};
     } else {
-      return optional<EntryType *>(nullptr);
+      if (expected_field.is_empty()) {
+        expected_stamp = forwarding_stamp;
+      }
+      return optional<EntryType *>{};
     }
   }
 
 private:
  std::atomic<EntryType *> entry_slot_{nullptr};
+  std::atomic<unsigned long> forwarding_stamp_{};
  std::atomic<traded_field<TradedType>> trade_slot_{};
};
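The whole trade happens on a single atomic word: the slot holds a stamp after push_bot, and a thief's CAS replaces that exact stamp with its offered object, so at most one thief can win. A standalone sketch of that single-word flip, simplified to raw unsigned long words (the real code wraps them in traded_field):

#include <atomic>
#include <cassert>

constexpr unsigned long SHIFT = 0x2lu;
constexpr unsigned long STAMP_TAG = 0x1lu;
constexpr unsigned long TRADE_TAG = 0x2lu;

int main() {
  static long traded_block;  // stand-in for the thief's memory block

  // Owner fills the slot with stamp 7 on push_bot.
  std::atomic<unsigned long> trade_slot{(7lu << SHIFT) | STAMP_TAG};

  // Thief: CAS(expected stamp -> offered object). Only one such CAS can win.
  unsigned long expected = (7lu << SHIFT) | STAMP_TAG;
  unsigned long offered = ((unsigned long) &traded_block) | TRADE_TAG;
  assert(trade_slot.compare_exchange_strong(expected, offered));

  // A second thief presenting the same, now stale stamp must fail.
  unsigned long stale = (7lu << SHIFT) | STAMP_TAG;
  assert(!trade_slot.compare_exchange_strong(stale, offered));
  return 0;
}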
@@ -132,9 +157,9 @@ class bounded_trading_deque {
   void push_bot(EntryType *offered_object) {
     auto expected_stamp = bot_internal_.stamp;
-    auto *current_entry = entries_[bot_internal_.value];
+    auto &current_entry = entries_[bot_internal_.value];
 
-    current_entry->fill_slots(offered_object, expected_stamp);
+    current_entry.fill_slots(offered_object, expected_stamp);
 
     bot_internal_.stamp++;
     bot_internal_.value++;

@@ -157,28 +182,63 @@ class bounded_trading_deque {
     // Go one step back
     bot_internal_.value--;
-    auto *current_entry = entries_[bot_internal_.value];
+    auto &current_entry = entries_[bot_internal_.value];
 
-    optional<TradedType *> traded_object = current_entry->acquire_traded_type();
+    optional<TradedType *> traded_object = current_entry.acquire_traded_type();
     optional<EntryType *> queue_entry;
     if (traded_object) {
       // We do not return an entry, but the traded object
-      queue_entry = {};
+      queue_entry = optional<EntryType *>{};
     } else {
       // We still got it locally, grab the object
-      queue_entry = {current_entry->get_object()};
+      queue_entry = optional<EntryType *>{current_entry.get_object()};
+      // Keep the tag up to date (we must re-use it, as the head is just increasing by steps of one from the beginning)
+      bot_internal_.stamp--;
     }
 
     bot_.store(bot_internal_.value, std::memory_order_relaxed);
+    if (bot_internal_.value == 0) {
+      bot_internal_.stamp++;
+      top_.store({bot_internal_.stamp, 0}, std::memory_order_release);
+    }
 
     return pop_result{queue_entry, traded_object};
   }
 
+  std::tuple<optional<EntryType *>, stamped_integer> peek_top() {
+    auto local_top = top_.load();
+    auto local_bot = bot_.load();
+
+    if (local_top.value >= local_bot) {
+      return std::make_tuple(optional<EntryType *>{}, local_top);
+    } else {
+      return std::make_tuple(optional<EntryType *>{entries_[local_top.value].get_object()}, local_top);
+    }
+  }
 
   optional<EntryType *> pop_top(TradedType *trade_offer) {
     auto local_top = top_.load();
-    optional<EntryType *> entry = entries_[local_top.value].trade_object(trade_offer, local_top.stamp);
+    return pop_top(trade_offer, local_top);
+  }
 
+  optional<EntryType *> pop_top(TradedType *trade_offer, stamped_integer local_top) {
+    auto local_bot = bot_.load();
+    if (local_top.value >= local_bot) {
+      return optional<EntryType *>{};
+    }
+
+    unsigned long expected_top_stamp = local_top.stamp;
+    optional<EntryType *> entry = entries_[local_top.value].trade_object(trade_offer, expected_top_stamp);
+    if (entry) {
+      // We got it, for sure move the top pointer forward.
+      top_.compare_exchange_strong(local_top, {local_top.stamp + 1, local_top.value + 1});
+    } else {
+      // We did not get it....
+      if (entries_[local_top.value].is_empty()) {
+        // ...update the top stamp, so the next call can get it (we still make system progress, as the owner
+        // must have popped off the element)
+        top_.compare_exchange_strong(local_top, {expected_top_stamp, local_top.value});
+      } else {
+        // ...move the pointer forward if someone else put a valid trade object in there.
         top_.compare_exchange_strong(local_top, {local_top.stamp + 1, local_top.value + 1});
+      }
    }
 
     return entry;
   }
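The stamp half of top_ exists to defeat the ABA problem: the index may return to an old value after pushes and pops, but the stamp keeps moving, so a thief holding a stale {stamp, value} pair loses its CAS instead of succeeding spuriously. A small self-contained illustration, with stamped_integer modeled inline:

#include <atomic>
#include <cassert>

struct stamped_integer {
  unsigned long stamp;
  unsigned long value;
};

int main() {
  std::atomic<stamped_integer> top{{0, 0}};

  stamped_integer stale = top.load();  // thief reads {0, 0}, then is delayed

  // Owner pushes and pops: the index returns to 0, but the stamp moved on.
  top.store({1, 1});
  top.store({2, 0});

  stamped_integer expected = stale;
  bool won = top.compare_exchange_strong(expected, {stale.stamp + 1, stale.value + 1});
  assert(!won);  // the stale view is rejected, no ABA
  return 0;
}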
@@ -193,6 +253,17 @@ class bounded_trading_deque {
   stamped_integer bot_internal_{0, 0};
 };
 
+template<typename EntryType, typename TradedType, size_t SIZE>
+class static_bounded_trading_deque {
+ public:
+  static_bounded_trading_deque() : items_{}, deque_{items_.data(), SIZE} {}
+
+  bounded_trading_deque<EntryType, TradedType> &get_deque() { return deque_; }
+
+ private:
+  std::array<trading_deque_entry<EntryType, TradedType>, SIZE> items_;
+  bounded_trading_deque<EntryType, TradedType> deque_;
+};
 
 }
 }
 }
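The new static_bounded_trading_deque bundles the entry storage and the deque into one object, so no heap allocation is needed. A hedged usage sketch, assuming the repository header is available; in the scheduler both type parameters are memory_block, here plain aligned structs stand in:

#include "pls/internal/data_structures/bounded_trading_deque.h"

using namespace pls::internal::data_structures;

struct alignas(64) my_entry { int id; };
struct alignas(64) my_trade { int id; };

int main() {
  // Storage for 128 entries plus the deque itself, all statically placed.
  static_bounded_trading_deque<my_entry, my_trade, 128> storage;
  bounded_trading_deque<my_entry, my_trade> &deque = storage.get_deque();

  static my_entry entry{1};
  deque.push_bot(&entry);          // make the entry visible for stealing
  auto popped = deque.pop_bot();   // owner takes it back (or the traded object)
  return 0;
}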
lib/pls/include/pls/internal/data_structures/delayed_initialization.h
@@ -19,15 +19,38 @@ namespace data_structures {
  * Takes care of the de-construction the contained object if one is active.
  */
 template<typename T>
-class delayed_initialization {
+class alignas(alignof(T)) delayed_initialization {
  public:
   delayed_initialization() : memory_{}, initialized_{false} {}
+  delayed_initialization(const delayed_initialization &) = delete;
   delayed_initialization(delayed_initialization &&other) noexcept {
-    initialized_ = other.initialized_;
-    if (other.initialized()) {
+    if (other.initialized_) {
       new ((void *) memory_.data()) T(std::move(other.object()));
       other.initialized_ = false;
+      initialized_ = true;
     }
   }
+  delayed_initialization &operator=(const delayed_initialization &) = delete;
+  delayed_initialization &operator=(delayed_initialization &&other) noexcept {
+    if (&other == this) {
+      return *this;
+    }
+
+    if (initialized() && other.initialized()) {
+      object() = std::move(other.object());
+      other.initialized_ = false;
+      initialized_ = true;
+      return *this;
+    }
+
+    if (!initialized() && other.initialized_) {
+      new ((void *) memory_.data()) T(std::move(other.object()));
+      other.initialized_ = false;
+      initialized_ = true;
+      return *this;
+    }
+
+    return *this;
+  }
 
   template<typename ...ARGS>

@@ -62,14 +85,24 @@ class delayed_initialization {
     return *reinterpret_cast<T *>(memory_.data());
   }
 
+  const T &object() const {
+    PLS_ASSERT(initialized_, "Can not use an uninitialized delayed wrapper object!");
+
+    return *reinterpret_cast<const T *>(memory_.data());
+  }
 
   T &operator*() {
     return object();
   }
 
+  const T &operator*() const {
+    return object();
+  }
 
   bool initialized() const { return initialized_; }
 
  private:
-  std::array<char, sizeof(T)> memory_;
+  alignas(alignof(T)) std::array<char, sizeof(T)> memory_;
   bool initialized_;
 };
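The added move operations transfer the contained object, if any, and mark the source as uninitialized, so the contained destructor never runs twice; copying stays deleted. A sketch of the resulting semantics, assuming the repository header and its existing initialize()/initialized() interface:

#include <string>
#include <utility>
#include "pls/internal/data_structures/delayed_initialization.h"

using pls::internal::data_structures::delayed_initialization;

int main() {
  delayed_initialization<std::string> a;
  a.initialize("hello");               // placement-constructs the string

  delayed_initialization<std::string> b{std::move(a)};  // new move constructor
  // a is now uninitialized, b owns the string.

  delayed_initialization<std::string> c;
  c = std::move(b);                    // new move assignment
  return c.initialized() ? 0 : 1;
}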
lib/pls/include/pls/internal/data_structures/optional.h
@@ -3,6 +3,7 @@
 #define PLS_INTERNAL_DATA_STRUCTURES_OPTIONAL_H_
 
 #include <utility>
+#include <type_traits>
 
 #include "pls/internal/data_structures/delayed_initialization.h"

@@ -14,6 +15,38 @@ template<typename T>
 class optional {
  public:
   optional() = default;
 
+  optional(optional &other) noexcept : optional(const_cast<const optional &>(other)) {};
+  optional(const optional &other) noexcept {
+    if (other) {
+      data_.initialize(other.data_.object());
+    }
+  }
+  optional(optional &&other) noexcept {
+    data_ = std::move(other.data_);
+  }
+
+  optional &operator=(const optional &other) {
+    if (&other == this) {
+      return *this;
+    }
+
+    if (data_.initialized()) {
+      data_.destroy();
+    }
+    if (other) {
+      data_.initialize(other.data_.object());
+    }
+
+    return *this;
+  }
+  optional &operator=(optional &&other) noexcept {
+    if (&other == this) {
+      return *this;
+    }
+
+    data_ = std::move(other.data_);
+    return *this;
+  }
 
   template<typename ...ARGS>
   explicit optional(ARGS &&... args) : data_{std::forward<ARGS>(args)...} {}
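With the copy and move members above, optional behaves as a regular value type built on delayed_initialization. A hedged usage sketch, assuming the repository header and its existing bool conversion and value constructor:

#include <utility>
#include "pls/internal/data_structures/optional.h"

using pls::internal::data_structures::optional;

int main() {
  optional<int> empty;                      // default: holds nothing
  optional<int> filled{42};                 // explicit value constructor

  optional<int> copy = filled;              // new copy constructor
  optional<int> moved = std::move(filled);  // new move constructor

  empty = moved;                            // new copy assignment
  return (empty && copy) ? 0 : 1;
}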
lib/pls/include/pls/internal/scheduling/cont.h
@@ -46,6 +46,7 @@ class base_cont {
    * Will store the result in it's parent, but not mess with any counters.
    */
   virtual void execute_task() = 0;
+  virtual base_task *get_task() = 0;
 
   virtual void *get_right_result_pointer() = 0;
   virtual void *get_left_result_pointer() = 0;

@@ -120,14 +121,19 @@ class cont : public base_cont {
   void execute() override {
     using result_type = decltype(function_((*left_result_).value(), (*right_result_).value()));
     result_runner<result_type>::execute(*this);
-    this->get_memory_block()->free_buffer();
-    this->get_memory_block()->reset_state();
     this->~cont();
+    auto *memory_block = this->get_memory_block();
+    memory_block->free_buffer();
+    memory_block->reset_state();
   }
 
   void execute_task() override {
     task_.execute();
   }
 
+  base_task *get_task() override {
+    return &task_;
+  }
 
   void *get_left_result_pointer() override {
     return &left_result_;

@@ -136,10 +142,6 @@ class cont : public base_cont {
     return &right_result_;
   }
 
-  T2 *get_task() {
-    return &task_;
-  }
 
  private:
   // Initial data members. These slow down the fast path, try to init them lazy when possible.
   F function_;
lib/pls/include/pls/internal/scheduling/cont_manager.h
@@ -63,6 +63,25 @@ class cont_manager {
     return active_node_;
   }
 
+  bool is_clean() {
+    if (get_active_node()->get_depth() == 0) {
+      memory_block *current_node = active_node_;
+      for (size_t i = 1; i < num_conts_; i++) {
+        if (current_node->get_prev() != nullptr && current_node->get_prev()->get_next() != current_node) {
+          return false;
+        }
+        if (current_node->is_buffer_used()) {
+          return false;
+        }
+        current_node = current_node->get_next();
+      }
+    } else {
+      return false;
+    }
+
+    return true;
+  }
 
   // Manage the fall through behaviour/slow path behaviour
   bool falling_through() const {
     return fall_through_;

@@ -93,23 +112,24 @@ class cont_manager {
     fall_through_ = false;
 
     // Keep the target chain before we execute, as this potentially frees the memory
-    auto *target_chain = notified_cont->get_memory_block()->get_offered_chain().load();
+    auto *target_memory_block = notified_cont->get_memory_block();
+    auto *target_chain = target_memory_block->get_offered_chain().load();
 
     // Notify the next continuation of finishing a child...
-    if (notified_cont->get_memory_block()->get_results_missing().fetch_add(-1) == 1) {
+    if (target_memory_block->get_results_missing().fetch_add(-1) == 1) {
       // ... we finished the continuation.
       // We are now in charge continuing to execute the above continuation chain.
 
-      PLS_ASSERT(active_node_->get_prev()->get_depth() == notified_cont->get_memory_block()->get_depth(),
+      PLS_ASSERT(active_node_->get_prev()->get_depth() == target_memory_block->get_depth(),
                  "We must hold the system invariant to be in the correct depth.")
-      if (active_node_->get_prev() != notified_cont->get_memory_block()) {
+      if (active_node_->get_prev() != target_memory_block) {
         // We do not own the thing we will execute.
         // Own it by swapping the chain belonging to it in.
-        aquire_memory_chain(notified_cont->get_memory_block());
+        aquire_memory_chain(target_memory_block);
       }
       my_state.parent_cont_ = notified_cont->get_parent();
       my_state.right_spawn_ = notified_cont->is_right_child();
-      active_node_ = notified_cont->get_memory_block();
+      active_node_ = target_memory_block;
       notified_cont->execute();
       if (!falling_through() && notified_cont->get_parent() != nullptr) {
         fall_through_and_notify_cont(notified_cont->get_parent(), notified_cont->is_right_child());

@@ -119,9 +139,9 @@ class cont_manager {
       // ... we did not finish the last continuation.
       // We are no longer in charge of executing the above continuation chain.
 
-      PLS_ASSERT(active_node_->get_prev()->get_depth() == notified_cont->get_memory_block()->get_depth(),
+      PLS_ASSERT(active_node_->get_prev()->get_depth() == target_memory_block->get_depth(),
                  "We must hold the system invariant to be in the correct depth.")
-      if (active_node_->get_prev() == notified_cont->get_memory_block()) {
+      if (active_node_->get_prev() == target_memory_block) {
         // We own the thing we are not allowed to execute.
         // Get rid of the ownership by using the offered chain.
         aquire_memory_chain(target_chain);
lib/pls/include/pls/internal/scheduling/memory_block.h
@@ -23,19 +23,22 @@ class memory_block {
       : prev_{prev},
         next_{nullptr},
         offered_chain_{nullptr},
-        state_{{initialized}},
         results_missing_{2},
         memory_buffer_{memory_buffer},
         memory_buffer_size_{memory_buffer_size},
         memory_buffer_used_{false},
-        depth_{depth} {};
+        depth_{depth},
+        owner_{0} {};
 
   template<typename T, typename ...ARGS>
   T *place_in_buffer(ARGS &&...args) {
     PLS_ASSERT(!memory_buffer_used_, "Must only allocate one continuation at once per node.");
 
     memory_buffer_used_ = true;
-    return new (memory_buffer_) T(std::forward<ARGS>(args)...);
+    auto *result = new (memory_buffer_) T(std::forward<ARGS>(args)...);
+    continuation_ = result;
+    return result;
   }
 
   void free_buffer() {
     PLS_ASSERT(memory_buffer_used_, "Can only free a memory spot when it is in use.")

@@ -44,14 +47,10 @@ class memory_block {
   bool is_buffer_used() {
     return memory_buffer_used_;
   }
 
-  // TODO: Fit the reset somewhere!!!
-  //  // Reset Associated counters
-  //  results_missing_.store(2);
-  //  offered_chain_.store(nullptr);
-  //  auto old_state = state_.load();
-  //  state_.store({old_state.stamp + 1, initialized});
+  base_cont *get_cont() {
+    PLS_ASSERT(is_buffer_used(), "Can only read initialized buffer!");
+    return continuation_;
+  }
 
   memory_block *get_prev() {
     return prev_;

@@ -66,13 +65,6 @@ class memory_block {
     next_ = next;
   }
 
-  enum state { initialized, execute_local, stealing, stolen, invalid };
-  using stamped_state = data_structures::stamped_integer;
-
-  std::atomic<stamped_state> &get_state() {
-    return state_;
-  }
 
   std::atomic<memory_block *> &get_offered_chain() {
     return offered_chain_;
   }

@@ -87,11 +79,16 @@ class memory_block {
   void reset_state() {
     offered_chain_.store(nullptr);
-    auto old_state = state_.load();
-    state_.store({old_state.stamp + 1, initialized});
     results_missing_.store(2);
   }
 
+  void set_owner(int owner) {
+    owner_ = owner;
+  }
+  int get_owner() {
+    return owner_;
+  }
 
  private:
   // Linked list property of memory blocks (a complete list represents a threads currently owned memory).
   // Each block knows its chain start to allow stealing a whole chain in O(1)

@@ -103,13 +100,6 @@ class memory_block {
   // For this we need the offered chain's element up to the point we can steal.
   std::atomic<memory_block *> offered_chain_;
 
-  // The flag is needed for an ongoing stealing request.
-  // Stealing threads need to offer their memory block chain before the
-  // 'fully' own the stolen task. As long as that is not done the memory block
-  // chain can abort the steal request in order to be not blocked without a
-  // new, clean memory block chain to work with.
-  std::atomic<stamped_state> state_;
 
   // Management for coordinating concurrent result writing and stealing.
   // The result count decides atomically who gets to execute the continuation
   // and who therefore get's to own this memory block chain.

@@ -120,6 +110,8 @@ class memory_block {
   // This memory is managed explicitly by the continuation manager and runtime system
   // (they need to make sure to always call de-constructors and never allocate two continuations).
   char *memory_buffer_;
+  base_cont *continuation_;
 
   // These two are only helper properties helping with bugs during development.
   size_t memory_buffer_size_;
   bool memory_buffer_used_;

@@ -128,6 +120,9 @@ class memory_block {
   // Swapping parts of a memory chain will not reorder it, as always parts of
   // the same size are exchanged.
   const int depth_;
 
+  // TODO: Remove, debug only
+  int owner_;
 };
 
 }
lib/pls/include/pls/internal/scheduling/scheduler_impl.h
@@ -49,6 +49,7 @@ struct scheduler::starter {
     const bool is_right_cont = my_state.right_spawn_;
     base_cont *parent_cont = my_state.parent_cont_;
 
+    current_memory_block->set_owner(my_state.get_id());
     continuation_type *current_cont = current_memory_block->place_in_buffer<continuation_type>(parent_cont,
                                                                                                current_memory_block,
                                                                                                is_right_cont,

@@ -63,19 +64,32 @@ struct scheduler::starter {
     my_state.right_spawn_ = false;
     return_type_1 result_1 = function_1_();
     if (cont_manager.falling_through()) {
+      // Get our replacement from the task stack and store it for later use when we are actually blocked.
+      auto traded_memory = my_state.get_task_manager().try_pop_local();
+      current_cont->get_memory_block()->get_offered_chain().store(*traded_memory);
+
       // Unwind stack...
       return result_type{};
     }
 
     // Try to call second function on fast path
-    if (my_state.get_task_manager().steal_local_task()) {
+    auto traded_memory = my_state.get_task_manager().try_pop_local();
+    if (traded_memory) {
+      // The task got stolen...
+      // ...but we got a memory block that can be used if we block on this one.
+      current_cont->get_memory_block()->get_offered_chain().store(*traded_memory);
+
+      // Main scheduling loop is responsible for entering the result to the slow path...
+      current_cont->store_left_result(std::move(result_1));
+      cont_manager.fall_through_and_notify_cont(current_cont, false);
+      // Unwind stack...
+      return result_type{};
+    } else {
       my_state.right_spawn_ = true;
       return_type_2 result_2 = function_2_();
       if (cont_manager.falling_through()) {
         // Main scheduling loop is responsible for entering the result to the slow path...
         current_cont->store_left_result(std::move(result_1));
-        auto old_state = current_cont->get_memory_block()->get_state().load();
-        current_cont->get_memory_block()->get_state().store({old_state.stamp + 1, memory_block::invalid});
         current_cont->get_memory_block()->get_results_missing().fetch_add(-1);
         // Unwind stack...
         return result_type{};

@@ -101,12 +115,6 @@ struct scheduler::starter {
       }
       return cont_result;
     }
-
-    // Main scheduling loop is responsible for entering the result to the slow path...
-    current_cont->store_left_result(std::move(result_1));
-    cont_manager.fall_through_and_notify_cont(current_cont, false);
-    // Unwind stack...
-    return result_type{};
   };
 };
lib/pls/include/pls/internal/scheduling/scheduler_memory.h
@@ -28,7 +28,7 @@ class scheduler_memory {
   virtual thread_state &thread_state_for(size_t id) = 0;
 };
 
-template<size_t MAX_THREADS, size_t NUM_TASKS, size_t MAX_TASK_STACK_SIZE, size_t NUM_CONTS, size_t MAX_CONT_SIZE>
+template<size_t MAX_THREADS, size_t NUM_TASKS, size_t NUM_CONTS, size_t MAX_CONT_SIZE>
 class static_scheduler_memory : public scheduler_memory {
  public:
   size_t max_threads() const override {

@@ -44,7 +44,7 @@ class static_scheduler_memory : public scheduler_memory {
   }
 
  private:
-  using thread_state_type = thread_state_static<NUM_TASKS, MAX_TASK_STACK_SIZE, NUM_CONTS, MAX_CONT_SIZE>;
+  using thread_state_type = thread_state_static<NUM_TASKS, NUM_CONTS, MAX_CONT_SIZE>;
 
   alignas(base::system_details::CACHE_LINE_SIZE) std::array<base::thread, MAX_THREADS> threads_;
   alignas(base::system_details::CACHE_LINE_SIZE) std::array<thread_state_type, MAX_THREADS> thread_states_;

@@ -78,7 +78,7 @@ class heap_scheduler_memory : public scheduler_memory {
   }
 
  private:
-  using thread_state_type = thread_state_static<NUM_TASKS, MAX_TASK_STACK_SIZE, NUM_CONTS, MAX_CONT_SIZE>;
+  using thread_state_type = thread_state_static<NUM_TASKS, NUM_CONTS, MAX_CONT_SIZE>;
 
   // thread_state_type is aligned at the cache line and therefore overaligned (C++ 11 does not require
   // the new operator to obey alignments bigger than 16, cache lines are usually 64).
   // To allow this object to be allocated using 'new' (which the vector does internally),
lib/pls/include/pls/internal/scheduling/task_manager.h
@@ -12,8 +12,9 @@
 #include "pls/internal/scheduling/cont_manager.h"
 #include "pls/internal/scheduling/memory_block.h"
 
-#include "pls/internal/data_structures/bounded_ws_deque.h"
+#include "pls/internal/data_structures/bounded_trading_deque.h"
 #include "pls/internal/data_structures/stamped_integer.h"
+#include "pls/internal/data_structures/optional.h"
 
 #include "pls/internal/base/spin_lock.h"

@@ -21,19 +22,6 @@ namespace pls {
 namespace internal {
 namespace scheduling {
 
-struct task_handle {
- public:
-  task_handle() : task_{nullptr}, task_memory_block_{nullptr} {};
-  explicit task_handle(base_task *task) : task_{task},
-                                          task_memory_block_{task->get_cont()->get_memory_block()} {};
-
-  base_task *task_;
-  // This seems redundant first, but is needed for a race-free steal.
-  // It could happen that the task's memory is overwritten and the pointer to it's memory block gets invalid.
-  // We can do this more elegantly in the future.
-  memory_block *task_memory_block_;
-};
 
 /**
  * Handles management of tasks in the system. Each thread has a local task manager,
  * responsible for allocating, freeing and publishing tasks for stealing.

@@ -42,60 +30,60 @@ class task_manager {
  public:
   // Publishes a task on the stack, i.e. makes it visible for other threads to steal.
   void publish_task(base_task *task) {
-    std::lock_guard<base::spin_lock> lock{lock_};
-    task_deque_.push_bottom(task_handle{task});
+    //std::lock_guard<base::spin_lock> lock{lock_};
+    task_deque_.push_bot(task->get_cont()->get_memory_block());
   }
 
   // Try to pop a local task from this task managers stack.
-  bool steal_local_task() {
-    std::lock_guard<base::spin_lock> lock{lock_};
-    return task_deque_.pop_bottom();
+  data_structures::optional<memory_block *> try_pop_local() {
+    //std::lock_guard<base::spin_lock> lock{lock_};
+    return task_deque_.pop_bot().traded_;
   }
 
   // Try to steal a task from a remote task_manager instance. The stolen task must be stored locally.
   // Returns a pair containing the actual task and if the steal was successful.
   base_task *steal_remote_task(cont_manager &stealing_cont_manager) {
-    std::lock_guard<base::spin_lock> lock{lock_};
+    //std::lock_guard<base::spin_lock> lock{lock_};
+    auto peek = task_deque_.peek_top();
 
-    // TODO: See if we can somehow make this trade lock free (and still be correct)
-    auto stolen_task_handle = task_deque_.pop_top();
-    if (stolen_task_handle) {
-      base_task *stolen_task = (*stolen_task_handle).task_;
-      memory_block *stolen_task_memory = (*stolen_task_handle).task_memory_block_;
-      auto stolen_task_depth = stolen_task_memory->get_depth();
-      auto &atomic_state = stolen_task_memory->get_state();
-      auto &atomic_offered_chain = stolen_task_memory->get_offered_chain();
-
-      // TODO: We ignore all we tried with lock free implementations here, just store the state how it is supposed to be
-      stealing_cont_manager.move_active_node(stolen_task_depth);
+    if (std::get<0>(peek)) {
+      memory_block *peeked_memory_block = (*std::get<0>(peek));
+      auto peeked_depth = peeked_memory_block->get_depth();
+
+      stealing_cont_manager.move_active_node(peeked_depth);
       auto offered_chain = stealing_cont_manager.get_active_node();
       stealing_cont_manager.move_active_node(1);
 
-      atomic_offered_chain.store(offered_chain);
-      atomic_state.store(memory_block::stolen);
-
-      return stolen_task;
+      auto stolen_memory_block = task_deque_.pop_top(offered_chain, std::get<1>(peek));
+      if (stolen_memory_block) {
+        PLS_ASSERT(*stolen_memory_block == peeked_memory_block, "Steal must only work if it is equal!");
+        return (*stolen_memory_block)->get_cont()->get_task();
+      } else {
+        stealing_cont_manager.move_active_node(-(peeked_depth + 1));
+        return nullptr;
+      }
     }
 
     return nullptr;
   }
 
-  explicit task_manager(data_structures::bounded_ws_deque<task_handle> &task_deque) : task_deque_{task_deque},
-                                                                                      lock_{} {}
+  explicit task_manager(data_structures::bounded_trading_deque<memory_block, memory_block> &task_deque)
+      : task_deque_{task_deque} {}
 
 private:
-  data_structures::bounded_ws_deque<task_handle> &task_deque_;
-  base::spin_lock lock_;
+  data_structures::bounded_trading_deque<memory_block, memory_block> &task_deque_;
+  base::spin_lock lock_{};
};
 
-template<size_t NUM_TASKS, size_t MAX_STACK_SIZE>
+template<size_t NUM_TASKS>
 class static_task_manager {
  public:
   static_task_manager() : task_deque_{}, task_manager_{task_deque_.get_deque()} {};
 
   task_manager &get_task_manager() { return task_manager_; }
 
 private:
-  data_structures::static_bounded_ws_deque<task_handle, NUM_TASKS> task_deque_;
+  data_structures::static_bounded_trading_deque<memory_block, memory_block, NUM_TASKS> task_deque_;
   task_manager task_manager_;
};
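On the owner's side, publish_task and try_pop_local now form the fast-path pair that replaces the old locked push_bottom/pop_bottom: an empty optional from try_pop_local means the task is still ours, a filled one means a thief took it and left its memory block behind. A hedged sketch of that pairing, assuming the repository headers; the function and parameter names are made up and the real call sequence lives in scheduler_impl.h:

#include "pls/internal/scheduling/task_manager.h"

using namespace pls::internal;

void fast_path_sketch(scheduling::task_manager &manager, scheduling::base_task *task) {
  manager.publish_task(task);   // push_bot: task's memory block is visible to thieves

  // ... execute the left branch of the fork ...

  auto traded_memory = manager.try_pop_local();  // pop_bot().traded_
  if (traded_memory) {
    // Task was stolen: *traded_memory is the thief's memory block, which
    // becomes our offered chain when we block on the stolen child.
  } else {
    // Nobody took it: execute the right branch synchronously.
  }
}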
lib/pls/include/pls/internal/scheduling/thread_state_static.h
@@ -11,7 +11,7 @@ namespace pls {
 namespace internal {
 namespace scheduling {
 
-template<size_t NUM_TASKS, size_t MAX_TASK_STACK_SIZE, size_t NUM_CONTS, size_t MAX_CONT_SIZE>
+template<size_t NUM_TASKS, size_t NUM_CONTS, size_t MAX_CONT_SIZE>
 struct thread_state_static {
  public:
   thread_state_static()

@@ -21,7 +21,7 @@ struct thread_state_static {
   thread_state &get_thread_state() { return thread_state_; }
 
 private:
-  static_task_manager<NUM_TASKS, MAX_TASK_STACK_SIZE> static_task_manager_;
+  static_task_manager<NUM_TASKS> static_task_manager_;
   static_cont_manager<NUM_CONTS, MAX_CONT_SIZE> static_cont_manager_;
   thread_state thread_state_;
};
lib/pls/src/internal/scheduling/scheduler.cpp
@@ -83,7 +83,7 @@ void scheduler::work_thread_work_section() {
         auto &target_state = my_state.get_scheduler().thread_state_for(target);
 
-        PLS_ASSERT(my_cont_manager.get_active_node()->get_depth() == 0, "Only steal with clean chain!");
+        PLS_ASSERT(my_cont_manager.is_clean(), "Only steal with clean chain!");
 
         auto *stolen_task = target_state.get_task_manager().steal_remote_task(my_cont_manager);
         if (stolen_task != nullptr) {
           my_state.parent_cont_ = stolen_task->get_cont();

@@ -99,6 +99,7 @@ void scheduler::work_thread_work_section() {
       }
     }
   } while (!work_section_done_);
+  PLS_ASSERT(my_cont_manager.is_clean(), "Only finish work section with clean chain!");
 }
 
 void scheduler::terminate() {
test/data_structures_test.cpp
@@ -3,6 +3,7 @@
 #include "pls/internal/base/system_details.h"
 #include "pls/internal/data_structures/aligned_stack.h"
+#include "pls/internal/data_structures/bounded_trading_deque.h"
 
 using namespace pls::internal::data_structures;
 using namespace pls::internal::base;