Skip to content
Toggle navigation
P
Projects
G
Groups
S
Snippets
Help
las3_pub
/
predictable_parallel_patterns
This project
Loading...
Sign in
Toggle navigation
Go to a project
Project
Repository
Issues
0
Merge Requests
0
Pipelines
Wiki
Members
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Commit
3535cbd8
authored
Apr 09, 2019
by
FritzFlorian
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Cache Align scheduler_memory.
parent
e2e34b02
Pipeline
#1144
passed with stages
in 3 minutes 32 seconds
Changes
11
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
11 changed files
with
118 additions
and
61 deletions
+118
-61
app/playground/main.cpp
+2
-4
app/test_for_new/main.cpp
+1
-1
lib/pls/CMakeLists.txt
+1
-0
lib/pls/include/pls/internal/base/alignment.h
+29
-0
lib/pls/include/pls/internal/base/system_details.h
+3
-1
lib/pls/include/pls/internal/data_structures/aligned_stack.h
+4
-6
lib/pls/include/pls/internal/scheduling/scheduler_memory.h
+33
-18
lib/pls/src/internal/base/alignment.cpp
+27
-0
lib/pls/src/internal/data_structures/aligned_stack.cpp
+1
-14
lib/pls/src/internal/scheduling/scheduler_memory.cpp
+6
-6
test/data_structures_test.cpp
+11
-11
No files found.
app/playground/main.cpp
View file @
3535cbd8
...
...
@@ -12,10 +12,8 @@
using
namespace
pls
;
int
main
()
{
using
aligned_state
=
std
::
aligned_storage
<
sizeof
(
internal
::
scheduling
::
thread_state
),
64
>::
type
;
aligned_state
data
;
std
::
cout
<<
sizeof
(
aligned_state
)
<<
std
::
endl
;
malloc_scheduler_memory
sched_memory
{
8
};
std
::
cout
<<
(
std
::
uintptr_t
)
sched_memory
.
thread_for
(
0
)
%
64
<<
", "
<<
(
std
::
uintptr_t
)
sched_memory
.
thread_for
(
1
)
%
64
<<
", "
<<
(
std
::
uintptr_t
)
sched_memory
.
thread_for
(
2
)
%
64
<<
", "
<<
std
::
endl
;
std
::
cout
<<
(
std
::
uintptr_t
)
sched_memory
.
thread_state_for
(
0
)
%
64
<<
", "
<<
(
std
::
uintptr_t
)
sched_memory
.
thread_state_for
(
1
)
%
64
<<
", "
<<
(
std
::
uintptr_t
)
sched_memory
.
thread_state_for
(
2
)
%
64
<<
", "
<<
std
::
endl
;
std
::
cout
<<
(
std
::
uintptr_t
)
sched_memory
.
task_stack_for
(
0
)
%
64
<<
", "
<<
(
std
::
uintptr_t
)
sched_memory
.
task_stack_for
(
1
)
%
64
<<
", "
<<
(
std
::
uintptr_t
)
sched_memory
.
task_stack_for
(
2
)
%
64
<<
", "
<<
std
::
endl
;
}
app/test_for_new/main.cpp
View file @
3535cbd8
#include <pls/internal/base/thread.h>
#include <pls/internal/helpers/prohibit_new.h>
using
namespace
pls
::
internal
::
data_structures
;
using
namespace
pls
::
internal
::
base
;
int
global
=
0
;
...
...
lib/pls/CMakeLists.txt
View file @
3535cbd8
...
...
@@ -9,6 +9,7 @@ add_library(pls STATIC
include/pls/internal/base/barrier.h src/internal/base/barrier.cpp
include/pls/internal/base/system_details.h
include/pls/internal/base/error_handling.h
include/pls/internal/base/alignment.h src/internal/base/alignment.cpp
include/pls/internal/data_structures/aligned_stack.h src/internal/data_structures/aligned_stack.cpp
include/pls/internal/data_structures/deque.h src/internal/data_structures/deque.cpp
...
...
lib/pls/include/pls/internal/base/alignment.h
0 → 100644
View file @
3535cbd8
#ifndef PLS_ALIGNMENT_H
#define PLS_ALIGNMENT_H
#include <cstdint>
#include <cstdlib>
#include "system_details.h"
namespace
pls
{
namespace
internal
{
namespace
base
{
namespace
alignment
{
/**
 * Raw byte storage sized for exactly one T and aligned to
 * system_details::CACHE_LINE_SIZE.
 *
 * The wrapper only provides the bytes; it declares no constructor or
 * destructor and therefore never creates or destroys a T itself.
 */
template<class T>
struct aligned_wrapper {
  alignas(system_details::CACHE_LINE_SIZE) unsigned char data[sizeof(T)];

  /** Returns the start of the byte buffer, viewed as a pointer to T. */
  T *pointer() {
    unsigned char *const raw_bytes = data;
    return reinterpret_cast<T *>(raw_bytes);
  }
};
void
*
allocate_aligned
(
size_t
size
);
std
::
uintptr_t
next_alignment
(
std
::
uintptr_t
size
);
char
*
next_alignment
(
char
*
pointer
);
}
}
}
}
#endif //PLS_ALIGNMENT_H
lib/pls/include/pls/internal/base/system_details.h
View file @
3535cbd8
...
...
@@ -13,7 +13,9 @@ namespace pls {
* PORTABILITY:
* Currently sane default values for x86.
*/
constexpr
std
::
uintptr_t
CACHE_LINE_SIZE
=
64
;
namespace
system_details
{
constexpr
std
::
uintptr_t
CACHE_LINE_SIZE
=
64
;
}
}
}
}
...
...
lib/pls/include/pls/internal/data_structures/aligned_stack.h
View file @
3535cbd8
...
...
@@ -6,6 +6,7 @@
#include <cstdlib>
#include "pls/internal/base/error_handling.h"
#include "pls/internal/base/alignment.h"
namespace
pls
{
namespace
internal
{
...
...
@@ -29,14 +30,11 @@ namespace pls {
// Current head will always be aligned to cache lines
char
*
head_
;
static
std
::
uintptr_t
next_alignment
(
std
::
uintptr_t
size
);
static
char
*
next_alignment
(
char
*
pointer
);
public
:
typedef
char
*
state
;
aligned_stack
()
:
memory_start_
{
nullptr
},
memory_end_
{
nullptr
},
head_
{
nullptr
}
{};
aligned_stack
(
char
*
memory_region
,
const
std
::
size_t
size
);
aligned_stack
(
char
*
memory_region
,
std
::
size_t
size
);
template
<
typename
T
>
T
*
push
(
const
T
&
object
)
{
...
...
@@ -49,7 +47,7 @@ namespace pls {
void
*
result
=
reinterpret_cast
<
T
*>
(
head_
);
// Move head to next aligned position after new object
head_
=
next_alignment
(
head_
+
sizeof
(
T
));
head_
=
base
::
alignment
::
next_alignment
(
head_
+
sizeof
(
T
));
if
(
head_
>=
memory_end_
)
{
PLS_ERROR
(
"Tried to allocate object on alligned_stack without sufficient memory!"
);
}
...
...
@@ -59,7 +57,7 @@ namespace pls {
template
<
typename
T
>
T
pop
()
{
head_
=
head_
-
next_alignment
(
sizeof
(
T
));
head_
=
head_
-
base
::
alignment
::
next_alignment
(
sizeof
(
T
));
return
*
reinterpret_cast
<
T
*>
(
head_
);
}
...
...
lib/pls/include/pls/internal/scheduling/scheduler_memory.h
View file @
3535cbd8
...
...
@@ -14,7 +14,7 @@ namespace pls {
class
scheduler_memory
{
public
:
virtual
size_t
max_threads
()
=
0
;
virtual
size_t
max_threads
()
const
=
0
;
virtual
thread_state
*
thread_state_for
(
size_t
id
)
=
0
;
virtual
scheduler_thread
*
thread_for
(
size_t
id
)
=
0
;
virtual
data_structures
::
aligned_stack
*
task_stack_for
(
size_t
id
)
=
0
;
...
...
@@ -22,39 +22,54 @@ namespace pls {
template
<
size_t
MAX_THREADS
,
size_t
TASK_STACK_SIZE
>
class
static_scheduler_memory
:
public
scheduler_memory
{
std
::
array
<
scheduler_thread
,
MAX_THREADS
>
threads_
;
std
::
array
<
thread_state
,
MAX_THREADS
>
thread_states_
;
std
::
array
<
std
::
array
<
char
,
TASK_STACK_SIZE
>
,
MAX_THREADS
>
task_stacks_memory_
;
std
::
array
<
data_structures
::
aligned_stack
,
MAX_THREADS
>
task_stacks_
;
// Every one of these types has to live on its own cache line,
// as each thread uses one of them independently.
// Sharing cache lines between them would therefore be a major performance hit.
using
aligned_thread
=
base
::
alignment
::
aligned_wrapper
<
scheduler_thread
>
;
using
aligned_thread_state
=
base
::
alignment
::
aligned_wrapper
<
thread_state
>
;
using
aligned_thread_stack
=
base
::
alignment
::
aligned_wrapper
<
std
::
array
<
char
,
TASK_STACK_SIZE
>>
;
using
aligned_aligned_stack
=
base
::
alignment
::
aligned_wrapper
<
data_structures
::
aligned_stack
>
;
std
::
array
<
aligned_thread
,
MAX_THREADS
>
threads_
;
std
::
array
<
aligned_thread_state
,
MAX_THREADS
>
thread_states_
;
std
::
array
<
aligned_thread_stack
,
MAX_THREADS
>
task_stacks_memory_
;
std
::
array
<
aligned_aligned_stack
,
MAX_THREADS
>
task_stacks_
;
public
:
static_scheduler_memory
()
{
for
(
size_t
i
=
0
;
i
<
MAX_THREADS
;
i
++
)
{
task_stacks_
[
i
]
=
data_structures
::
aligned_stack
(
task_stacks_memory_
[
i
].
data
(),
TASK_STACK_SIZE
);
new
((
void
*
)
task_stacks_
[
i
].
pointer
())
data_structures
::
aligned_stack
(
task_stacks_memory_
[
i
].
pointer
()
->
data
(),
TASK_STACK_SIZE
);
}
}
size_t
max_threads
()
override
{
return
MAX_THREADS
;
}
thread_state
*
thread_state_for
(
size_t
id
)
override
{
return
&
thread_states_
[
id
]
;
}
scheduler_thread
*
thread_for
(
size_t
id
)
override
{
return
&
threads_
[
id
]
;
}
data_structures
::
aligned_stack
*
task_stack_for
(
size_t
id
)
override
{
return
&
task_stacks_
[
id
]
;
}
size_t
max_threads
()
const
override
{
return
MAX_THREADS
;
}
thread_state
*
thread_state_for
(
size_t
id
)
override
{
return
thread_states_
[
id
].
pointer
()
;
}
scheduler_thread
*
thread_for
(
size_t
id
)
override
{
return
threads_
[
id
].
pointer
()
;
}
data_structures
::
aligned_stack
*
task_stack_for
(
size_t
id
)
override
{
return
task_stacks_
[
id
].
pointer
()
;
}
};
class
malloc_scheduler_memory
:
public
scheduler_memory
{
size_t
num_threads_
;
// Every one of these types has to live on its own cache line,
// as each thread uses one of them independently.
// Sharing cache lines between them would therefore be a major performance hit.
using
aligned_thread
=
base
::
alignment
::
aligned_wrapper
<
scheduler_thread
>
;
using
aligned_thread_state
=
base
::
alignment
::
aligned_wrapper
<
thread_state
>
;
using
aligned_aligned_stack
=
base
::
alignment
::
aligned_wrapper
<
data_structures
::
aligned_stack
>
;
const
size_t
num_threads_
;
align
as
(
64
)
scheduler
_thread
*
threads_
;
thread_state
*
thread_states_
;
align
ed
_thread
*
threads_
;
aligned_thread_state
*
thread_states_
;
char
**
task_stacks_memory_
;
data_structures
::
aligned_stack
*
task_stacks_
;
aligned_aligned_stack
*
task_stacks_
;
public
:
explicit
malloc_scheduler_memory
(
size_t
num_threads
,
size_t
memory_per_stack
=
2
<<
16
);
~
malloc_scheduler_memory
();
size_t
max_threads
()
override
{
return
num_threads_
;
}
thread_state
*
thread_state_for
(
size_t
id
)
override
{
return
&
thread_states_
[
id
]
;
}
scheduler_thread
*
thread_for
(
size_t
id
)
override
{
return
&
threads_
[
id
]
;
}
data_structures
::
aligned_stack
*
task_stack_for
(
size_t
id
)
override
{
return
&
task_stacks_
[
id
]
;
}
size_t
max_threads
()
const
override
{
return
num_threads_
;
}
thread_state
*
thread_state_for
(
size_t
id
)
override
{
return
thread_states_
[
id
].
pointer
()
;
}
scheduler_thread
*
thread_for
(
size_t
id
)
override
{
return
threads_
[
id
].
pointer
()
;
}
data_structures
::
aligned_stack
*
task_stack_for
(
size_t
id
)
override
{
return
task_stacks_
[
id
].
pointer
()
;
}
};
}
}
...
...
lib/pls/src/internal/base/alignment.cpp
0 → 100644
View file @
3535cbd8
#include "pls/internal/base/alignment.h"
#include "pls/internal/base/system_details.h"
namespace
pls
{
namespace
internal
{
namespace
base
{
namespace
alignment
{
/**
 * Allocates a block of memory aligned to system_details::CACHE_LINE_SIZE.
 *
 * aligned_alloc is specified for sizes that are an integral multiple of the
 * requested alignment, and some implementations reject other sizes. The
 * request is therefore padded up to a whole number of cache lines before it
 * is forwarded, so callers may pass any size.
 *
 * @param size Minimum number of bytes the caller needs.
 * @return Whatever aligned_alloc returns for the padded size
 *         (a null pointer if the allocation fails).
 */
void *allocate_aligned(size_t size) {
  const size_t line_size = system_details::CACHE_LINE_SIZE;
  const size_t remainder = size % line_size;
  // Round up to the next multiple of the cache line size (no-op when already a multiple).
  const size_t padded_size = remainder == 0 ? size : size + (line_size - remainder);
  return aligned_alloc(line_size, padded_size);
}
/**
 * Rounds `size` up to the nearest multiple of the cache line size.
 *
 * @param size Value (byte count or address) to round up.
 * @return `size` itself when it is already a multiple of
 *         system_details::CACHE_LINE_SIZE, otherwise the next larger multiple.
 */
std::uintptr_t next_alignment(std::uintptr_t size) {
  const std::uintptr_t overshoot = size % base::system_details::CACHE_LINE_SIZE;
  return overshoot == 0
         ? size
         : size + (base::system_details::CACHE_LINE_SIZE - overshoot);
}
char
*
next_alignment
(
char
*
pointer
)
{
return
reinterpret_cast
<
char
*>
(
next_alignment
(
reinterpret_cast
<
std
::
uintptr_t
>
(
pointer
)));
}
}
}
}
}
lib/pls/src/internal/data_structures/aligned_stack.cpp
View file @
3535cbd8
...
...
@@ -7,20 +7,7 @@ namespace pls {
aligned_stack
::
aligned_stack
(
char
*
memory_region
,
const
std
::
size_t
size
)
:
memory_start_
{
memory_region
},
memory_end_
{
memory_region
+
size
},
head_
{
next_alignment
(
memory_start_
)}
{}
std
::
uintptr_t
aligned_stack
::
next_alignment
(
std
::
uintptr_t
size
)
{
std
::
uintptr_t
miss_alignment
=
size
%
base
::
CACHE_LINE_SIZE
;
if
(
miss_alignment
==
0
)
{
return
size
;
}
else
{
return
size
+
(
base
::
CACHE_LINE_SIZE
-
miss_alignment
);
}
}
char
*
aligned_stack
::
next_alignment
(
char
*
pointer
)
{
return
reinterpret_cast
<
char
*>
(
next_alignment
(
reinterpret_cast
<
std
::
uintptr_t
>
(
pointer
)));
}
head_
{
base
::
alignment
::
next_alignment
(
memory_start_
)}
{}
}
}
}
lib/pls/src/internal/scheduling/scheduler_memory.cpp
View file @
3535cbd8
...
...
@@ -5,14 +5,14 @@ namespace pls {
namespace
scheduling
{
malloc_scheduler_memory
::
malloc_scheduler_memory
(
const
size_t
num_threads
,
const
size_t
memory_per_stack
)
:
num_threads_
{
num_threads
}
{
threads_
=
reinterpret_cast
<
scheduler_thread
*>
(
malloc
(
num_threads
*
sizeof
(
scheduler
_thread
)));
thread_states_
=
reinterpret_cast
<
thread_state
*>
(
malloc
(
num_threads
*
sizeof
(
thread_state
)));
threads_
=
reinterpret_cast
<
aligned_thread
*>
(
base
::
alignment
::
allocate_aligned
(
num_threads
*
sizeof
(
aligned
_thread
)));
thread_states_
=
reinterpret_cast
<
aligned_thread_state
*>
(
base
::
alignment
::
allocate_aligned
(
num_threads
*
sizeof
(
aligned_
thread_state
)));
task_stacks_
=
reinterpret_cast
<
data_structures
::
aligned_stack
*>
(
malloc
(
num_threads
*
sizeof
(
data_structures
::
aligned_stack
)));
task_stacks_memory_
=
reinterpret_cast
<
char
**>
(
malloc
(
num_threads
*
sizeof
(
char
*
)));
task_stacks_
=
reinterpret_cast
<
aligned_aligned_stack
*>
(
base
::
alignment
::
allocate_aligned
(
num_threads
*
sizeof
(
aligned_
aligned_stack
)));
task_stacks_memory_
=
reinterpret_cast
<
char
**>
(
base
::
alignment
::
allocate_aligned
(
num_threads
*
sizeof
(
char
*
)));
for
(
size_t
i
=
0
;
i
<
num_threads_
;
i
++
)
{
task_stacks_memory_
[
i
]
=
reinterpret_cast
<
char
*>
(
malloc
(
memory_per_stack
));
task_stacks_
[
i
]
=
data_structures
::
aligned_stack
(
task_stacks_memory_
[
i
],
memory_per_stack
);
task_stacks_memory_
[
i
]
=
reinterpret_cast
<
char
*>
(
base
::
alignment
::
allocate_aligned
(
memory_per_stack
));
new
((
void
*
)
task_stacks_
[
i
].
pointer
())
data_structures
::
aligned_stack
(
task_stacks_memory_
[
i
],
memory_per_stack
);
}
}
...
...
test/data_structures_test.cpp
View file @
3535cbd8
...
...
@@ -27,20 +27,20 @@ TEST_CASE( "aligned stack stores objects correctly", "[internal/data_structures/
auto
pointer_two
=
stack
.
push
(
small_data_two
);
auto
pointer_three
=
stack
.
push
(
small_data_three
);
REQUIRE
(
reinterpret_cast
<
std
::
uintptr_t
>
(
pointer_one
)
%
CACHE_LINE_SIZE
==
0
);
REQUIRE
(
reinterpret_cast
<
std
::
uintptr_t
>
(
pointer_two
)
%
CACHE_LINE_SIZE
==
0
);
REQUIRE
(
reinterpret_cast
<
std
::
uintptr_t
>
(
pointer_three
)
%
CACHE_LINE_SIZE
==
0
);
REQUIRE
(
reinterpret_cast
<
std
::
uintptr_t
>
(
pointer_one
)
%
system_details
::
CACHE_LINE_SIZE
==
0
);
REQUIRE
(
reinterpret_cast
<
std
::
uintptr_t
>
(
pointer_two
)
%
system_details
::
CACHE_LINE_SIZE
==
0
);
REQUIRE
(
reinterpret_cast
<
std
::
uintptr_t
>
(
pointer_three
)
%
system_details
::
CACHE_LINE_SIZE
==
0
);
}
SECTION
(
"stack correctly pushes above linesize objects"
)
{
std
::
array
<
char
,
5
>
small_data_one
{
'a'
,
'b'
,
'c'
,
'd'
,
'e'
};
std
::
array
<
char
,
CACHE_LINE_SIZE
+
10
>
big_data_one
{};
std
::
array
<
char
,
system_details
::
CACHE_LINE_SIZE
+
10
>
big_data_one
{};
auto
big_pointer_one
=
stack
.
push
(
big_data_one
);
auto
small_pointer_one
=
stack
.
push
(
small_data_one
);
REQUIRE
(
reinterpret_cast
<
std
::
uintptr_t
>
(
big_pointer_one
)
%
CACHE_LINE_SIZE
==
0
);
REQUIRE
(
reinterpret_cast
<
std
::
uintptr_t
>
(
small_pointer_one
)
%
CACHE_LINE_SIZE
==
0
);
REQUIRE
(
reinterpret_cast
<
std
::
uintptr_t
>
(
big_pointer_one
)
%
system_details
::
CACHE_LINE_SIZE
==
0
);
REQUIRE
(
reinterpret_cast
<
std
::
uintptr_t
>
(
small_pointer_one
)
%
system_details
::
CACHE_LINE_SIZE
==
0
);
}
SECTION
(
"stack correctly stores and retrieves objects"
)
{
...
...
@@ -65,11 +65,11 @@ TEST_CASE( "aligned stack stores objects correctly", "[internal/data_structures/
auto
pointer_four
=
stack
.
push
(
small_data_two
);
auto
pointer_five
=
stack
.
push
(
small_data_three
);
REQUIRE
(
reinterpret_cast
<
std
::
uintptr_t
>
(
pointer_one
)
%
CACHE_LINE_SIZE
==
0
);
REQUIRE
(
reinterpret_cast
<
std
::
uintptr_t
>
(
pointer_two
)
%
CACHE_LINE_SIZE
==
0
);
REQUIRE
(
reinterpret_cast
<
std
::
uintptr_t
>
(
pointer_three
)
%
CACHE_LINE_SIZE
==
0
);
REQUIRE
(
reinterpret_cast
<
std
::
uintptr_t
>
(
pointer_four
)
%
CACHE_LINE_SIZE
==
0
);
REQUIRE
(
reinterpret_cast
<
std
::
uintptr_t
>
(
pointer_five
)
%
CACHE_LINE_SIZE
==
0
);
REQUIRE
(
reinterpret_cast
<
std
::
uintptr_t
>
(
pointer_one
)
%
system_details
::
CACHE_LINE_SIZE
==
0
);
REQUIRE
(
reinterpret_cast
<
std
::
uintptr_t
>
(
pointer_two
)
%
system_details
::
CACHE_LINE_SIZE
==
0
);
REQUIRE
(
reinterpret_cast
<
std
::
uintptr_t
>
(
pointer_three
)
%
system_details
::
CACHE_LINE_SIZE
==
0
);
REQUIRE
(
reinterpret_cast
<
std
::
uintptr_t
>
(
pointer_four
)
%
system_details
::
CACHE_LINE_SIZE
==
0
);
REQUIRE
(
reinterpret_cast
<
std
::
uintptr_t
>
(
pointer_five
)
%
system_details
::
CACHE_LINE_SIZE
==
0
);
REQUIRE
(
pointer_four
==
pointer_two
);
REQUIRE
(
pointer_five
==
pointer_three
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment