Skip to content
Toggle navigation
P
Projects
G
Groups
S
Snippets
Help
las3_pub
/
predictable_parallel_patterns
This project
Loading...
Sign in
Toggle navigation
Go to a project
Project
Repository
Issues
0
Merge Requests
0
Pipelines
Wiki
Members
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Commit
86333a60
authored
Jun 13, 2020
by
FritzFlorian
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Final preparations for single-app benchmark runs.
parent
e2f584c4
Pipeline
#1515
passed with stages
in 4 minutes 37 seconds
Changes
4
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
47 additions
and
26 deletions
+47
-26
app/benchmark_matrix_div_conquer/main.cpp
+40
-24
lib/pls/include/pls/internal/scheduling/scheduler.h
+1
-1
lib/pls/include/pls/internal/scheduling/strain_local_resource.h
+3
-1
lib/pls/include/pls/internal/scheduling/thread_state.h
+3
-0
No files found.
app/benchmark_matrix_div_conquer/main.cpp
View file @
86333a60
...
...
@@ -8,30 +8,39 @@ using namespace comparison_benchmarks::base;
#include <memory>
#include <array>
#include <algorithm>
#include <vector>
void
multiply_div_conquer
(
const
std
::
vector
<
std
::
vector
<
std
::
vector
<
std
::
unique_ptr
<
double
[]
>>>>
&
tmp_arrays
,
pls
::
strain_local_resource
&
local_indices
,
size_t
size
,
size_t
depth
,
size_t
branch
,
matrix_div_conquer
::
blocked_matrix_view
&
result
,
matrix_div_conquer
::
blocked_matrix_view
&
a
,
matrix_div_conquer
::
blocked_matrix_view
&
b
)
{
if
(
size
<=
8
)
{
if
(
size
<=
matrix_div_conquer
::
CUTOFF_SIZE
)
{
multiply_naive
(
size
,
result
,
a
,
b
);
return
;
}
// Temporary storage required for the intermediate results
auto
strain_local_index
=
local_indices
.
get_item
(
depth
);
std
::
unique_ptr
<
double
[]
>
const
&
data_1_1_a
=
tmp_arrays
[
depth
][
strain_local_index
.
get_strain_index
()][
0
];
std
::
unique_ptr
<
double
[]
>
const
&
data_1_1_b
=
tmp_arrays
[
depth
][
strain_local_index
.
get_strain_index
()][
1
];
std
::
unique_ptr
<
double
[]
>
const
&
data_1_2_a
=
tmp_arrays
[
depth
][
strain_local_index
.
get_strain_index
()][
2
];
std
::
unique_ptr
<
double
[]
>
const
&
data_1_2_b
=
tmp_arrays
[
depth
][
strain_local_index
.
get_strain_index
()][
3
];
std
::
unique_ptr
<
double
[]
>
const
&
data_2_1_a
=
tmp_arrays
[
depth
][
strain_local_index
.
get_strain_index
()][
4
];
std
::
unique_ptr
<
double
[]
>
const
&
data_2_1_b
=
tmp_arrays
[
depth
][
strain_local_index
.
get_strain_index
()][
5
];
std
::
unique_ptr
<
double
[]
>
const
&
data_2_2_a
=
tmp_arrays
[
depth
][
strain_local_index
.
get_strain_index
()][
6
];
std
::
unique_ptr
<
double
[]
>
const
&
data_2_2_b
=
tmp_arrays
[
depth
][
strain_local_index
.
get_strain_index
()][
7
];
size_t
index
;
if
(
depth
==
0
||
(
8u
<<
(
depth
-
1u
))
<=
local_indices
.
get_num_threads
())
{
index
=
branch
;
}
else
{
index
=
strain_local_index
.
get_strain_index
();
}
std
::
unique_ptr
<
double
[]
>
const
&
data_1_1_a
=
tmp_arrays
[
depth
][
index
][
0
];
std
::
unique_ptr
<
double
[]
>
const
&
data_1_1_b
=
tmp_arrays
[
depth
][
index
][
1
];
std
::
unique_ptr
<
double
[]
>
const
&
data_1_2_a
=
tmp_arrays
[
depth
][
index
][
2
];
std
::
unique_ptr
<
double
[]
>
const
&
data_1_2_b
=
tmp_arrays
[
depth
][
index
][
3
];
std
::
unique_ptr
<
double
[]
>
const
&
data_2_1_a
=
tmp_arrays
[
depth
][
index
][
4
];
std
::
unique_ptr
<
double
[]
>
const
&
data_2_1_b
=
tmp_arrays
[
depth
][
index
][
5
];
std
::
unique_ptr
<
double
[]
>
const
&
data_2_2_a
=
tmp_arrays
[
depth
][
index
][
6
];
std
::
unique_ptr
<
double
[]
>
const
&
data_2_2_b
=
tmp_arrays
[
depth
][
index
][
7
];
// Handles to sub-matrices used
matrix_div_conquer
::
blocked_matrix_view
result_1_1
=
result
.
quadrant_1_1
();
...
...
@@ -60,31 +69,31 @@ void multiply_div_conquer(const std::vector<std::vector<std::vector<std::unique_
// Divide Work Into Sub-Calls
pls
::
spawn
(
[
&
]()
{
multiply_div_conquer
(
tmp_arrays
,
local_indices
,
size
/
2
,
depth
+
1
,
result_1_1_a
,
a_1_1
,
b_1_1
);
}
[
&
]()
{
multiply_div_conquer
(
tmp_arrays
,
local_indices
,
size
/
2
,
depth
+
1
,
0
,
result_1_1_a
,
a_1_1
,
b_1_1
);
}
);
pls
::
spawn
(
[
&
]()
{
multiply_div_conquer
(
tmp_arrays
,
local_indices
,
size
/
2
,
depth
+
1
,
result_1_1_b
,
a_1_2
,
b_2_1
);
}
[
&
]()
{
multiply_div_conquer
(
tmp_arrays
,
local_indices
,
size
/
2
,
depth
+
1
,
1
,
result_1_1_b
,
a_1_2
,
b_2_1
);
}
);
pls
::
spawn
(
[
&
]()
{
multiply_div_conquer
(
tmp_arrays
,
local_indices
,
size
/
2
,
depth
+
1
,
result_1_2_a
,
a_1_1
,
b_1_2
);
}
[
&
]()
{
multiply_div_conquer
(
tmp_arrays
,
local_indices
,
size
/
2
,
depth
+
1
,
2
,
result_1_2_a
,
a_1_1
,
b_1_2
);
}
);
pls
::
spawn
(
[
&
]()
{
multiply_div_conquer
(
tmp_arrays
,
local_indices
,
size
/
2
,
depth
+
1
,
result_1_2_b
,
a_1_2
,
b_2_2
);
}
[
&
]()
{
multiply_div_conquer
(
tmp_arrays
,
local_indices
,
size
/
2
,
depth
+
1
,
3
,
result_1_2_b
,
a_1_2
,
b_2_2
);
}
);
pls
::
spawn
(
[
&
]()
{
multiply_div_conquer
(
tmp_arrays
,
local_indices
,
size
/
2
,
depth
+
1
,
result_2_1_a
,
a_2_1
,
b_1_1
);
}
[
&
]()
{
multiply_div_conquer
(
tmp_arrays
,
local_indices
,
size
/
2
,
depth
+
1
,
4
,
result_2_1_a
,
a_2_1
,
b_1_1
);
}
);
pls
::
spawn
(
[
&
]()
{
multiply_div_conquer
(
tmp_arrays
,
local_indices
,
size
/
2
,
depth
+
1
,
result_2_1_b
,
a_2_2
,
b_2_1
);
}
[
&
]()
{
multiply_div_conquer
(
tmp_arrays
,
local_indices
,
size
/
2
,
depth
+
1
,
5
,
result_2_1_b
,
a_2_2
,
b_2_1
);
}
);
pls
::
spawn
(
[
&
]()
{
multiply_div_conquer
(
tmp_arrays
,
local_indices
,
size
/
2
,
depth
+
1
,
result_2_2_a
,
a_2_1
,
b_1_2
);
}
[
&
]()
{
multiply_div_conquer
(
tmp_arrays
,
local_indices
,
size
/
2
,
depth
+
1
,
6
,
result_2_2_a
,
a_2_1
,
b_1_2
);
}
);
pls
::
spawn
(
[
&
]()
{
multiply_div_conquer
(
tmp_arrays
,
local_indices
,
size
/
2
,
depth
+
1
,
result_2_2_b
,
a_2_2
,
b_2_2
);
}
[
&
]()
{
multiply_div_conquer
(
tmp_arrays
,
local_indices
,
size
/
2
,
depth
+
1
,
7
,
result_2_2_b
,
a_2_2
,
b_2_2
);
}
);
pls
::
sync
();
...
...
@@ -99,8 +108,8 @@ void multiply_div_conquer(const std::vector<std::vector<std::vector<std::unique_
}
}
constexpr
int
MAX_NUM_TASKS
=
1
6
;
constexpr
int
MAX_STACK_SIZE
=
4096
*
2
;
constexpr
int
MAX_NUM_TASKS
=
1
0
;
constexpr
int
MAX_STACK_SIZE
=
4096
*
1
;
int
main
(
int
argc
,
char
**
argv
)
{
auto
settings
=
benchmark_runner
::
parse_parameters
(
argc
,
argv
);
...
...
@@ -124,17 +133,24 @@ int main(int argc, char **argv) {
// Strain local data
std
::
vector
<
std
::
vector
<
std
::
vector
<
std
::
unique_ptr
<
double
[]
>>>>
div_conquer_temp_arrays
;
size_t
max_depth
=
0
;
size_t
buffers_needed
=
1
;
size_t
remaining_size
=
size
;
while
(
remaining_size
>
1
)
{
while
(
remaining_size
>
matrix_div_conquer
::
CUTOFF_SIZE
)
{
auto
&
depth_buffers
=
div_conquer_temp_arrays
.
emplace_back
();
for
(
int
thread_id
=
0
;
thread_id
<
8
;
thread_id
++
)
{
buffers_needed
=
std
::
min
(
buffers_needed
,
(
size_t
)
settings
.
num_threads_
);
for
(
int
thread_id
=
0
;
thread_id
<
buffers_needed
;
thread_id
++
)
{
auto
&
depth_thread_buffers
=
depth_buffers
.
emplace_back
();
for
(
int
i
=
0
;
i
<
8
;
i
++
)
{
depth_thread_buffers
.
emplace_back
(
new
double
[(
remaining_size
/
2
)
*
(
remaining_size
/
2
)]);
size_t
matrix_elements
=
(
remaining_size
/
2
)
*
(
remaining_size
/
2
);
depth_thread_buffers
.
emplace_back
(
new
double
[
matrix_elements
]);
for
(
size_t
j
=
0
;
j
<
matrix_elements
;
j
+=
32
)
{
depth_thread_buffers
[
i
][
j
]
=
1.0
;
// Touch memory
}
}
}
max_depth
++
;
buffers_needed
*=
8
;
remaining_size
=
remaining_size
/
2
;
}
pls
::
strain_local_resource
local_indices
{(
unsigned
)
settings
.
num_threads_
,
(
unsigned
)
max_depth
};
...
...
@@ -152,7 +168,7 @@ int main(int argc, char **argv) {
runner
.
run_iterations
(
settings
.
iterations_
,
[
&
]()
{
scheduler
.
perform_work
([
&
]()
{
multiply_div_conquer
(
div_conquer_temp_arrays
,
local_indices
,
size
,
0
,
result
,
a
,
b
);
multiply_div_conquer
(
div_conquer_temp_arrays
,
local_indices
,
size
,
0
,
0
,
result
,
a
,
b
);
});
});
runner
.
commit_results
(
true
);
...
...
@@ -163,7 +179,7 @@ int main(int argc, char **argv) {
runner
.
run_periodic
(
settings
.
iterations_
,
settings
.
interval_period_
,
settings
.
interval_deadline_
,
[
&
]()
{
scheduler
.
perform_work
([
&
]()
{
multiply_div_conquer
(
div_conquer_temp_arrays
,
local_indices
,
size
,
0
,
result
,
a
,
b
);
multiply_div_conquer
(
div_conquer_temp_arrays
,
local_indices
,
size
,
0
,
0
,
result
,
a
,
b
);
});
});
runner
.
commit_results
(
true
);
...
...
lib/pls/include/pls/internal/scheduling/scheduler.h
View file @
86333a60
...
...
@@ -45,7 +45,7 @@ class scheduler {
size_t
computation_depth
,
size_t
stack_size
,
bool
reuse_thread
=
true
,
size_t
serial_stack_size
=
4096
*
8
);
size_t
serial_stack_size
=
4096
*
1
);
template
<
typename
ALLOC
>
explicit
scheduler
(
unsigned
int
num_threads
,
...
...
lib/pls/include/pls/internal/scheduling/strain_local_resource.h
View file @
86333a60
...
...
@@ -60,7 +60,7 @@ class strain_local_resource {
};
strain_local_resource
(
unsigned
num_threads
,
unsigned
depth
)
:
local_items_
(
num_threads
)
{
unsigned
depth
)
:
num_threads_
{
num_threads
},
local_items_
(
num_threads
)
{
for
(
unsigned
thread_id
=
0
;
thread_id
<
num_threads
;
thread_id
++
)
{
local_items_
[
thread_id
].
reserve
(
depth
);
for
(
unsigned
i
=
0
;
i
<
depth
;
i
++
)
{
...
...
@@ -70,11 +70,13 @@ class strain_local_resource {
}
}
// Accessor for the thread count this resource was constructed with:
// returns num_threads_, the const member that the constructor initializes
// from its num_threads argument. Marked [[nodiscard]] so that a call whose
// result is ignored triggers a compiler warning.
[[
nodiscard
]]
unsigned
get_num_threads
()
const
{
return
num_threads_
;
}
item_handle
get_item
(
unsigned
depth
);
static
strain_resource
*
get_local_copy
(
strain_resource
*
other_resources
,
unsigned
thread_id
);
static
void
acquire_locally
(
strain_resource
*
other_resources
,
unsigned
thread_id
);
private
:
const
unsigned
num_threads_
;
std
::
vector
<
std
::
vector
<
local_item
>>
local_items_
;
};
...
...
lib/pls/include/pls/internal/scheduling/thread_state.h
View file @
86333a60
...
...
@@ -69,6 +69,9 @@ struct PLS_CACHE_ALIGN thread_state {
stack_allocator_
{
stack_allocator
},
serial_call_stack_size_
{
serial_call_stack_size
}
{
serial_call_stack_
=
stack_allocator
->
allocate_stack
(
serial_call_stack_size_
);
for
(
size_t
i
=
0
;
i
<
serial_call_stack_size
;
i
+=
base
::
system_details
::
CACHE_LINE_SIZE
)
{
serial_call_stack_
[
i
]
=
'a'
;
// Touch the stack
}
};
~
thread_state
()
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment