Skip to content
Toggle navigation
P
Projects
G
Groups
S
Snippets
Help
las3_pub
/
predictable_parallel_patterns
This project
Loading...
Sign in
Toggle navigation
Go to a project
Project
Repository
Issues
0
Merge Requests
0
Pipelines
Wiki
Members
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Commit
92ee564c
authored
Jun 04, 2020
by
FritzFlorian
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
WIP: Start work on divide and conquer example with temporary buffers.
parent
a81e082d
Pipeline
#1500
passed with stages
in 4 minutes 9 seconds
Changes
9
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
223 additions
and
17 deletions
+223
-17
CMakeLists.txt
+1
-0
app/benchmark_fib/main.cpp
+6
-8
app/benchmark_matrix_div_conquer/CMakeLists.txt
+5
-0
app/benchmark_matrix_div_conquer/main.cpp
+194
-0
lib/pls/CMakeLists.txt
+1
-1
lib/pls/include/pls/algorithms/divide_and_conquer_buffers.h
+5
-0
lib/pls/include/pls/internal/base/system_details.h
+1
-1
lib/pls/include/pls/internal/scheduling/strain_local_resource.h
+3
-3
lib/pls/src/internal/scheduling/strain_local_resource.cpp
+7
-4
No files found.
CMakeLists.txt
View file @
92ee564c
...
@@ -48,6 +48,7 @@ add_subdirectory(app/playground)
...
@@ -48,6 +48,7 @@ add_subdirectory(app/playground)
add_subdirectory
(
app/benchmark_fft
)
add_subdirectory
(
app/benchmark_fft
)
add_subdirectory
(
app/benchmark_unbalanced
)
add_subdirectory
(
app/benchmark_unbalanced
)
add_subdirectory
(
app/benchmark_matrix
)
add_subdirectory
(
app/benchmark_matrix
)
add_subdirectory
(
app/benchmark_matrix_div_conquer
)
add_subdirectory
(
app/benchmark_fib
)
add_subdirectory
(
app/benchmark_fib
)
add_subdirectory
(
app/context_switch
)
add_subdirectory
(
app/context_switch
)
...
...
app/benchmark_fib/main.cpp
View file @
92ee564c
#include "pls/pls.h"
#include "pls/pls.h"
#include <iostream>
#include "benchmark_runner.h"
#include "benchmark_runner.h"
#include "benchmark_base/fib.h"
#include "benchmark_base/fib.h"
...
@@ -10,7 +8,7 @@ using namespace comparison_benchmarks::base;
...
@@ -10,7 +8,7 @@ using namespace comparison_benchmarks::base;
constexpr
int
MAX_NUM_TASKS
=
32
;
constexpr
int
MAX_NUM_TASKS
=
32
;
constexpr
int
MAX_STACK_SIZE
=
4096
*
1
;
constexpr
int
MAX_STACK_SIZE
=
4096
*
1
;
int
pls_fib
(
int
n
)
{
int
pls_fib
(
int
n
,
int
d
)
{
if
(
n
==
0
)
{
if
(
n
==
0
)
{
return
0
;
return
0
;
}
}
...
@@ -19,11 +17,11 @@ int pls_fib(int n) {
...
@@ -19,11 +17,11 @@ int pls_fib(int n) {
}
}
int
a
,
b
;
int
a
,
b
;
pls
::
spawn
([
n
,
&
a
]()
{
pls
::
spawn
([
n
,
d
,
&
a
]()
{
a
=
pls_fib
(
n
-
1
);
a
=
pls_fib
(
n
-
1
,
d
+
1
);
});
});
pls
::
spawn
([
n
,
&
b
]()
{
pls
::
spawn
([
n
,
d
,
&
b
]()
{
b
=
pls_fib
(
n
-
2
);
b
=
pls_fib
(
n
-
2
,
d
+
1
);
});
});
pls
::
sync
();
pls
::
sync
();
...
@@ -45,7 +43,7 @@ int main(int argc, char **argv) {
...
@@ -45,7 +43,7 @@ int main(int argc, char **argv) {
// scheduler.get_profiler().disable_memory_measure();
// scheduler.get_profiler().disable_memory_measure();
runner
.
run_iterations
(
fib
::
NUM_ITERATIONS
,
[
&
]()
{
runner
.
run_iterations
(
fib
::
NUM_ITERATIONS
,
[
&
]()
{
scheduler
.
perform_work
([
&
]()
{
scheduler
.
perform_work
([
&
]()
{
res
=
pls_fib
(
fib
::
INPUT_N
);
res
=
pls_fib
(
fib
::
INPUT_N
,
0
);
});
});
},
fib
::
NUM_WARMUP_ITERATIONS
);
},
fib
::
NUM_WARMUP_ITERATIONS
);
// scheduler.get_profiler().current_run().print_dag(std::cout);
// scheduler.get_profiler().current_run().print_dag(std::cout);
...
...
app/benchmark_matrix_div_conquer/CMakeLists.txt
0 → 100644
View file @
92ee564c
add_executable
(
benchmark_matrix_div_conquer_pls_v3 main.cpp
)
target_link_libraries
(
benchmark_matrix_div_conquer_pls_v3 pls benchmark_runner benchmark_base
)
if
(
EASY_PROFILER
)
target_link_libraries
(
benchmark_matrix_div_conquer_pls_v3 easy_profiler
)
endif
()
app/benchmark_matrix_div_conquer/main.cpp
0 → 100644
View file @
92ee564c
//#include "pls/pls.h"
//using namespace pls;
#include "benchmark_runner.h"
#include <memory>
#include <array>
#include <math.h>
// Helpers to directly index into blocked matrices
// Largest matrix edge length that BLOCK_LOOKUP can index.
constexpr size_t MAX_BLOCK_LOOKUP = 256;

// Offset table for blocked matrices, indexed as BLOCK_LOOKUP[ROW][COLUMN].
// Each entry is the offset of that logical position inside the blocked storage.
std::array<std::array<size_t, MAX_BLOCK_LOOKUP>, MAX_BLOCK_LOOKUP> BLOCK_LOOKUP;

// Populates the top-left size x size region of BLOCK_LOOKUP.
//
// The table is built recursively: the top-left quadrant is filled first, and
// the remaining three quadrants reuse its offsets shifted by one, two and
// three quadrant sizes (top-right, bottom-left, bottom-right respectively).
// A size of 0 or 1 only writes the single entry BLOCK_LOOKUP[0][0].
// No bounds check is performed against MAX_BLOCK_LOOKUP.
void fill_block_lookup(size_t size) {
  if (size <= 1) {
    BLOCK_LOOKUP[0][0] = 0;
    return;
  }

  const size_t half = size / 2;
  fill_block_lookup(half);

  const size_t quadrant_elements = half * half;
  for (size_t r = 0; r < half; ++r) {
    for (size_t c = 0; c < half; ++c) {
      // Offset of this position inside the already filled top-left quadrant.
      // None of the writes below touch [r][c], so reading it once is safe.
      const size_t base = BLOCK_LOOKUP[r][c];
      BLOCK_LOOKUP[r][half + c] = base + quadrant_elements;
      BLOCK_LOOKUP[half + r][c] = base + 2 * quadrant_elements;
      BLOCK_LOOKUP[half + r][half + c] = base + 3 * quadrant_elements;
    }
  }
}
// View onto a square matrix of doubles stored in blocked layout.
//
// The view only stores a pointer and an edge length; it never allocates or
// frees the underlying storage. Element access goes through the global
// BLOCK_LOOKUP table, so that table must cover the accessed indices before
// at() is used. All accessors are const because they do not modify the view
// itself (the referenced matrix data stays writable through it).
class blocked_matrix_view {
 public:
  // data: pointer to the first element of the blocked storage.
  // size: edge length of the (square) matrix.
  blocked_matrix_view(double *data, size_t size) : data_{data}, size_{size} {}

  // Sub-views onto the four quadrants (named <row>_<column>), each with half
  // the edge length. The quadrants follow each other in storage in the order
  // 1_1, 1_2, 2_1, 2_2.
  blocked_matrix_view quadrant_1_1() const { return quadrant(0); }
  blocked_matrix_view quadrant_1_2() const { return quadrant(1); }
  blocked_matrix_view quadrant_2_1() const { return quadrant(2); }
  blocked_matrix_view quadrant_2_2() const { return quadrant(3); }

  // Reference to the element at the given logical position.
  // No bounds checking is performed.
  double &at(size_t row, size_t column) const {
    return data_[BLOCK_LOOKUP[row][column]];
  }

  // Raw pointer to the start of the viewed storage.
  double *get_data() const {
    return data_;
  }

 private:
  // Shared implementation of the quadrant accessors: the quadrant with the
  // given index starts index * (size_/2)^2 elements after data_.
  blocked_matrix_view quadrant(size_t index) const {
    const size_t half = size_ / 2;
    return blocked_matrix_view(data_ + index * (half * half), half);
  }

  double *data_;
  size_t size_;
};
// Computes result = a * b for size x size matrices using the plain
// triple-loop algorithm. Each output row is cleared first and then
// accumulated element by element.
void multiply_naive(size_t size, blocked_matrix_view &result, blocked_matrix_view &a, blocked_matrix_view &b) {
  for (size_t row = 0; row < size; ++row) {
    // Reset the whole output row before accumulating into it.
    for (size_t column = 0; column < size; ++column) {
      result.at(row, column) = 0;
    }

    for (size_t column = 0; column < size; ++column) {
      double &cell = result.at(row, column);
      for (size_t inner = 0; inner < size; ++inner) {
        cell += a.at(row, inner) * b.at(inner, column);
      }
    }
  }
}
void
multiply_div_conquer
(
size_t
size
,
blocked_matrix_view
&
result
,
blocked_matrix_view
&
a
,
blocked_matrix_view
&
b
)
{
if
(
size
<=
8
)
{
multiply_naive
(
size
,
result
,
a
,
b
);
return
;
}
// Temporary storage required for the intermediate results
std
::
unique_ptr
<
double
[]
>
data_1_1_a
{
new
double
[(
size
/
2
)
*
(
size
/
2
)]};
std
::
unique_ptr
<
double
[]
>
data_1_1_b
{
new
double
[(
size
/
2
)
*
(
size
/
2
)]};
std
::
unique_ptr
<
double
[]
>
data_1_2_a
{
new
double
[(
size
/
2
)
*
(
size
/
2
)]};
std
::
unique_ptr
<
double
[]
>
data_1_2_b
{
new
double
[(
size
/
2
)
*
(
size
/
2
)]};
std
::
unique_ptr
<
double
[]
>
data_2_1_a
{
new
double
[(
size
/
2
)
*
(
size
/
2
)]};
std
::
unique_ptr
<
double
[]
>
data_2_1_b
{
new
double
[(
size
/
2
)
*
(
size
/
2
)]};
std
::
unique_ptr
<
double
[]
>
data_2_2_a
{
new
double
[(
size
/
2
)
*
(
size
/
2
)]};
std
::
unique_ptr
<
double
[]
>
data_2_2_b
{
new
double
[(
size
/
2
)
*
(
size
/
2
)]};
// Handles to sub-matrices used
blocked_matrix_view
result_1_1
=
result
.
quadrant_1_1
();
blocked_matrix_view
result_1_2
=
result
.
quadrant_1_2
();
blocked_matrix_view
result_2_1
=
result
.
quadrant_2_1
();
blocked_matrix_view
result_2_2
=
result
.
quadrant_2_2
();
blocked_matrix_view
result_1_1_a
{
data_1_1_a
.
get
(),
size
/
2
};
blocked_matrix_view
result_1_1_b
{
data_1_1_b
.
get
(),
size
/
2
};
blocked_matrix_view
result_1_2_a
{
data_1_2_a
.
get
(),
size
/
2
};
blocked_matrix_view
result_1_2_b
{
data_1_2_b
.
get
(),
size
/
2
};
blocked_matrix_view
result_2_1_a
{
data_2_1_a
.
get
(),
size
/
2
};
blocked_matrix_view
result_2_1_b
{
data_2_1_b
.
get
(),
size
/
2
};
blocked_matrix_view
result_2_2_a
{
data_2_2_a
.
get
(),
size
/
2
};
blocked_matrix_view
result_2_2_b
{
data_2_2_b
.
get
(),
size
/
2
};
blocked_matrix_view
a_1_1
=
a
.
quadrant_1_1
();
blocked_matrix_view
a_1_2
=
a
.
quadrant_1_2
();
blocked_matrix_view
a_2_1
=
a
.
quadrant_2_1
();
blocked_matrix_view
a_2_2
=
a
.
quadrant_2_2
();
blocked_matrix_view
b_1_1
=
b
.
quadrant_1_1
();
blocked_matrix_view
b_1_2
=
b
.
quadrant_1_2
();
blocked_matrix_view
b_2_1
=
b
.
quadrant_2_1
();
blocked_matrix_view
b_2_2
=
b
.
quadrant_2_2
();
// Divide Work Into Sub-Calls
multiply_div_conquer
(
size
/
2
,
result_1_1_a
,
a_1_1
,
b_1_1
);
multiply_div_conquer
(
size
/
2
,
result_1_1_b
,
a_1_2
,
b_2_1
);
multiply_div_conquer
(
size
/
2
,
result_1_2_a
,
a_1_1
,
b_1_2
);
multiply_div_conquer
(
size
/
2
,
result_1_2_b
,
a_1_2
,
b_2_2
);
multiply_div_conquer
(
size
/
2
,
result_2_1_a
,
a_2_1
,
b_1_1
);
multiply_div_conquer
(
size
/
2
,
result_2_1_b
,
a_2_2
,
b_2_1
);
multiply_div_conquer
(
size
/
2
,
result_2_2_a
,
a_2_1
,
b_1_2
);
multiply_div_conquer
(
size
/
2
,
result_2_2_b
,
a_2_2
,
b_2_2
);
// Combine results
for
(
size_t
row
=
0
;
row
<
size
/
2
;
row
++
)
{
for
(
size_t
column
=
0
;
column
<
size
/
2
;
column
++
)
{
result_1_1
.
at
(
row
,
column
)
=
result_1_1_a
.
at
(
row
,
column
)
+
result_1_1_b
.
at
(
row
,
column
);
result_1_2
.
at
(
row
,
column
)
=
result_1_2_a
.
at
(
row
,
column
)
+
result_1_2_b
.
at
(
row
,
column
);
result_2_1
.
at
(
row
,
column
)
=
result_2_1_a
.
at
(
row
,
column
)
+
result_2_1_b
.
at
(
row
,
column
);
result_2_2
.
at
(
row
,
column
)
=
result_2_2_a
.
at
(
row
,
column
)
+
result_2_2_b
.
at
(
row
,
column
);
}
}
}
// Scheduler limits; in the visible code they are only referenced by the
// commented-out scheduler setup at the end of main.
constexpr int MAX_NUM_TASKS{32};
constexpr int MAX_STACK_SIZE{4096 * 1};
// Sanity check for the divide-and-conquer multiplication: multiplies two
// 64 x 64 matrices with both multiply_div_conquer and multiply_naive, prints
// the difference for every element where the results disagree and finally
// prints the number of such elements.
//
// argc/argv are currently only used by the commented-out benchmark setup
// kept at the end of the function.
int main(int argc, char **argv) {
  // The lookup table must be filled before any blocked_matrix_view::at call.
  fill_block_lookup(MAX_BLOCK_LOOKUP);

  size_t size = 64;
  std::unique_ptr<double[]> result_data_naive{new double[size * size]};
  std::unique_ptr<double[]> result_data_div{new double[size * size]};
  std::unique_ptr<double[]> a_data{new double[size * size]};
  std::unique_ptr<double[]> b_data{new double[size * size]};

  blocked_matrix_view result_naive{result_data_naive.get(), size};
  blocked_matrix_view result_div{result_data_div.get(), size};
  blocked_matrix_view a{a_data.get(), size};
  blocked_matrix_view b{b_data.get(), size};

  // Inputs: every element of a holds its row index, every element of b its column index.
  for (size_t row = 0; row < size; row++) {
    for (size_t column = 0; column < size; column++) {
      a.at(row, column) = row;
      b.at(row, column) = column;
    }
  }

  multiply_div_conquer(size, result_div, a, b);
  multiply_naive(size, result_naive, a, b);

  // Compare both results element by element and report every deviation.
  size_t misses = 0;
  for (size_t row = 0; row < size; row++) {
    for (size_t column = 0; column < size; column++) {
      if (result_div.at(row, column) != result_naive.at(row, column)) {
        misses++;
        printf("%5.5f\t\t", result_div.at(row, column) - result_naive.at(row, column));
      }
    }
  }
  // misses is a size_t, so it has to be printed with %zu; passing it to %d
  // is a format/argument type mismatch.
  printf("\n%zu", misses);

//  int num_threads;
//  string directory;
//  benchmark_runner::read_args(argc, argv, num_threads, directory);
//
//  string test_name = to_string(num_threads) + ".csv";
//  string full_directory = directory + "/PLS_v3/";
//  benchmark_runner runner{full_directory, test_name};
//
//  scheduler scheduler{(unsigned) num_threads, MAX_NUM_TASKS, MAX_STACK_SIZE};
//
//  runner.run_iterations(1000, [&]() {
//    scheduler.perform_work([&]() {
//    });
//  }, 100);
//  runner.commit_results(true);
}
lib/pls/CMakeLists.txt
View file @
92ee564c
...
@@ -48,7 +48,7 @@ add_library(pls STATIC
...
@@ -48,7 +48,7 @@ add_library(pls STATIC
include/pls/internal/profiling/dag_node.h src/internal/profiling/dag_node.cpp
include/pls/internal/profiling/dag_node.h src/internal/profiling/dag_node.cpp
include/pls/internal/profiling/profiler.h src/internal/profiling/profiler.cpp
include/pls/internal/profiling/profiler.h src/internal/profiling/profiler.cpp
include/pls/internal/profiling/thread_stats.h src/internal/profiling/thread_stats.cpp
)
include/pls/internal/profiling/thread_stats.h src/internal/profiling/thread_stats.cpp
include/pls/algorithms/divide_and_conquer_buffers.h
)
# Dependencies for pls
# Dependencies for pls
target_link_libraries
(
pls Threads::Threads
)
target_link_libraries
(
pls Threads::Threads
)
...
...
lib/pls/include/pls/algorithms/divide_and_conquer_buffers.h
0 → 100644
View file @
92ee564c
#ifndef PLS_ALGORITHMS_DIVIDE_AND_CONQUER_BUFFERS_H_
#define PLS_ALGORITHMS_DIVIDE_AND_CONQUER_BUFFERS_H_
#endif //PLS_ALGORITHMS_DIVIDE_AND_CONQUER_BUFFERS_H_
lib/pls/include/pls/internal/base/system_details.h
View file @
92ee564c
...
@@ -47,7 +47,7 @@ constexpr size_t CACHE_LINE_SIZE = 64;
...
@@ -47,7 +47,7 @@ constexpr size_t CACHE_LINE_SIZE = 64;
/**
/**
* Helper to align types/values on cache lines.
* Helper to align types/values on cache lines.
*/
*/
#define PLS_CACHE_ALIGN alignas(base::system_details::CACHE_LINE_SIZE)
#define PLS_CACHE_ALIGN alignas(
::pls::internal::
base::system_details::CACHE_LINE_SIZE)
/**
/**
* Helper to find mmap page size. Either set constant or rely on system specific getter function.
* Helper to find mmap page size. Either set constant or rely on system specific getter function.
...
...
lib/pls/include/pls/internal/scheduling/strain_local_resource.h
View file @
92ee564c
...
@@ -24,7 +24,7 @@ struct PLS_CACHE_ALIGN strain_resource {
...
@@ -24,7 +24,7 @@ struct PLS_CACHE_ALIGN strain_resource {
unsigned
const
index_
;
unsigned
const
index_
;
unsigned
const
depth_
;
unsigned
const
depth_
;
bool
used_
{
false
};
std
::
atomic
<
int
>
used_
{
0
};
std
::
atomic
<
strain_resource
*>
next_
{
nullptr
};
std
::
atomic
<
strain_resource
*>
next_
{
nullptr
};
};
};
...
@@ -48,7 +48,7 @@ class strain_local_resource {
...
@@ -48,7 +48,7 @@ class strain_local_resource {
item_handle
(
item_handle
&&
)
=
delete
;
item_handle
(
item_handle
&&
)
=
delete
;
item_handle
&
operator
=
(
item_handle
&&
)
=
delete
;
item_handle
&
operator
=
(
item_handle
&&
)
=
delete
;
explicit
item_handle
(
strain_resource
*
resource
)
:
resource_
{
resource
}
{}
explicit
item_handle
(
strain_resource
*
resource
)
;
~
item_handle
();
~
item_handle
();
unsigned
get_strain_index
()
{
unsigned
get_strain_index
()
{
...
@@ -56,7 +56,7 @@ class strain_local_resource {
...
@@ -56,7 +56,7 @@ class strain_local_resource {
}
}
private
:
private
:
strain_resource
*
resource_
;
strain_resource
*
const
resource_
;
};
};
strain_local_resource
(
unsigned
num_threads
,
strain_local_resource
(
unsigned
num_threads
,
...
...
lib/pls/src/internal/scheduling/strain_local_resource.cpp
View file @
92ee564c
...
@@ -23,11 +23,14 @@ strain_local_resource::item_handle strain_local_resource::get_item(unsigned dept
...
@@ -23,11 +23,14 @@ strain_local_resource::item_handle strain_local_resource::get_item(unsigned dept
active_task
->
attached_resources_
.
store
(
result
,
std
::
memory_order_relaxed
);
active_task
->
attached_resources_
.
store
(
result
,
std
::
memory_order_relaxed
);
// Wrap it for RAII usage on stack
// Wrap it for RAII usage on stack
PLS_ASSERT
(
!
result
->
used_
,
"Must not try to allocate an already used resource!"
);
result
->
used_
=
true
;
return
strain_local_resource
::
item_handle
{
result
};
return
strain_local_resource
::
item_handle
{
result
};
}
}
strain_local_resource
::
item_handle
::
item_handle
(
strain_resource
*
resource
)
:
resource_
{
resource
}
{
PLS_ASSERT
(
resource_
->
used_
.
fetch_add
(
1
,
std
::
memory_order_relaxed
)
==
0
,
"Must not create a handle of a already used resource!"
);
}
// Return item to locally owned items
// Return item to locally owned items
strain_local_resource
::
item_handle
::~
item_handle
()
{
strain_local_resource
::
item_handle
::~
item_handle
()
{
// Only change our resource usage when synced.
// Only change our resource usage when synced.
...
@@ -54,8 +57,8 @@ strain_local_resource::item_handle::~item_handle() {
...
@@ -54,8 +57,8 @@ strain_local_resource::item_handle::~item_handle() {
// Give the resource handle back to our local resource array
// Give the resource handle back to our local resource array
auto
&
local_resource
=
resource_
->
strain_local_resource_
->
local_items_
[
my_state
.
get_thread_id
()][
resource_
->
depth_
];
auto
&
local_resource
=
resource_
->
strain_local_resource_
->
local_items_
[
my_state
.
get_thread_id
()][
resource_
->
depth_
];
local_resource
.
resource_
=
resource_
;
local_resource
.
resource_
=
resource_
;
PLS_ASSERT
(
resource_
->
used_
,
"Must only release used resources!"
);
PLS_ASSERT
(
resource_
->
used_
.
fetch_sub
(
1
,
std
::
memory_order_relaxed
)
==
1
,
resource_
->
used_
=
false
;
"Accidentally freed resource that was accessed multiple times!"
)
;
}
}
strain_resource
*
strain_local_resource
::
get_local_copy
(
strain_resource
*
other_resources
,
unsigned
thread_id
)
{
strain_resource
*
strain_local_resource
::
get_local_copy
(
strain_resource
*
other_resources
,
unsigned
thread_id
)
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment