Skip to content
Toggle navigation
P
Projects
G
Groups
S
Snippets
Help
las3_pub
/
predictable_parallel_patterns
This project
Loading...
Sign in
Toggle navigation
Go to a project
Project
Repository
Issues
0
Merge Requests
0
Pipelines
Wiki
Members
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Commit
8f47876d
authored
Jun 08, 2020
by
FritzFlorian
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Add standard divide and conquer matrix multiplication test for comparison.
parent
92ee564c
Pipeline
#1502
passed with stages
in 4 minutes 13 seconds
Changes
6
Pipelines
1
Show whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
227 additions
and
164 deletions
+227
-164
app/benchmark_matrix_div_conquer/main.cpp
+124
-160
extern/benchmark_base/CMakeLists.txt
+2
-1
extern/benchmark_base/include/benchmark_base/matrix_div_conquer.h
+96
-0
lib/pls/include/pls/internal/scheduling/scheduler_impl.h
+0
-1
lib/pls/include/pls/internal/scheduling/strain_local_resource.h
+1
-2
lib/pls/include/pls/pls.h
+4
-0
No files found.
app/benchmark_matrix_div_conquer/main.cpp
View file @
8f47876d
//
#include "pls/pls.h"
#include "pls/pls.h"
//
using namespace pls;
using
namespace
pls
;
#include "benchmark_runner.h"
#include "benchmark_runner.h"
#include "benchmark_base/matrix_div_conquer.h"
using
namespace
comparison_benchmarks
::
base
;
#include <memory>
#include <memory>
#include <array>
#include <array>
#include <math.h>
#include <vector>
// Lookup table used to index directly into blocked matrices.
// BLOCK_LOOKUP[row][column] holds the offset of element (row, column) inside
// the blocked storage, where the four quadrants of a matrix are stored one
// after another (1_1, 1_2, 2_1, 2_2) and each quadrant is laid out the same
// way again.
const size_t MAX_BLOCK_LOOKUP = 256;
std::array<std::array<size_t, MAX_BLOCK_LOOKUP>, MAX_BLOCK_LOOKUP> BLOCK_LOOKUP; // ROW, COLUMN

// Fills BLOCK_LOOKUP for a size x size matrix.
//
// The table is built bottom-up: the 1x1 table is trivial, and every larger
// table is derived from the table of half its size by shifting the known
// offsets by one, two and three quadrant sizes.
//
// Sizes above MAX_BLOCK_LOOKUP index past the table; sizes that are not a
// power of two leave some entries unwritten.
void fill_block_lookup(size_t size) {
  // Number of halvings needed until the trivial case (size <= 1) is reached.
  unsigned levels = 0;
  while ((size >> levels) > 1) {
    ++levels;
  }

  BLOCK_LOOKUP[0][0] = 0;

  // Walk back up from the smallest non-trivial size to the requested size.
  for (unsigned level = levels; level-- > 0;) {
    const size_t half = (size >> level) / 2;
    const size_t quarter_elements = half * half;

    for (size_t r = 0; r < half; ++r) {
      for (size_t c = 0; c < half; ++c) {
        // Offset of the element inside the top-left quadrant (already known).
        const size_t base = BLOCK_LOOKUP[r][c];
        BLOCK_LOOKUP[r][half + c] = base + quarter_elements;
        BLOCK_LOOKUP[half + r][c] = base + 2 * quarter_elements;
        BLOCK_LOOKUP[half + r][half + c] = base + 3 * quarter_elements;
      }
    }
  }
}
// View onto a square matrix stored in blocked order.
//
// The view only stores the data pointer and the edge length; it never
// allocates or frees the memory it points to. Element positions are resolved
// through the global BLOCK_LOOKUP table, which has to be filled beforehand.
class blocked_matrix_view {
 public:
  blocked_matrix_view(double *data, size_t size) : data_{data}, size_{size} {}

  // The quadrants are stored back to back in the order 1_1, 1_2, 2_1, 2_2,
  // each one occupying (size / 2) * (size / 2) elements.
  blocked_matrix_view quadrant_1_1() { return sub_view(0); }
  blocked_matrix_view quadrant_1_2() { return sub_view(1); }
  blocked_matrix_view quadrant_2_1() { return sub_view(2); }
  blocked_matrix_view quadrant_2_2() { return sub_view(3); }

  // Reference to the element at (row, column); no bounds checking is done.
  double &at(size_t row, size_t column) {
    const size_t offset = BLOCK_LOOKUP[row][column];
    return *(data_ + offset);
  }

  // Pointer to the first element of the underlying blocked storage.
  double *get_data() { return data_; }

 private:
  // View of the quadrant with the given storage position (0..3).
  blocked_matrix_view sub_view(size_t position) {
    const size_t half = size_ / 2;
    const size_t quarter_elements = half * half;
    return blocked_matrix_view(data_ + position * quarter_elements, half);
  }

  double *data_;
  size_t size_;
};
void
multiply_naive
(
size_t
size
,
blocked_matrix_view
&
result
,
blocked_matrix_view
&
a
,
blocked_matrix_view
&
b
)
{
void
multiply_div_conquer
(
const
std
::
vector
<
std
::
vector
<
std
::
vector
<
std
::
unique_ptr
<
double
[]
>>>>
&
tmp_arrays
,
for
(
size_t
i
=
0
;
i
<
size
;
i
++
)
{
pls
::
strain_local_resource
&
local_indices
,
for
(
size_t
j
=
0
;
j
<
size
;
j
++
)
{
size_t
size
,
result
.
at
(
i
,
j
)
=
0
;
size_t
depth
,
}
matrix_div_conquer
::
blocked_matrix_view
&
result
,
for
(
size_t
j
=
0
;
j
<
size
;
j
++
)
{
matrix_div_conquer
::
blocked_matrix_view
&
a
,
for
(
size_t
k
=
0
;
k
<
size
;
k
++
)
{
matrix_div_conquer
::
blocked_matrix_view
&
b
)
{
result
.
at
(
i
,
j
)
+=
a
.
at
(
i
,
k
)
*
b
.
at
(
k
,
j
);
}
}
}
}
void
multiply_div_conquer
(
size_t
size
,
blocked_matrix_view
&
result
,
blocked_matrix_view
&
a
,
blocked_matrix_view
&
b
)
{
if
(
size
<=
8
)
{
if
(
size
<=
8
)
{
multiply_naive
(
size
,
result
,
a
,
b
);
multiply_naive
(
size
,
result
,
a
,
b
);
return
;
return
;
}
}
// Temporary storage required for the intermediate results
// Temporary storage required for the intermediate results
std
::
unique_ptr
<
double
[]
>
data_1_1_a
{
new
double
[(
size
/
2
)
*
(
size
/
2
)]};
auto
strain_local_index
=
local_indices
.
get_item
(
depth
);
std
::
unique_ptr
<
double
[]
>
data_1_1_b
{
new
double
[(
size
/
2
)
*
(
size
/
2
)]};
std
::
unique_ptr
<
double
[]
>
const
&
data_1_1_a
=
tmp_arrays
[
depth
][
strain_local_index
.
get_strain_index
()][
0
];
std
::
unique_ptr
<
double
[]
>
data_1_2_a
{
new
double
[(
size
/
2
)
*
(
size
/
2
)]};
std
::
unique_ptr
<
double
[]
>
const
&
data_1_1_b
=
tmp_arrays
[
depth
][
strain_local_index
.
get_strain_index
()][
1
];
std
::
unique_ptr
<
double
[]
>
data_1_2_b
{
new
double
[(
size
/
2
)
*
(
size
/
2
)]};
std
::
unique_ptr
<
double
[]
>
const
&
data_1_2_a
=
tmp_arrays
[
depth
][
strain_local_index
.
get_strain_index
()][
2
];
std
::
unique_ptr
<
double
[]
>
data_2_1_a
{
new
double
[(
size
/
2
)
*
(
size
/
2
)]};
std
::
unique_ptr
<
double
[]
>
const
&
data_1_2_b
=
tmp_arrays
[
depth
][
strain_local_index
.
get_strain_index
()][
3
];
std
::
unique_ptr
<
double
[]
>
data_2_1_b
{
new
double
[(
size
/
2
)
*
(
size
/
2
)]};
std
::
unique_ptr
<
double
[]
>
const
&
data_2_1_a
=
tmp_arrays
[
depth
][
strain_local_index
.
get_strain_index
()][
4
];
std
::
unique_ptr
<
double
[]
>
data_2_2_a
{
new
double
[(
size
/
2
)
*
(
size
/
2
)]};
std
::
unique_ptr
<
double
[]
>
const
&
data_2_1_b
=
tmp_arrays
[
depth
][
strain_local_index
.
get_strain_index
()][
5
];
std
::
unique_ptr
<
double
[]
>
data_2_2_b
{
new
double
[(
size
/
2
)
*
(
size
/
2
)]};
std
::
unique_ptr
<
double
[]
>
const
&
data_2_2_a
=
tmp_arrays
[
depth
][
strain_local_index
.
get_strain_index
()][
6
];
std
::
unique_ptr
<
double
[]
>
const
&
data_2_2_b
=
tmp_arrays
[
depth
][
strain_local_index
.
get_strain_index
()][
7
];
// Handles to sub-matrices used
// Handles to sub-matrices used
blocked_matrix_view
result_1_1
=
result
.
quadrant_1_1
();
matrix_div_conquer
::
blocked_matrix_view
result_1_1
=
result
.
quadrant_1_1
();
blocked_matrix_view
result_1_2
=
result
.
quadrant_1_2
();
matrix_div_conquer
::
blocked_matrix_view
result_1_2
=
result
.
quadrant_1_2
();
blocked_matrix_view
result_2_1
=
result
.
quadrant_2_1
();
matrix_div_conquer
::
blocked_matrix_view
result_2_1
=
result
.
quadrant_2_1
();
blocked_matrix_view
result_2_2
=
result
.
quadrant_2_2
();
matrix_div_conquer
::
blocked_matrix_view
result_2_2
=
result
.
quadrant_2_2
();
blocked_matrix_view
result_1_1_a
{
data_1_1_a
.
get
(),
size
/
2
};
matrix_div_conquer
::
blocked_matrix_view
result_1_1_a
{
data_1_1_a
.
get
(),
size
/
2
};
blocked_matrix_view
result_1_1_b
{
data_1_1_b
.
get
(),
size
/
2
};
matrix_div_conquer
::
blocked_matrix_view
result_1_1_b
{
data_1_1_b
.
get
(),
size
/
2
};
blocked_matrix_view
result_1_2_a
{
data_1_2_a
.
get
(),
size
/
2
};
matrix_div_conquer
::
blocked_matrix_view
result_1_2_a
{
data_1_2_a
.
get
(),
size
/
2
};
blocked_matrix_view
result_1_2_b
{
data_1_2_b
.
get
(),
size
/
2
};
matrix_div_conquer
::
blocked_matrix_view
result_1_2_b
{
data_1_2_b
.
get
(),
size
/
2
};
blocked_matrix_view
result_2_1_a
{
data_2_1_a
.
get
(),
size
/
2
};
matrix_div_conquer
::
blocked_matrix_view
result_2_1_a
{
data_2_1_a
.
get
(),
size
/
2
};
blocked_matrix_view
result_2_1_b
{
data_2_1_b
.
get
(),
size
/
2
};
matrix_div_conquer
::
blocked_matrix_view
result_2_1_b
{
data_2_1_b
.
get
(),
size
/
2
};
blocked_matrix_view
result_2_2_a
{
data_2_2_a
.
get
(),
size
/
2
};
matrix_div_conquer
::
blocked_matrix_view
result_2_2_a
{
data_2_2_a
.
get
(),
size
/
2
};
blocked_matrix_view
result_2_2_b
{
data_2_2_b
.
get
(),
size
/
2
};
matrix_div_conquer
::
blocked_matrix_view
result_2_2_b
{
data_2_2_b
.
get
(),
size
/
2
};
blocked_matrix_view
a_1_1
=
a
.
quadrant_1_1
();
matrix_div_conquer
::
blocked_matrix_view
a_1_1
=
a
.
quadrant_1_1
();
blocked_matrix_view
a_1_2
=
a
.
quadrant_1_2
();
matrix_div_conquer
::
blocked_matrix_view
a_1_2
=
a
.
quadrant_1_2
();
blocked_matrix_view
a_2_1
=
a
.
quadrant_2_1
();
matrix_div_conquer
::
blocked_matrix_view
a_2_1
=
a
.
quadrant_2_1
();
blocked_matrix_view
a_2_2
=
a
.
quadrant_2_2
();
matrix_div_conquer
::
blocked_matrix_view
a_2_2
=
a
.
quadrant_2_2
();
blocked_matrix_view
b_1_1
=
b
.
quadrant_1_1
();
matrix_div_conquer
::
blocked_matrix_view
b_1_1
=
b
.
quadrant_1_1
();
blocked_matrix_view
b_1_2
=
b
.
quadrant_1_2
();
matrix_div_conquer
::
blocked_matrix_view
b_1_2
=
b
.
quadrant_1_2
();
blocked_matrix_view
b_2_1
=
b
.
quadrant_2_1
();
matrix_div_conquer
::
blocked_matrix_view
b_2_1
=
b
.
quadrant_2_1
();
blocked_matrix_view
b_2_2
=
b
.
quadrant_2_2
();
matrix_div_conquer
::
blocked_matrix_view
b_2_2
=
b
.
quadrant_2_2
();
// Divide Work Into Sub-Calls
// Divide Work Into Sub-Calls
multiply_div_conquer
(
size
/
2
,
result_1_1_a
,
a_1_1
,
b_1_1
);
pls
::
spawn
(
multiply_div_conquer
(
size
/
2
,
result_1_1_b
,
a_1_2
,
b_2_1
);
[
&
]()
{
multiply_div_conquer
(
tmp_arrays
,
local_indices
,
size
/
2
,
depth
+
1
,
result_1_1_a
,
a_1_1
,
b_1_1
);
}
);
multiply_div_conquer
(
size
/
2
,
result_1_2_a
,
a_1_1
,
b_1_2
);
pls
::
spawn
(
multiply_div_conquer
(
size
/
2
,
result_1_2_b
,
a_1_2
,
b_2_2
);
[
&
]()
{
multiply_div_conquer
(
tmp_arrays
,
local_indices
,
size
/
2
,
depth
+
1
,
result_1_1_b
,
a_1_2
,
b_2_1
);
}
);
multiply_div_conquer
(
size
/
2
,
result_2_1_a
,
a_2_1
,
b_1_1
);
multiply_div_conquer
(
size
/
2
,
result_2_1_b
,
a_2_2
,
b_2_1
);
pls
::
spawn
(
[
&
]()
{
multiply_div_conquer
(
tmp_arrays
,
local_indices
,
size
/
2
,
depth
+
1
,
result_1_2_a
,
a_1_1
,
b_1_2
);
}
multiply_div_conquer
(
size
/
2
,
result_2_2_a
,
a_2_1
,
b_1_2
);
);
multiply_div_conquer
(
size
/
2
,
result_2_2_b
,
a_2_2
,
b_2_2
);
pls
::
spawn
(
[
&
]()
{
multiply_div_conquer
(
tmp_arrays
,
local_indices
,
size
/
2
,
depth
+
1
,
result_1_2_b
,
a_1_2
,
b_2_2
);
}
);
pls
::
spawn
(
[
&
]()
{
multiply_div_conquer
(
tmp_arrays
,
local_indices
,
size
/
2
,
depth
+
1
,
result_2_1_a
,
a_2_1
,
b_1_1
);
}
);
pls
::
spawn
(
[
&
]()
{
multiply_div_conquer
(
tmp_arrays
,
local_indices
,
size
/
2
,
depth
+
1
,
result_2_1_b
,
a_2_2
,
b_2_1
);
}
);
pls
::
spawn
(
[
&
]()
{
multiply_div_conquer
(
tmp_arrays
,
local_indices
,
size
/
2
,
depth
+
1
,
result_2_2_a
,
a_2_1
,
b_1_2
);
}
);
pls
::
spawn
(
[
&
]()
{
multiply_div_conquer
(
tmp_arrays
,
local_indices
,
size
/
2
,
depth
+
1
,
result_2_2_b
,
a_2_2
,
b_2_2
);
}
);
pls
::
sync
();
// Combine results
// Combine results
for
(
size_t
row
=
0
;
row
<
size
/
2
;
row
++
)
{
for
(
size_t
i
=
0
;
i
<
(
size
/
2
)
*
(
size
/
2
);
i
++
)
{
for
(
size_t
column
=
0
;
column
<
size
/
2
;
column
++
)
{
// The layout is not important here, as all have the same order, so just sum element-wise
result_1_1
.
at
(
row
,
column
)
=
result_1_1_a
.
at
(
row
,
column
)
+
result_1_1_b
.
at
(
row
,
column
);
result_1_1
.
get_data
()[
i
]
=
result_1_1_a
.
get_data
()[
i
]
+
result_1_1_b
.
get_data
()[
i
];
result_1_2
.
at
(
row
,
column
)
=
result_1_2_a
.
at
(
row
,
column
)
+
result_1_2_b
.
at
(
row
,
column
);
result_1_2
.
get_data
()[
i
]
=
result_1_2_a
.
get_data
()[
i
]
+
result_1_2_b
.
get_data
()[
i
];
result_2_1
.
at
(
row
,
column
)
=
result_2_1_a
.
at
(
row
,
column
)
+
result_2_1_b
.
at
(
row
,
column
);
result_2_1
.
get_data
()[
i
]
=
result_2_1_a
.
get_data
()[
i
]
+
result_2_1_b
.
get_data
()[
i
];
result_2_2
.
at
(
row
,
column
)
=
result_2_2_a
.
at
(
row
,
column
)
+
result_2_2_b
.
at
(
row
,
column
);
result_2_2
.
get_data
()[
i
]
=
result_2_2_a
.
get_data
()[
i
]
+
result_2_2_b
.
get_data
()[
i
];
}
}
}
}
}
constexpr
int
MAX_NUM_TASKS
=
32
;
constexpr
int
MAX_NUM_TASKS
=
32
;
constexpr
int
MAX_STACK_SIZE
=
4096
*
1
;
constexpr
int
MAX_STACK_SIZE
=
4096
*
2
;
int
main
(
int
argc
,
char
**
argv
)
{
int
main
(
int
argc
,
char
**
argv
)
{
fill_block_lookup
(
MAX_BLOCK_LOOKUP
)
;
const
size_t
size
=
matrix_div_conquer
::
MATRIX_SIZE
;
size_t
size
=
64
;
int
num_threads
;
std
::
unique_ptr
<
double
[]
>
result_data_naive
{
new
double
[
size
*
size
]};
string
directory
;
std
::
unique_ptr
<
double
[]
>
result_data_div
{
new
double
[
size
*
size
]};
benchmark_runner
::
read_args
(
argc
,
argv
,
num_threads
,
directory
);
string
test_name
=
to_string
(
num_threads
)
+
".csv"
;
string
full_directory
=
directory
+
"/PLS_v3/"
;
benchmark_runner
runner
{
full_directory
,
test_name
};
// Only run on one version to avoid copy
std
::
unique_ptr
<
double
[]
>
result_data
{
new
double
[
size
*
size
]};
std
::
unique_ptr
<
double
[]
>
a_data
{
new
double
[
size
*
size
]};
std
::
unique_ptr
<
double
[]
>
a_data
{
new
double
[
size
*
size
]};
std
::
unique_ptr
<
double
[]
>
b_data
{
new
double
[
size
*
size
]};
std
::
unique_ptr
<
double
[]
>
b_data
{
new
double
[
size
*
size
]};
blocked_matrix_view
result_naive
{
result_data_naive
.
get
(),
size
};
matrix_div_conquer
::
blocked_matrix_view
a
{
a_data
.
get
(),
size
};
blocked_matrix_view
result_div
{
result_data_div
.
get
(),
size
};
matrix_div_conquer
::
blocked_matrix_view
b
{
b_data
.
get
(),
size
};
blocked_matrix_view
a
{
a_data
.
get
(),
size
};
matrix_div_conquer
::
blocked_matrix_view
result
{
result_data
.
get
(),
size
};
blocked_matrix_view
b
{
b_data
.
get
(),
size
};
for
(
size_t
row
=
0
;
row
<
size
;
row
++
)
{
for
(
size_t
column
=
0
;
column
<
size
;
column
++
)
{
a
.
at
(
row
,
column
)
=
row
;
b
.
at
(
row
,
column
)
=
column
;
}
}
multiply_div_conquer
(
size
,
result_div
,
a
,
b
);
// Fill data arrays as needed
multiply_naive
(
size
,
result_naive
,
a
,
b
);
a
.
fill_default_data
();
b
.
fill_default_data
();
matrix_div_conquer
::
fill_block_lookup
(
size
);
size_t
misses
=
0
;
// Strain local data
for
(
size_t
row
=
0
;
row
<
size
;
row
++
)
{
std
::
vector
<
std
::
vector
<
std
::
vector
<
std
::
unique_ptr
<
double
[]
>>>>
div_conquer_temp_arrays
;
for
(
size_t
column
=
0
;
column
<
size
;
column
++
)
{
size_t
max_depth
=
0
;
if
(
result_div
.
at
(
row
,
column
)
!=
result_naive
.
at
(
row
,
column
))
{
size_t
remaining_size
=
size
;
misses
++
;
while
(
remaining_size
>
1
)
{
printf
(
"%5.5f
\t\t
"
,
result_div
.
at
(
row
,
column
)
-
result_naive
.
at
(
row
,
column
));
auto
&
depth_buffers
=
div_conquer_temp_arrays
.
emplace_back
();
for
(
int
thread_id
=
0
;
thread_id
<
8
;
thread_id
++
)
{
auto
&
depth_thread_buffers
=
depth_buffers
.
emplace_back
();
for
(
int
i
=
0
;
i
<
8
;
i
++
)
{
depth_thread_buffers
.
emplace_back
(
new
double
[(
remaining_size
/
2
)
*
(
remaining_size
/
2
)]);
}
}
}
}
max_depth
++
;
remaining_size
=
remaining_size
/
2
;
}
}
printf
(
"
\n
%d"
,
misses
);
pls
::
strain_local_resource
local_indices
{(
unsigned
)
num_threads
,
(
unsigned
)
max_depth
};
// int num_threads;
scheduler
scheduler
{(
unsigned
)
num_threads
,
MAX_NUM_TASKS
,
MAX_STACK_SIZE
};
// string directory;
// benchmark_runner::read_args(argc, argv, num_threads, directory);
runner
.
run_iterations
(
1
,
[
&
]()
{
//
scheduler
.
perform_work
([
&
]()
{
// string test_name = to_string(num_threads) + ".csv";
multiply_div_conquer
(
div_conquer_temp_arrays
,
local_indices
,
size
,
0
,
result
,
a
,
b
);
// string full_directory = directory + "/PLS_v3/";
});
// benchmark_runner runner{full_directory, test_name};
},
0
);
//
runner
.
commit_results
(
true
);
// scheduler scheduler{(unsigned) num_threads, MAX_NUM_TASKS, MAX_STACK_SIZE};
//
scheduler
.
terminate
();
// runner.run_iterations(1000, [&]() {
// scheduler.perform_work([&]() {
// });
// }, 100);
// runner.commit_results(true);
}
}
extern/benchmark_base/CMakeLists.txt
View file @
8f47876d
...
@@ -8,7 +8,8 @@ add_library(benchmark_base STATIC
...
@@ -8,7 +8,8 @@ add_library(benchmark_base STATIC
include/benchmark_base/matrix.h
include/benchmark_base/matrix.h
include/benchmark_base/unbalanced.h src/unbalanced.cpp
include/benchmark_base/unbalanced.h src/unbalanced.cpp
include/benchmark_base/range.h
include/benchmark_base/range.h
include/benchmark_base/fib.h
)
include/benchmark_base/fib.h
include/benchmark_base/matrix_div_conquer.h
)
target_include_directories
(
benchmark_base
target_include_directories
(
benchmark_base
PUBLIC
PUBLIC
...
...
extern/benchmark_base/include/benchmark_base/matrix_div_conquer.h
0 → 100644
View file @
8f47876d
#ifndef COMPARISON_BENCHMARKS_BASE_MATRIX_DIV_CONQUER_H
#define COMPARISON_BENCHMARKS_BASE_MATRIX_DIV_CONQUER_H
#include <array>
namespace comparison_benchmarks {
namespace base {
namespace matrix_div_conquer {

// Constants describing the benchmark setup. They are only declared here; the
// code consuming them lives in the individual benchmark applications.
const int MATRIX_SIZE = 128;
const int CUTOFF_SIZE = 8;
const int NUM_ITERATIONS = 100;
const int WARMUP_ITERATIONS = 10;

// Helpers to directly index into blocked matrices
const size_t MAX_SIZE = 128;

// BLOCK_LOOKUP[row][column] is the offset of element (row, column) inside the
// blocked storage: the four quadrants of a matrix are stored one after another
// (1_1, 1_2, 2_1, 2_2) and every quadrant uses the same layout recursively.
//
// Declared `inline` so that all translation units including this header share
// one table instead of each emitting its own (conflicting) definition.
inline std::array<std::array<size_t, MAX_SIZE>, MAX_SIZE> BLOCK_LOOKUP; // ROW, COLUMN

// Fills BLOCK_LOOKUP for a size x size matrix.
//
// The table for `size` is derived from the table for `size / 2`, which is
// computed first by the recursive call. Because of that, the upper-left part
// of the table is also the valid table for every smaller (halved) size, so
// views on sub-matrices can use the very same table.
//
// Preconditions (not checked): size <= MAX_SIZE, otherwise the writes run past
// the table; size should be a power of two, otherwise some entries are never
// written.
//
// `inline` because this function is defined in a header.
inline void fill_block_lookup(size_t size = MAX_SIZE) {
  if (size <= 1) {
    BLOCK_LOOKUP[0][0] = 0;
    return;
  }

  const size_t half = size / 2;
  fill_block_lookup(half);

  const size_t elements_per_quarter = half * half;
  for (size_t row = 0; row < half; row++) {
    for (size_t column = 0; column < half; column++) {
      // Offset inside the upper-left quadrant, known from the recursive call.
      const size_t base = BLOCK_LOOKUP[row][column];
      BLOCK_LOOKUP[row][half + column] = base + elements_per_quarter;
      BLOCK_LOOKUP[half + row][column] = base + 2 * elements_per_quarter;
      BLOCK_LOOKUP[half + row][half + column] = base + 3 * elements_per_quarter;
    }
  }
}

// View onto a square matrix stored in blocked order.
//
// The view only stores the data pointer and the edge length; it never
// allocates or frees the memory it points to. Element positions are resolved
// through BLOCK_LOOKUP, so fill_block_lookup() must have been called for a
// size at least as large as the view before at() is used.
class blocked_matrix_view {
 public:
  blocked_matrix_view(double *data, size_t size) : data_{data}, size_{size} {}

  // Sets every element to the index of the row it is in.
  void fill_default_data() {
    for (size_t row = 0; row < size_; row++) {
      for (size_t column = 0; column < size_; column++) {
        at(row, column) = static_cast<double>(row);
      }
    }
  }

  // Views of the four quadrants. They are stored back to back in the order
  // 1_1, 1_2, 2_1, 2_2, each occupying (size / 2) * (size / 2) elements.
  blocked_matrix_view quadrant_1_1() { return quadrant(0); }
  blocked_matrix_view quadrant_1_2() { return quadrant(1); }
  blocked_matrix_view quadrant_2_1() { return quadrant(2); }
  blocked_matrix_view quadrant_2_2() { return quadrant(3); }

  // Reference to the element at (row, column); no bounds checking is done.
  double &at(size_t row, size_t column) {
    return data_[BLOCK_LOOKUP[row][column]];
  }

  // Pointer to the first element of the underlying blocked storage.
  double *get_data() {
    return data_;
  }

 private:
  // View of the quadrant stored at the given position (0..3).
  blocked_matrix_view quadrant(size_t position) {
    const size_t half = size_ / 2;
    const size_t elements_per_quarter = half * half;
    return blocked_matrix_view(data_ + position * elements_per_quarter, half);
  }

  double *data_;
  size_t size_;
};

// Straightforward triple-loop multiplication: result = a * b for the upper-left
// size x size elements of the views.
//
// Each row of `result` is zeroed before it is accumulated into, so `result`
// must not share storage with `a` or `b`.
//
// `inline` because this function is defined in a header.
inline void multiply_naive(size_t size, blocked_matrix_view &result, blocked_matrix_view &a, blocked_matrix_view &b) {
  for (size_t i = 0; i < size; i++) {
    for (size_t j = 0; j < size; j++) {
      result.at(i, j) = 0;
    }
    for (size_t j = 0; j < size; j++) {
      for (size_t k = 0; k < size; k++) {
        result.at(i, j) += a.at(i, k) * b.at(k, j);
      }
    }
  }
}

}
}
}
#endif // COMPARISON_BENCHMARKS_BASE_MATRIX_DIV_CONQUER_H
lib/pls/include/pls/internal/scheduling/scheduler_impl.h
View file @
8f47876d
...
@@ -154,7 +154,6 @@ template<typename Function>
...
@@ -154,7 +154,6 @@ template<typename Function>
void
scheduler
::
spawn_internal
(
Function
&&
lambda
)
{
void
scheduler
::
spawn_internal
(
Function
&&
lambda
)
{
if
(
thread_state
::
is_scheduler_active
())
{
if
(
thread_state
::
is_scheduler_active
())
{
thread_state
&
spawning_state
=
thread_state
::
get
();
thread_state
&
spawning_state
=
thread_state
::
get
();
scheduler
&
scheduler
=
spawning_state
.
get_scheduler
();
base_task
*
last_task
=
spawning_state
.
get_active_task
();
base_task
*
last_task
=
spawning_state
.
get_active_task
();
base_task
*
spawned_task
=
last_task
->
next_
;
base_task
*
spawned_task
=
last_task
->
next_
;
...
...
lib/pls/include/pls/internal/scheduling/strain_local_resource.h
View file @
8f47876d
...
@@ -60,8 +60,7 @@ class strain_local_resource {
...
@@ -60,8 +60,7 @@ class strain_local_resource {
};
};
strain_local_resource
(
unsigned
num_threads
,
strain_local_resource
(
unsigned
num_threads
,
unsigned
depth
)
:
local_items_
()
{
unsigned
depth
)
:
local_items_
(
num_threads
)
{
local_items_
.
reserve
(
num_threads
);
for
(
unsigned
thread_id
=
0
;
thread_id
<
num_threads
;
thread_id
++
)
{
for
(
unsigned
thread_id
=
0
;
thread_id
<
num_threads
;
thread_id
++
)
{
local_items_
[
thread_id
].
reserve
(
depth
);
local_items_
[
thread_id
].
reserve
(
depth
);
for
(
unsigned
i
=
0
;
i
<
depth
;
i
++
)
{
for
(
unsigned
i
=
0
;
i
<
depth
;
i
++
)
{
...
...
lib/pls/include/pls/pls.h
View file @
8f47876d
...
@@ -8,6 +8,7 @@
...
@@ -8,6 +8,7 @@
#include "pls/algorithms/reduce.h"
#include "pls/algorithms/reduce.h"
#include "pls/internal/scheduling/scheduler.h"
#include "pls/internal/scheduling/scheduler.h"
#include "pls/internal/scheduling/strain_local_resource.h"
#include "pls/internal/helpers/range.h"
#include "pls/internal/helpers/range.h"
#include "pls/internal/helpers/member_function.h"
#include "pls/internal/helpers/member_function.h"
...
@@ -28,6 +29,9 @@ static void serial(Function &&function) {
...
@@ -28,6 +29,9 @@ static void serial(Function &&function) {
scheduler
::
serial
(
std
::
forward
<
Function
>
(
function
));
scheduler
::
serial
(
std
::
forward
<
Function
>
(
function
));
}
}
// strain local resource support (rather low-level)
using
internal
::
scheduling
::
strain_local_resource
;
// general helpers that can be handy when using PLS
// general helpers that can be handy when using PLS
template
<
class
C
,
typename
R
,
typename
...
ARGS
>
template
<
class
C
,
typename
R
,
typename
...
ARGS
>
using
member_function
=
internal
::
helpers
::
member_function
<
C
,
R
,
ARGS
...
>
;
using
member_function
=
internal
::
helpers
::
member_function
<
C
,
R
,
ARGS
...
>
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment