Skip to content
Toggle navigation
P
Projects
G
Groups
S
Snippets
Help
las3_pub
/
predictable_parallel_patterns
This project
Loading...
Sign in
Toggle navigation
Go to a project
Project
Repository
Issues
0
Merge Requests
0
Pipelines
Wiki
Members
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Commit
8f47876d
authored
4 years ago
by
FritzFlorian
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Add standard divide and conquer matrix multiplication test for comparison.
parent
92ee564c
Pipeline
#1502
passed with stages
in 4 minutes 13 seconds
Changes
6
Pipelines
1
Show whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
227 additions
and
164 deletions
+227
-164
app/benchmark_matrix_div_conquer/main.cpp
+124
-160
extern/benchmark_base/CMakeLists.txt
+2
-1
extern/benchmark_base/include/benchmark_base/matrix_div_conquer.h
+96
-0
lib/pls/include/pls/internal/scheduling/scheduler_impl.h
+0
-1
lib/pls/include/pls/internal/scheduling/strain_local_resource.h
+1
-2
lib/pls/include/pls/pls.h
+4
-0
No files found.
app/benchmark_matrix_div_conquer/main.cpp
View file @
8f47876d
//
#include "pls/pls.h"
//
using namespace pls;
#include "pls/pls.h"
using
namespace
pls
;
#include "benchmark_runner.h"
#include "benchmark_base/matrix_div_conquer.h"
using
namespace
comparison_benchmarks
::
base
;
#include <memory>
#include <array>
#include <math.h>
// Helpers to directly index into blocked matrices
const size_t MAX_BLOCK_LOOKUP = 256;

// BLOCK_LOOKUP[row][column] is the offset of element (row, column) inside a
// buffer laid out quadrant by quadrant (see fill_block_lookup).
std::array<std::array<size_t, MAX_BLOCK_LOOKUP>, MAX_BLOCK_LOOKUP> BLOCK_LOOKUP; // ROW, COLUMN

/**
 * Fills BLOCK_LOOKUP for a size x size matrix.
 *
 * The table is built recursively: the top-left quadrant is filled for size / 2
 * first, then the other three quadrants reuse those offsets shifted by one,
 * two and three times the number of elements in a quadrant.
 *
 * NOTE(review): size is halved with integer division at every level, so sizes
 * that are not a power of two leave parts of the table untouched - callers in
 * view only pass powers of two.
 *
 * @param size Edge length of the matrix; must not exceed MAX_BLOCK_LOOKUP.
 * @throws std::out_of_range (raised by std::array::at) when a write would fall
 *         outside the table, instead of silently corrupting memory.
 */
void fill_block_lookup(size_t size) {
  if (size <= 1) {
    BLOCK_LOOKUP[0][0] = 0;
    return;
  }

  const size_t half = size / 2;
  fill_block_lookup(half);

  const size_t elements_per_quarter = half * half;
  for (size_t row = 0; row < half; row++) {
    for (size_t column = 0; column < half; column++) {
      // Offset inside the already filled top-left quadrant.
      const size_t base_offset = BLOCK_LOOKUP[row][column];

      // Writes are bounds-checked: they are the only accesses that can leave
      // the table when size is too large.
      BLOCK_LOOKUP.at(row).at(half + column) = base_offset + elements_per_quarter;
      BLOCK_LOOKUP.at(half + row).at(column) = base_offset + 2 * elements_per_quarter;
      BLOCK_LOOKUP.at(half + row).at(half + column) = base_offset + 3 * elements_per_quarter;
    }
  }
}
/**
 * Non-owning view onto a square matrix stored quadrant by quadrant.
 *
 * The four quadrants of the viewed matrix lie back to back in the buffer, in
 * the order (1,1), (1,2), (2,1), (2,2); each takes (size / 2) * (size / 2)
 * elements. Element access goes through the global BLOCK_LOOKUP table.
 */
class blocked_matrix_view {
 public:
  blocked_matrix_view(double *data, size_t size) : data_{data}, size_{size} {}

  // Views onto the four quadrants, each with half the edge length.
  blocked_matrix_view quadrant_1_1() { return quadrant(0); }
  blocked_matrix_view quadrant_1_2() { return quadrant(1); }
  blocked_matrix_view quadrant_2_1() { return quadrant(2); }
  blocked_matrix_view quadrant_2_2() { return quadrant(3); }

  // Reference to element (row, column); no bounds checking is performed.
  double &at(size_t row, size_t column) {
    const size_t offset = BLOCK_LOOKUP[row][column];
    return data_[offset];
  }

  // Raw pointer to the first element of the viewed buffer.
  double *get_data() {
    return data_;
  }

 private:
  // Builds the view for the quadrant stored at position `index` (0..3).
  blocked_matrix_view quadrant(size_t index) {
    const size_t half = size_ / 2;
    const size_t elements_per_quarter = half * half;
    return blocked_matrix_view(data_ + index * elements_per_quarter, half);
  }

  double *data_;
  size_t size_;
};
#include <vector>
/**
 * Straightforward triple-loop matrix multiplication: result = a * b.
 *
 * For every row of the result, the row is cleared first and the products are
 * then accumulated directly into the result element.
 *
 * @param size   Edge length of the (square) matrices.
 * @param result View receiving the product.
 * @param a      Left operand.
 * @param b      Right operand.
 */
void multiply_naive(size_t size, blocked_matrix_view &result, blocked_matrix_view &a, blocked_matrix_view &b) {
  for (size_t row = 0; row < size; row++) {
    // Phase 1: reset the whole output row.
    for (size_t col = 0; col < size; col++) {
      result.at(row, col) = 0;
    }

    // Phase 2: accumulate the dot products in place.
    for (size_t col = 0; col < size; col++) {
      double &cell = result.at(row, col);
      for (size_t inner = 0; inner < size; inner++) {
        cell += a.at(row, inner) * b.at(inner, col);
      }
    }
  }
}
void
multiply_div_conquer
(
const
std
::
vector
<
std
::
vector
<
std
::
vector
<
std
::
unique_ptr
<
double
[]
>>>>
&
tmp_arrays
,
pls
::
strain_local_resource
&
local_indices
,
size_t
size
,
size_t
depth
,
matrix_div_conquer
::
blocked_matrix_view
&
result
,
matrix_div_conquer
::
blocked_matrix_view
&
a
,
matrix_div_conquer
::
blocked_matrix_view
&
b
)
{
void
multiply_div_conquer
(
size_t
size
,
blocked_matrix_view
&
result
,
blocked_matrix_view
&
a
,
blocked_matrix_view
&
b
)
{
if
(
size
<=
8
)
{
multiply_naive
(
size
,
result
,
a
,
b
);
return
;
}
// Temporary storage required for the intermediate results
std
::
unique_ptr
<
double
[]
>
data_1_1_a
{
new
double
[(
size
/
2
)
*
(
size
/
2
)]};
std
::
unique_ptr
<
double
[]
>
data_1_1_b
{
new
double
[(
size
/
2
)
*
(
size
/
2
)]};
std
::
unique_ptr
<
double
[]
>
data_1_2_a
{
new
double
[(
size
/
2
)
*
(
size
/
2
)]};
std
::
unique_ptr
<
double
[]
>
data_1_2_b
{
new
double
[(
size
/
2
)
*
(
size
/
2
)]};
std
::
unique_ptr
<
double
[]
>
data_2_1_a
{
new
double
[(
size
/
2
)
*
(
size
/
2
)]};
std
::
unique_ptr
<
double
[]
>
data_2_1_b
{
new
double
[(
size
/
2
)
*
(
size
/
2
)]};
std
::
unique_ptr
<
double
[]
>
data_2_2_a
{
new
double
[(
size
/
2
)
*
(
size
/
2
)]};
std
::
unique_ptr
<
double
[]
>
data_2_2_b
{
new
double
[(
size
/
2
)
*
(
size
/
2
)]};
auto
strain_local_index
=
local_indices
.
get_item
(
depth
);
std
::
unique_ptr
<
double
[]
>
const
&
data_1_1_a
=
tmp_arrays
[
depth
][
strain_local_index
.
get_strain_index
()][
0
];
std
::
unique_ptr
<
double
[]
>
const
&
data_1_1_b
=
tmp_arrays
[
depth
][
strain_local_index
.
get_strain_index
()][
1
];
std
::
unique_ptr
<
double
[]
>
const
&
data_1_2_a
=
tmp_arrays
[
depth
][
strain_local_index
.
get_strain_index
()][
2
];
std
::
unique_ptr
<
double
[]
>
const
&
data_1_2_b
=
tmp_arrays
[
depth
][
strain_local_index
.
get_strain_index
()][
3
];
std
::
unique_ptr
<
double
[]
>
const
&
data_2_1_a
=
tmp_arrays
[
depth
][
strain_local_index
.
get_strain_index
()][
4
];
std
::
unique_ptr
<
double
[]
>
const
&
data_2_1_b
=
tmp_arrays
[
depth
][
strain_local_index
.
get_strain_index
()][
5
];
std
::
unique_ptr
<
double
[]
>
const
&
data_2_2_a
=
tmp_arrays
[
depth
][
strain_local_index
.
get_strain_index
()][
6
];
std
::
unique_ptr
<
double
[]
>
const
&
data_2_2_b
=
tmp_arrays
[
depth
][
strain_local_index
.
get_strain_index
()][
7
];
// Handles to sub-matrices used
blocked_matrix_view
result_1_1
=
result
.
quadrant_1_1
();
blocked_matrix_view
result_1_2
=
result
.
quadrant_1_2
();
blocked_matrix_view
result_2_1
=
result
.
quadrant_2_1
();
blocked_matrix_view
result_2_2
=
result
.
quadrant_2_2
();
blocked_matrix_view
result_1_1_a
{
data_1_1_a
.
get
(),
size
/
2
};
blocked_matrix_view
result_1_1_b
{
data_1_1_b
.
get
(),
size
/
2
};
blocked_matrix_view
result_1_2_a
{
data_1_2_a
.
get
(),
size
/
2
};
blocked_matrix_view
result_1_2_b
{
data_1_2_b
.
get
(),
size
/
2
};
blocked_matrix_view
result_2_1_a
{
data_2_1_a
.
get
(),
size
/
2
};
blocked_matrix_view
result_2_1_b
{
data_2_1_b
.
get
(),
size
/
2
};
blocked_matrix_view
result_2_2_a
{
data_2_2_a
.
get
(),
size
/
2
};
blocked_matrix_view
result_2_2_b
{
data_2_2_b
.
get
(),
size
/
2
};
blocked_matrix_view
a_1_1
=
a
.
quadrant_1_1
();
blocked_matrix_view
a_1_2
=
a
.
quadrant_1_2
();
blocked_matrix_view
a_2_1
=
a
.
quadrant_2_1
();
blocked_matrix_view
a_2_2
=
a
.
quadrant_2_2
();
blocked_matrix_view
b_1_1
=
b
.
quadrant_1_1
();
blocked_matrix_view
b_1_2
=
b
.
quadrant_1_2
();
blocked_matrix_view
b_2_1
=
b
.
quadrant_2_1
();
blocked_matrix_view
b_2_2
=
b
.
quadrant_2_2
();
matrix_div_conquer
::
blocked_matrix_view
result_1_1
=
result
.
quadrant_1_1
();
matrix_div_conquer
::
blocked_matrix_view
result_1_2
=
result
.
quadrant_1_2
();
matrix_div_conquer
::
blocked_matrix_view
result_2_1
=
result
.
quadrant_2_1
();
matrix_div_conquer
::
blocked_matrix_view
result_2_2
=
result
.
quadrant_2_2
();
matrix_div_conquer
::
blocked_matrix_view
result_1_1_a
{
data_1_1_a
.
get
(),
size
/
2
};
matrix_div_conquer
::
blocked_matrix_view
result_1_1_b
{
data_1_1_b
.
get
(),
size
/
2
};
matrix_div_conquer
::
blocked_matrix_view
result_1_2_a
{
data_1_2_a
.
get
(),
size
/
2
};
matrix_div_conquer
::
blocked_matrix_view
result_1_2_b
{
data_1_2_b
.
get
(),
size
/
2
};
matrix_div_conquer
::
blocked_matrix_view
result_2_1_a
{
data_2_1_a
.
get
(),
size
/
2
};
matrix_div_conquer
::
blocked_matrix_view
result_2_1_b
{
data_2_1_b
.
get
(),
size
/
2
};
matrix_div_conquer
::
blocked_matrix_view
result_2_2_a
{
data_2_2_a
.
get
(),
size
/
2
};
matrix_div_conquer
::
blocked_matrix_view
result_2_2_b
{
data_2_2_b
.
get
(),
size
/
2
};
matrix_div_conquer
::
blocked_matrix_view
a_1_1
=
a
.
quadrant_1_1
();
matrix_div_conquer
::
blocked_matrix_view
a_1_2
=
a
.
quadrant_1_2
();
matrix_div_conquer
::
blocked_matrix_view
a_2_1
=
a
.
quadrant_2_1
();
matrix_div_conquer
::
blocked_matrix_view
a_2_2
=
a
.
quadrant_2_2
();
matrix_div_conquer
::
blocked_matrix_view
b_1_1
=
b
.
quadrant_1_1
();
matrix_div_conquer
::
blocked_matrix_view
b_1_2
=
b
.
quadrant_1_2
();
matrix_div_conquer
::
blocked_matrix_view
b_2_1
=
b
.
quadrant_2_1
();
matrix_div_conquer
::
blocked_matrix_view
b_2_2
=
b
.
quadrant_2_2
();
// Divide Work Into Sub-Calls
multiply_div_conquer
(
size
/
2
,
result_1_1_a
,
a_1_1
,
b_1_1
);
multiply_div_conquer
(
size
/
2
,
result_1_1_b
,
a_1_2
,
b_2_1
);
multiply_div_conquer
(
size
/
2
,
result_1_2_a
,
a_1_1
,
b_1_2
);
multiply_div_conquer
(
size
/
2
,
result_1_2_b
,
a_1_2
,
b_2_2
);
multiply_div_conquer
(
size
/
2
,
result_2_1_a
,
a_2_1
,
b_1_1
);
multiply_div_conquer
(
size
/
2
,
result_2_1_b
,
a_2_2
,
b_2_1
);
multiply_div_conquer
(
size
/
2
,
result_2_2_a
,
a_2_1
,
b_1_2
);
multiply_div_conquer
(
size
/
2
,
result_2_2_b
,
a_2_2
,
b_2_2
);
pls
::
spawn
(
[
&
]()
{
multiply_div_conquer
(
tmp_arrays
,
local_indices
,
size
/
2
,
depth
+
1
,
result_1_1_a
,
a_1_1
,
b_1_1
);
}
);
pls
::
spawn
(
[
&
]()
{
multiply_div_conquer
(
tmp_arrays
,
local_indices
,
size
/
2
,
depth
+
1
,
result_1_1_b
,
a_1_2
,
b_2_1
);
}
);
pls
::
spawn
(
[
&
]()
{
multiply_div_conquer
(
tmp_arrays
,
local_indices
,
size
/
2
,
depth
+
1
,
result_1_2_a
,
a_1_1
,
b_1_2
);
}
);
pls
::
spawn
(
[
&
]()
{
multiply_div_conquer
(
tmp_arrays
,
local_indices
,
size
/
2
,
depth
+
1
,
result_1_2_b
,
a_1_2
,
b_2_2
);
}
);
pls
::
spawn
(
[
&
]()
{
multiply_div_conquer
(
tmp_arrays
,
local_indices
,
size
/
2
,
depth
+
1
,
result_2_1_a
,
a_2_1
,
b_1_1
);
}
);
pls
::
spawn
(
[
&
]()
{
multiply_div_conquer
(
tmp_arrays
,
local_indices
,
size
/
2
,
depth
+
1
,
result_2_1_b
,
a_2_2
,
b_2_1
);
}
);
pls
::
spawn
(
[
&
]()
{
multiply_div_conquer
(
tmp_arrays
,
local_indices
,
size
/
2
,
depth
+
1
,
result_2_2_a
,
a_2_1
,
b_1_2
);
}
);
pls
::
spawn
(
[
&
]()
{
multiply_div_conquer
(
tmp_arrays
,
local_indices
,
size
/
2
,
depth
+
1
,
result_2_2_b
,
a_2_2
,
b_2_2
);
}
);
pls
::
sync
();
// Combine results
for
(
size_t
row
=
0
;
row
<
size
/
2
;
row
++
)
{
for
(
size_t
column
=
0
;
column
<
size
/
2
;
column
++
)
{
result_1_1
.
at
(
row
,
column
)
=
result_1_1_a
.
at
(
row
,
column
)
+
result_1_1_b
.
at
(
row
,
column
);
result_1_2
.
at
(
row
,
column
)
=
result_1_2_a
.
at
(
row
,
column
)
+
result_1_2_b
.
at
(
row
,
column
);
result_2_1
.
at
(
row
,
column
)
=
result_2_1_a
.
at
(
row
,
column
)
+
result_2_1_b
.
at
(
row
,
column
);
result_2_2
.
at
(
row
,
column
)
=
result_2_2_a
.
at
(
row
,
column
)
+
result_2_2_b
.
at
(
row
,
column
);
}
for
(
size_t
i
=
0
;
i
<
(
size
/
2
)
*
(
size
/
2
);
i
++
)
{
// The layout is not important here, as all have the same order, so just sum element-wise
result_1_1
.
get_data
()[
i
]
=
result_1_1_a
.
get_data
()[
i
]
+
result_1_1_b
.
get_data
()[
i
];
result_1_2
.
get_data
()[
i
]
=
result_1_2_a
.
get_data
()[
i
]
+
result_1_2_b
.
get_data
()[
i
];
result_2_1
.
get_data
()[
i
]
=
result_2_1_a
.
get_data
()[
i
]
+
result_2_1_b
.
get_data
()[
i
];
result_2_2
.
get_data
()[
i
]
=
result_2_2_a
.
get_data
()[
i
]
+
result_2_2_b
.
get_data
()[
i
];
}
}
constexpr
int
MAX_NUM_TASKS
=
32
;
constexpr
int
MAX_STACK_SIZE
=
4096
*
1
;
constexpr
int
MAX_STACK_SIZE
=
4096
*
2
;
int
main
(
int
argc
,
char
**
argv
)
{
fill_block_lookup
(
MAX_BLOCK_LOOKUP
)
;
const
size_t
size
=
matrix_div_conquer
::
MATRIX_SIZE
;
size_t
size
=
64
;
std
::
unique_ptr
<
double
[]
>
result_data_naive
{
new
double
[
size
*
size
]};
std
::
unique_ptr
<
double
[]
>
result_data_div
{
new
double
[
size
*
size
]};
int
num_threads
;
string
directory
;
benchmark_runner
::
read_args
(
argc
,
argv
,
num_threads
,
directory
);
string
test_name
=
to_string
(
num_threads
)
+
".csv"
;
string
full_directory
=
directory
+
"/PLS_v3/"
;
benchmark_runner
runner
{
full_directory
,
test_name
};
// Only run on one version to avoid copy
std
::
unique_ptr
<
double
[]
>
result_data
{
new
double
[
size
*
size
]};
std
::
unique_ptr
<
double
[]
>
a_data
{
new
double
[
size
*
size
]};
std
::
unique_ptr
<
double
[]
>
b_data
{
new
double
[
size
*
size
]};
blocked_matrix_view
result_naive
{
result_data_naive
.
get
(),
size
};
blocked_matrix_view
result_div
{
result_data_div
.
get
(),
size
};
blocked_matrix_view
a
{
a_data
.
get
(),
size
};
blocked_matrix_view
b
{
b_data
.
get
(),
size
};
for
(
size_t
row
=
0
;
row
<
size
;
row
++
)
{
for
(
size_t
column
=
0
;
column
<
size
;
column
++
)
{
a
.
at
(
row
,
column
)
=
row
;
b
.
at
(
row
,
column
)
=
column
;
}
}
matrix_div_conquer
::
blocked_matrix_view
a
{
a_data
.
get
(),
size
};
matrix_div_conquer
::
blocked_matrix_view
b
{
b_data
.
get
(),
size
};
matrix_div_conquer
::
blocked_matrix_view
result
{
result_data
.
get
(),
size
};
multiply_div_conquer
(
size
,
result_div
,
a
,
b
);
multiply_naive
(
size
,
result_naive
,
a
,
b
);
// Fill data arrays as needed
a
.
fill_default_data
();
b
.
fill_default_data
();
matrix_div_conquer
::
fill_block_lookup
(
size
);
size_t
misses
=
0
;
for
(
size_t
row
=
0
;
row
<
size
;
row
++
)
{
for
(
size_t
column
=
0
;
column
<
size
;
column
++
)
{
if
(
result_div
.
at
(
row
,
column
)
!=
result_naive
.
at
(
row
,
column
))
{
misses
++
;
printf
(
"%5.5f
\t\t
"
,
result_div
.
at
(
row
,
column
)
-
result_naive
.
at
(
row
,
column
));
// Strain local data
std
::
vector
<
std
::
vector
<
std
::
vector
<
std
::
unique_ptr
<
double
[]
>>>>
div_conquer_temp_arrays
;
size_t
max_depth
=
0
;
size_t
remaining_size
=
size
;
while
(
remaining_size
>
1
)
{
auto
&
depth_buffers
=
div_conquer_temp_arrays
.
emplace_back
();
for
(
int
thread_id
=
0
;
thread_id
<
8
;
thread_id
++
)
{
auto
&
depth_thread_buffers
=
depth_buffers
.
emplace_back
();
for
(
int
i
=
0
;
i
<
8
;
i
++
)
{
depth_thread_buffers
.
emplace_back
(
new
double
[(
remaining_size
/
2
)
*
(
remaining_size
/
2
)]);
}
}
max_depth
++
;
remaining_size
=
remaining_size
/
2
;
}
printf
(
"
\n
%d"
,
misses
);
// int num_threads;
// string directory;
// benchmark_runner::read_args(argc, argv, num_threads, directory);
//
// string test_name = to_string(num_threads) + ".csv";
// string full_directory = directory + "/PLS_v3/";
// benchmark_runner runner{full_directory, test_name};
//
// scheduler scheduler{(unsigned) num_threads, MAX_NUM_TASKS, MAX_STACK_SIZE};
//
// runner.run_iterations(1000, [&]() {
// scheduler.perform_work([&]() {
// });
// }, 100);
// runner.commit_results(true);
pls
::
strain_local_resource
local_indices
{(
unsigned
)
num_threads
,
(
unsigned
)
max_depth
};
scheduler
scheduler
{(
unsigned
)
num_threads
,
MAX_NUM_TASKS
,
MAX_STACK_SIZE
};
runner
.
run_iterations
(
1
,
[
&
]()
{
scheduler
.
perform_work
([
&
]()
{
multiply_div_conquer
(
div_conquer_temp_arrays
,
local_indices
,
size
,
0
,
result
,
a
,
b
);
});
},
0
);
runner
.
commit_results
(
true
);
scheduler
.
terminate
();
}
This diff is collapsed.
Click to expand it.
extern/benchmark_base/CMakeLists.txt
View file @
8f47876d
...
...
@@ -8,7 +8,8 @@ add_library(benchmark_base STATIC
include/benchmark_base/matrix.h
include/benchmark_base/unbalanced.h src/unbalanced.cpp
include/benchmark_base/range.h
include/benchmark_base/fib.h
)
include/benchmark_base/fib.h
include/benchmark_base/matrix_div_conquer.h
)
target_include_directories
(
benchmark_base
PUBLIC
...
...
This diff is collapsed.
Click to expand it.
extern/benchmark_base/include/benchmark_base/matrix_div_conquer.h
0 → 100644
View file @
8f47876d
#ifndef COMPARISON_BENCHMARKS_BASE_MATRIX_DIV_CONQUER_H
#define COMPARISON_BENCHMARKS_BASE_MATRIX_DIV_CONQUER_H
#include <array>
namespace comparison_benchmarks {
namespace base {
namespace matrix_div_conquer {

// Benchmark parameters.
const int MATRIX_SIZE = 128;
const int CUTOFF_SIZE = 8;
const int NUM_ITERATIONS = 100;
const int WARMUP_ITERATIONS = 10;

// Helpers to directly index into blocked matrices
const size_t MAX_SIZE = 128;

// BLOCK_LOOKUP[row][column] is the offset of element (row, column) inside a
// buffer laid out quadrant by quadrant (see fill_block_lookup).
// Declared inline so that including this header from several translation
// units yields one shared table instead of multiple-definition link errors.
inline std::array<std::array<size_t, MAX_SIZE>, MAX_SIZE> BLOCK_LOOKUP; // ROW, COLUMN

/**
 * Fills BLOCK_LOOKUP for a size x size matrix.
 *
 * The table is built recursively: the top-left quadrant is filled for size / 2
 * first, then the other three quadrants reuse those offsets shifted by one,
 * two and three times the number of elements in a quadrant.
 *
 * NOTE(review): size is halved with integer division at every level, so sizes
 * that are not a power of two leave parts of the table untouched.
 *
 * @param size Edge length of the matrix; must not exceed MAX_SIZE.
 * @throws std::out_of_range (raised by std::array::at) when a write would fall
 *         outside the table, instead of silently corrupting memory.
 */
inline void fill_block_lookup(size_t size = MAX_SIZE) {
  if (size <= 1) {
    BLOCK_LOOKUP[0][0] = 0;
    return;
  }

  const size_t half = size / 2;
  fill_block_lookup(half);

  const size_t elements_per_quarter = half * half;
  for (size_t row = 0; row < half; row++) {
    for (size_t column = 0; column < half; column++) {
      // Offset inside the already filled top-left quadrant.
      const size_t base_offset = BLOCK_LOOKUP[row][column];

      // Writes are bounds-checked: they are the only accesses that can leave
      // the table when size is too large.
      BLOCK_LOOKUP.at(row).at(half + column) = base_offset + elements_per_quarter;
      BLOCK_LOOKUP.at(half + row).at(column) = base_offset + 2 * elements_per_quarter;
      BLOCK_LOOKUP.at(half + row).at(half + column) = base_offset + 3 * elements_per_quarter;
    }
  }
}

/**
 * Non-owning view onto a square matrix stored quadrant by quadrant.
 *
 * The four quadrants of the viewed matrix lie back to back in the buffer, in
 * the order (1,1), (1,2), (2,1), (2,2); each takes (size / 2) * (size / 2)
 * elements. Element access goes through the BLOCK_LOOKUP table, which must
 * have been filled (fill_block_lookup) for at least the view's size.
 */
class blocked_matrix_view {
 public:
  blocked_matrix_view(double *data, size_t size) : data_{data}, size_{size} {}

  // Sets every element to its row index.
  void fill_default_data() {
    for (size_t row = 0; row < size_; row++) {
      for (size_t column = 0; column < size_; column++) {
        at(row, column) = static_cast<double>(row);
      }
    }
  }

  // Views onto the four quadrants, each with half the edge length.
  blocked_matrix_view quadrant_1_1() { return quadrant(0); }
  blocked_matrix_view quadrant_1_2() { return quadrant(1); }
  blocked_matrix_view quadrant_2_1() { return quadrant(2); }
  blocked_matrix_view quadrant_2_2() { return quadrant(3); }

  // Reference to element (row, column); no bounds checking is performed.
  double &at(size_t row, size_t column) {
    return data_[BLOCK_LOOKUP[row][column]];
  }

  // Raw pointer to the first element of the viewed buffer.
  double *get_data() {
    return data_;
  }

 private:
  // Builds the view for the quadrant stored at position `index` (0..3).
  blocked_matrix_view quadrant(size_t index) {
    const size_t half = size_ / 2;
    const size_t elements_per_quarter = half * half;
    return blocked_matrix_view(data_ + index * elements_per_quarter, half);
  }

  double *data_;
  size_t size_;
};

/**
 * Straightforward triple-loop matrix multiplication: result = a * b.
 *
 * For every row of the result, the row is cleared first and the products are
 * then accumulated directly into the result element.
 *
 * @param size   Edge length of the (square) matrices.
 * @param result View receiving the product.
 * @param a      Left operand.
 * @param b      Right operand.
 */
inline void multiply_naive(size_t size, blocked_matrix_view &result, blocked_matrix_view &a, blocked_matrix_view &b) {
  for (size_t i = 0; i < size; i++) {
    for (size_t j = 0; j < size; j++) {
      result.at(i, j) = 0;
    }
    for (size_t j = 0; j < size; j++) {
      for (size_t k = 0; k < size; k++) {
        result.at(i, j) += a.at(i, k) * b.at(k, j);
      }
    }
  }
}

}
}
}
#endif // COMPARISON_BENCHMARKS_BASE_MATRIX_DIV_CONQUER_H
This diff is collapsed.
Click to expand it.
lib/pls/include/pls/internal/scheduling/scheduler_impl.h
View file @
8f47876d
...
...
@@ -154,7 +154,6 @@ template<typename Function>
void
scheduler
::
spawn_internal
(
Function
&&
lambda
)
{
if
(
thread_state
::
is_scheduler_active
())
{
thread_state
&
spawning_state
=
thread_state
::
get
();
scheduler
&
scheduler
=
spawning_state
.
get_scheduler
();
base_task
*
last_task
=
spawning_state
.
get_active_task
();
base_task
*
spawned_task
=
last_task
->
next_
;
...
...
This diff is collapsed.
Click to expand it.
lib/pls/include/pls/internal/scheduling/strain_local_resource.h
View file @
8f47876d
...
...
@@ -60,8 +60,7 @@ class strain_local_resource {
};
strain_local_resource
(
unsigned
num_threads
,
unsigned
depth
)
:
local_items_
()
{
local_items_
.
reserve
(
num_threads
);
unsigned
depth
)
:
local_items_
(
num_threads
)
{
for
(
unsigned
thread_id
=
0
;
thread_id
<
num_threads
;
thread_id
++
)
{
local_items_
[
thread_id
].
reserve
(
depth
);
for
(
unsigned
i
=
0
;
i
<
depth
;
i
++
)
{
...
...
This diff is collapsed.
Click to expand it.
lib/pls/include/pls/pls.h
View file @
8f47876d
...
...
@@ -8,6 +8,7 @@
#include "pls/algorithms/reduce.h"
#include "pls/internal/scheduling/scheduler.h"
#include "pls/internal/scheduling/strain_local_resource.h"
#include "pls/internal/helpers/range.h"
#include "pls/internal/helpers/member_function.h"
...
...
@@ -28,6 +29,9 @@ static void serial(Function &&function) {
scheduler
::
serial
(
std
::
forward
<
Function
>
(
function
));
}
// strain local resource support (rather low-level)
using
internal
::
scheduling
::
strain_local_resource
;
// general helpers that can be handy when using PLS
template
<
class
C
,
typename
R
,
typename
...
ARGS
>
using
member_function
=
internal
::
helpers
::
member_function
<
C
,
R
,
ARGS
...
>
;
...
...
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment