las3_pub / predictable_parallel_patterns
Commit 8f47876d authored Jun 08, 2020 by FritzFlorian
Add standard divide and conquer matrix multiplication test for comparison.
parent 92ee564c
Pipeline #1502 passed with stages in 4 minutes 13 seconds
Showing 6 changed files with 228 additions and 165 deletions (+228 -165)
app/benchmark_matrix_div_conquer/main.cpp  +125 -161
extern/benchmark_base/CMakeLists.txt  +2 -1
extern/benchmark_base/include/benchmark_base/matrix_div_conquer.h  +96 -0
lib/pls/include/pls/internal/scheduling/scheduler_impl.h  +0 -1
lib/pls/include/pls/internal/scheduling/strain_local_resource.h  +1 -2
lib/pls/include/pls/pls.h  +4 -0
app/benchmark_matrix_div_conquer/main.cpp  View file @ 8f47876d

// #include "pls/pls.h"
// using namespace pls;
#include "pls/pls.h"
using namespace pls;

#include "benchmark_runner.h"
#include "benchmark_base/matrix_div_conquer.h"
using namespace comparison_benchmarks::base;

#include <memory>
#include <array>
#include <math.h>

// Helpers to directly index into blocked matrices
const size_t MAX_BLOCK_LOOKUP = 256;
std::array<std::array<size_t, MAX_BLOCK_LOOKUP>, MAX_BLOCK_LOOKUP> BLOCK_LOOKUP; // ROW, COLUMN

void fill_block_lookup(size_t size) {
  if (size <= 1) {
    BLOCK_LOOKUP[0][0] = 0;
    return;
  }

  fill_block_lookup(size / 2);
  size_t elements_per_quarter = (size / 2) * (size / 2);
  for (size_t row = 0; row < size / 2; row++) {
    for (size_t column = 0; column < size / 2; column++) {
      BLOCK_LOOKUP[row][size / 2 + column] = BLOCK_LOOKUP[row][column] + elements_per_quarter;
      BLOCK_LOOKUP[size / 2 + row][column] = BLOCK_LOOKUP[row][column] + 2 * elements_per_quarter;
      BLOCK_LOOKUP[size / 2 + row][size / 2 + column] = BLOCK_LOOKUP[row][column] + 3 * elements_per_quarter;
    }
  }
}

class blocked_matrix_view {
 public:
  blocked_matrix_view(double *data, size_t size) : data_{data}, size_{size} {}

  blocked_matrix_view quadrant_1_1() {
    size_t elements_per_quarter = (size_ / 2) * (size_ / 2);
    return blocked_matrix_view(data_ + 0 * elements_per_quarter, size_ / 2);
  }
  blocked_matrix_view quadrant_1_2() {
    size_t elements_per_quarter = (size_ / 2) * (size_ / 2);
    return blocked_matrix_view(data_ + 1 * elements_per_quarter, size_ / 2);
  }
  blocked_matrix_view quadrant_2_1() {
    size_t elements_per_quarter = (size_ / 2) * (size_ / 2);
    return blocked_matrix_view(data_ + 2 * elements_per_quarter, size_ / 2);
  }
  blocked_matrix_view quadrant_2_2() {
    size_t elements_per_quarter = (size_ / 2) * (size_ / 2);
    return blocked_matrix_view(data_ + 3 * elements_per_quarter, size_ / 2);
  }

  double &at(size_t row, size_t column) {
    return data_[BLOCK_LOOKUP[row][column]];
  }

  double *get_data() {
    return data_;
  }

 private:
  double *data_;
  size_t size_;
};

#include <vector>

void multiply_naive(size_t size, blocked_matrix_view &result, blocked_matrix_view &a, blocked_matrix_view &b) {
  for (size_t i = 0; i < size; i++) {
    for (size_t j = 0; j < size; j++) {
      result.at(i, j) = 0;
    }
    for (size_t j = 0; j < size; j++) {
      for (size_t k = 0; k < size; k++) {
        result.at(i, j) += a.at(i, k) * b.at(k, j);
      }
    }
  }
}

void multiply_div_conquer(const std::vector<std::vector<std::vector<std::unique_ptr<double[]>>>> &tmp_arrays,
                          pls::strain_local_resource &local_indices,
                          size_t size,
                          size_t depth,
                          matrix_div_conquer::blocked_matrix_view &result,
                          matrix_div_conquer::blocked_matrix_view &a,
                          matrix_div_conquer::blocked_matrix_view &b) {
void multiply_div_conquer(size_t size, blocked_matrix_view &result, blocked_matrix_view &a, blocked_matrix_view &b) {
  if (size <= 8) {
    multiply_naive(size, result, a, b);
    return;
  }

  // Temporary storage required for the intermediate results
  std::unique_ptr<double[]> data_1_1_a{new double[(size / 2) * (size / 2)]};
  std::unique_ptr<double[]> data_1_1_b{new double[(size / 2) * (size / 2)]};
  std::unique_ptr<double[]> data_1_2_a{new double[(size / 2) * (size / 2)]};
  std::unique_ptr<double[]> data_1_2_b{new double[(size / 2) * (size / 2)]};
  std::unique_ptr<double[]> data_2_1_a{new double[(size / 2) * (size / 2)]};
  std::unique_ptr<double[]> data_2_1_b{new double[(size / 2) * (size / 2)]};
  std::unique_ptr<double[]> data_2_2_a{new double[(size / 2) * (size / 2)]};
  std::unique_ptr<double[]> data_2_2_b{new double[(size / 2) * (size / 2)]};
  auto strain_local_index = local_indices.get_item(depth);
  std::unique_ptr<double[]> const &data_1_1_a = tmp_arrays[depth][strain_local_index.get_strain_index()][0];
  std::unique_ptr<double[]> const &data_1_1_b = tmp_arrays[depth][strain_local_index.get_strain_index()][1];
  std::unique_ptr<double[]> const &data_1_2_a = tmp_arrays[depth][strain_local_index.get_strain_index()][2];
  std::unique_ptr<double[]> const &data_1_2_b = tmp_arrays[depth][strain_local_index.get_strain_index()][3];
  std::unique_ptr<double[]> const &data_2_1_a = tmp_arrays[depth][strain_local_index.get_strain_index()][4];
  std::unique_ptr<double[]> const &data_2_1_b = tmp_arrays[depth][strain_local_index.get_strain_index()][5];
  std::unique_ptr<double[]> const &data_2_2_a = tmp_arrays[depth][strain_local_index.get_strain_index()][6];
  std::unique_ptr<double[]> const &data_2_2_b = tmp_arrays[depth][strain_local_index.get_strain_index()][7];

  // Handles to sub-matrices used
  blocked_matrix_view result_1_1 = result.quadrant_1_1();
  blocked_matrix_view result_1_2 = result.quadrant_1_2();
  blocked_matrix_view result_2_1 = result.quadrant_2_1();
  blocked_matrix_view result_2_2 = result.quadrant_2_2();
  blocked_matrix_view result_1_1_a{data_1_1_a.get(), size / 2};
  blocked_matrix_view result_1_1_b{data_1_1_b.get(), size / 2};
  blocked_matrix_view result_1_2_a{data_1_2_a.get(), size / 2};
  blocked_matrix_view result_1_2_b{data_1_2_b.get(), size / 2};
  blocked_matrix_view result_2_1_a{data_2_1_a.get(), size / 2};
  blocked_matrix_view result_2_1_b{data_2_1_b.get(), size / 2};
  blocked_matrix_view result_2_2_a{data_2_2_a.get(), size / 2};
  blocked_matrix_view result_2_2_b{data_2_2_b.get(), size / 2};
  blocked_matrix_view a_1_1 = a.quadrant_1_1();
  blocked_matrix_view a_1_2 = a.quadrant_1_2();
  blocked_matrix_view a_2_1 = a.quadrant_2_1();
  blocked_matrix_view a_2_2 = a.quadrant_2_2();
  blocked_matrix_view b_1_1 = b.quadrant_1_1();
  blocked_matrix_view b_1_2 = b.quadrant_1_2();
  blocked_matrix_view b_2_1 = b.quadrant_2_1();
  blocked_matrix_view b_2_2 = b.quadrant_2_2();
  matrix_div_conquer::blocked_matrix_view result_1_1 = result.quadrant_1_1();
  matrix_div_conquer::blocked_matrix_view result_1_2 = result.quadrant_1_2();
  matrix_div_conquer::blocked_matrix_view result_2_1 = result.quadrant_2_1();
  matrix_div_conquer::blocked_matrix_view result_2_2 = result.quadrant_2_2();
  matrix_div_conquer::blocked_matrix_view result_1_1_a{data_1_1_a.get(), size / 2};
  matrix_div_conquer::blocked_matrix_view result_1_1_b{data_1_1_b.get(), size / 2};
  matrix_div_conquer::blocked_matrix_view result_1_2_a{data_1_2_a.get(), size / 2};
  matrix_div_conquer::blocked_matrix_view result_1_2_b{data_1_2_b.get(), size / 2};
  matrix_div_conquer::blocked_matrix_view result_2_1_a{data_2_1_a.get(), size / 2};
  matrix_div_conquer::blocked_matrix_view result_2_1_b{data_2_1_b.get(), size / 2};
  matrix_div_conquer::blocked_matrix_view result_2_2_a{data_2_2_a.get(), size / 2};
  matrix_div_conquer::blocked_matrix_view result_2_2_b{data_2_2_b.get(), size / 2};
  matrix_div_conquer::blocked_matrix_view a_1_1 = a.quadrant_1_1();
  matrix_div_conquer::blocked_matrix_view a_1_2 = a.quadrant_1_2();
  matrix_div_conquer::blocked_matrix_view a_2_1 = a.quadrant_2_1();
  matrix_div_conquer::blocked_matrix_view a_2_2 = a.quadrant_2_2();
  matrix_div_conquer::blocked_matrix_view b_1_1 = b.quadrant_1_1();
  matrix_div_conquer::blocked_matrix_view b_1_2 = b.quadrant_1_2();
  matrix_div_conquer::blocked_matrix_view b_2_1 = b.quadrant_2_1();
  matrix_div_conquer::blocked_matrix_view b_2_2 = b.quadrant_2_2();

  // Divide Work Into Sub-Calls
  multiply_div_conquer(size / 2, result_1_1_a, a_1_1, b_1_1);
  multiply_div_conquer(size / 2, result_1_1_b, a_1_2, b_2_1);
  multiply_div_conquer(size / 2, result_1_2_a, a_1_1, b_1_2);
  multiply_div_conquer(size / 2, result_1_2_b, a_1_2, b_2_2);
  multiply_div_conquer(size / 2, result_2_1_a, a_2_1, b_1_1);
  multiply_div_conquer(size / 2, result_2_1_b, a_2_2, b_2_1);
  multiply_div_conquer(size / 2, result_2_2_a, a_2_1, b_1_2);
  multiply_div_conquer(size / 2, result_2_2_b, a_2_2, b_2_2);
  pls::spawn([&]() {
    multiply_div_conquer(tmp_arrays, local_indices, size / 2, depth + 1, result_1_1_a, a_1_1, b_1_1);
  });
  pls::spawn([&]() {
    multiply_div_conquer(tmp_arrays, local_indices, size / 2, depth + 1, result_1_1_b, a_1_2, b_2_1);
  });
  pls::spawn([&]() {
    multiply_div_conquer(tmp_arrays, local_indices, size / 2, depth + 1, result_1_2_a, a_1_1, b_1_2);
  });
  pls::spawn([&]() {
    multiply_div_conquer(tmp_arrays, local_indices, size / 2, depth + 1, result_1_2_b, a_1_2, b_2_2);
  });
  pls::spawn([&]() {
    multiply_div_conquer(tmp_arrays, local_indices, size / 2, depth + 1, result_2_1_a, a_2_1, b_1_1);
  });
  pls::spawn([&]() {
    multiply_div_conquer(tmp_arrays, local_indices, size / 2, depth + 1, result_2_1_b, a_2_2, b_2_1);
  });
  pls::spawn([&]() {
    multiply_div_conquer(tmp_arrays, local_indices, size / 2, depth + 1, result_2_2_a, a_2_1, b_1_2);
  });
  pls::spawn([&]() {
    multiply_div_conquer(tmp_arrays, local_indices, size / 2, depth + 1, result_2_2_b, a_2_2, b_2_2);
  });
  pls::sync();

  // Combine results
  for (size_t row = 0; row < size / 2; row++) {
    for (size_t column = 0; column < size / 2; column++) {
      result_1_1.at(row, column) = result_1_1_a.at(row, column) + result_1_1_b.at(row, column);
      result_1_2.at(row, column) = result_1_2_a.at(row, column) + result_1_2_b.at(row, column);
      result_2_1.at(row, column) = result_2_1_a.at(row, column) + result_2_1_b.at(row, column);
      result_2_2.at(row, column) = result_2_2_a.at(row, column) + result_2_2_b.at(row, column);
    }
  for (size_t i = 0; i < (size / 2) * (size / 2); i++) {
    // The layout is not important here, as all have the same order, so just sum element wise
    result_1_1.get_data()[i] = result_1_1_a.get_data()[i] + result_1_1_b.get_data()[i];
    result_1_2.get_data()[i] = result_1_2_a.get_data()[i] + result_1_2_b.get_data()[i];
    result_2_1.get_data()[i] = result_2_1_a.get_data()[i] + result_2_1_b.get_data()[i];
    result_2_2.get_data()[i] = result_2_2_a.get_data()[i] + result_2_2_b.get_data()[i];
  }
}

constexpr int MAX_NUM_TASKS = 32;
constexpr int MAX_STACK_SIZE = 4096 * 1;
constexpr int MAX_STACK_SIZE = 4096 * 2;

int main(int argc, char **argv) {
  fill_block_lookup(MAX_BLOCK_LOOKUP);
  const size_t size = matrix_div_conquer::MATRIX_SIZE;
  size_t size = 64;
  std::unique_ptr<double[]> result_data_naive{new double[size * size]};
  std::unique_ptr<double[]> result_data_div{new double[size * size]};

  int num_threads;
  string directory;
  benchmark_runner::read_args(argc, argv, num_threads, directory);

  string test_name = to_string(num_threads) + ".csv";
  string full_directory = directory + "/PLS_v3/";
  benchmark_runner runner{full_directory, test_name};

  // Only run on one version to avoid copy
  std::unique_ptr<double[]> result_data{new double[size * size]};
  std::unique_ptr<double[]> a_data{new double[size * size]};
  std::unique_ptr<double[]> b_data{new double[size * size]};
  blocked_matrix_view result_naive{result_data_naive.get(), size};
  blocked_matrix_view result_div{result_data_div.get(), size};
  blocked_matrix_view a{a_data.get(), size};
  blocked_matrix_view b{b_data.get(), size};
  for (size_t row = 0; row < size; row++) {
    for (size_t column = 0; column < size; column++) {
      a.at(row, column) = row;
      b.at(row, column) = column;
  matrix_div_conquer::blocked_matrix_view a{a_data.get(), size};
  matrix_div_conquer::blocked_matrix_view b{b_data.get(), size};
  matrix_div_conquer::blocked_matrix_view result{result_data.get(), size};

  // Fill data arrays as needed
  a.fill_default_data();
  b.fill_default_data();
  matrix_div_conquer::fill_block_lookup(size);

  // Strain local data
  std::vector<std::vector<std::vector<std::unique_ptr<double[]>>>> div_conquer_temp_arrays;
  size_t max_depth = 0;
  size_t remaining_size = size;
  while (remaining_size > 1) {
    auto &depth_buffers = div_conquer_temp_arrays.emplace_back();
    for (int thread_id = 0; thread_id < 8; thread_id++) {
      auto &depth_thread_buffers = depth_buffers.emplace_back();
      for (int i = 0; i < 8; i++) {
        depth_thread_buffers.emplace_back(new double[(remaining_size / 2) * (remaining_size / 2)]);
      }
    }
    max_depth++;
    remaining_size = remaining_size / 2;
  }
  pls::strain_local_resource local_indices{(unsigned) num_threads, (unsigned) max_depth};

  multiply_div_conquer(size, result_div, a, b);
  multiply_naive(size, result_naive, a, b);
  scheduler scheduler{(unsigned) num_threads, MAX_NUM_TASKS, MAX_STACK_SIZE};

  size_t misses = 0;
  for (size_t row = 0; row < size; row++) {
    for (size_t column = 0; column < size; column++) {
      if (result_div.at(row, column) != result_naive.at(row, column)) {
        misses++;
        printf("%5.5f\t\t", result_div.at(row, column) - result_naive.at(row, column));
      }
    }
  }
  printf("\n%d", misses);

//  int num_threads;
//  string directory;
//  benchmark_runner::read_args(argc, argv, num_threads, directory);
//
//  string test_name = to_string(num_threads) + ".csv";
//  string full_directory = directory + "/PLS_v3/";
//  benchmark_runner runner{full_directory, test_name};
//
//  scheduler scheduler{(unsigned) num_threads, MAX_NUM_TASKS, MAX_STACK_SIZE};
//
//  runner.run_iterations(1000, [&]() {
//    scheduler.perform_work([&]() {
//    });
//  }, 100);
//  runner.commit_results(true);
  runner.run_iterations(1, [&]() {
    scheduler.perform_work([&]() {
      multiply_div_conquer(div_conquer_temp_arrays, local_indices, size, 0, result, a, b);
    });
  }, 0);
  runner.commit_results(true);

  scheduler.terminate();
}
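For reference, the blocked indexing used throughout this benchmark stores each matrix quadrant contiguously and applies the same layout recursively inside every quadrant, so BLOCK_LOOKUP maps a (row, column) pair to its offset in that recursive layout. The following standalone sketch is not part of the commit; it merely reproduces the fill_block_lookup recursion for a 4x4 matrix (names chosen freely) and prints the resulting offsets, which makes the quadrant ordering 1_1, 1_2, 2_1, 2_2 visible.

#include <array>
#include <cstddef>
#include <cstdio>

// Reproduces the fill_block_lookup recursion from main.cpp for a tiny 4x4 case.
constexpr std::size_t N = 4;
std::array<std::array<std::size_t, N>, N> lookup; // ROW, COLUMN

void fill(std::size_t size) {
  if (size <= 1) {
    lookup[0][0] = 0;
    return;
  }
  fill(size / 2);
  std::size_t elements_per_quarter = (size / 2) * (size / 2);
  for (std::size_t row = 0; row < size / 2; row++) {
    for (std::size_t column = 0; column < size / 2; column++) {
      lookup[row][size / 2 + column] = lookup[row][column] + elements_per_quarter;                 // quadrant 1_2
      lookup[size / 2 + row][column] = lookup[row][column] + 2 * elements_per_quarter;             // quadrant 2_1
      lookup[size / 2 + row][size / 2 + column] = lookup[row][column] + 3 * elements_per_quarter;  // quadrant 2_2
    }
  }
}

int main() {
  fill(N);
  // Prints:
  //   0   1   4   5
  //   2   3   6   7
  //   8   9  12  13
  //  10  11  14  15
  // i.e. every quadrant (and sub-quadrant) is a contiguous run of (size/2)^2 elements.
  for (std::size_t row = 0; row < N; row++) {
    for (std::size_t column = 0; column < N; column++) {
      std::printf("%3zu ", lookup[row][column]);
    }
    std::printf("\n");
  }
}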
extern/benchmark_base/CMakeLists.txt  View file @ 8f47876d
...
...
@@ -8,7 +8,8 @@ add_library(benchmark_base STATIC
        include/benchmark_base/matrix.h
        include/benchmark_base/unbalanced.h src/unbalanced.cpp
        include/benchmark_base/range.h
        include/benchmark_base/fib.h
        )
        include/benchmark_base/fib.h
        include/benchmark_base/matrix_div_conquer.h
        )

target_include_directories(benchmark_base PUBLIC
...
...
extern/benchmark_base/include/benchmark_base/matrix_div_conquer.h  0 → 100644  View file @ 8f47876d

#ifndef COMPARISON_BENCHMARKS_BASE_MATRIX_DIV_CONQUER_H
#define COMPARISON_BENCHMARKS_BASE_MATRIX_DIV_CONQUER_H

#include <array>

namespace comparison_benchmarks {
namespace base {
namespace matrix_div_conquer {

const int MATRIX_SIZE = 128;
const int CUTOFF_SIZE = 8;

const int NUM_ITERATIONS = 100;
const int WARMUP_ITERATIONS = 10;

// Helpers to directly index into blocked matrices
const size_t MAX_SIZE = 128;
std::array<std::array<size_t, MAX_SIZE>, MAX_SIZE> BLOCK_LOOKUP; // ROW, COLUMN

void fill_block_lookup(size_t size = MAX_SIZE) {
  if (size <= 1) {
    BLOCK_LOOKUP[0][0] = 0;
    return;
  }

  fill_block_lookup(size / 2);
  size_t elements_per_quarter = (size / 2) * (size / 2);
  for (size_t row = 0; row < size / 2; row++) {
    for (size_t column = 0; column < size / 2; column++) {
      BLOCK_LOOKUP[row][size / 2 + column] = BLOCK_LOOKUP[row][column] + elements_per_quarter;
      BLOCK_LOOKUP[size / 2 + row][column] = BLOCK_LOOKUP[row][column] + 2 * elements_per_quarter;
      BLOCK_LOOKUP[size / 2 + row][size / 2 + column] = BLOCK_LOOKUP[row][column] + 3 * elements_per_quarter;
    }
  }
}

class blocked_matrix_view {
 public:
  blocked_matrix_view(double *data, size_t size) : data_{data}, size_{size} {}

  void fill_default_data() {
    for (size_t row = 0; row < size_; row++) {
      for (size_t column = 0; column < size_; column++) {
        at(row, column) = row;
      }
    }
  }

  blocked_matrix_view quadrant_1_1() {
    size_t elements_per_quarter = (size_ / 2) * (size_ / 2);
    return blocked_matrix_view(data_ + 0 * elements_per_quarter, size_ / 2);
  }
  blocked_matrix_view quadrant_1_2() {
    size_t elements_per_quarter = (size_ / 2) * (size_ / 2);
    return blocked_matrix_view(data_ + 1 * elements_per_quarter, size_ / 2);
  }
  blocked_matrix_view quadrant_2_1() {
    size_t elements_per_quarter = (size_ / 2) * (size_ / 2);
    return blocked_matrix_view(data_ + 2 * elements_per_quarter, size_ / 2);
  }
  blocked_matrix_view quadrant_2_2() {
    size_t elements_per_quarter = (size_ / 2) * (size_ / 2);
    return blocked_matrix_view(data_ + 3 * elements_per_quarter, size_ / 2);
  }

  double &at(size_t row, size_t column) {
    return data_[BLOCK_LOOKUP[row][column]];
  }

  double *get_data() {
    return data_;
  }

 private:
  double *data_;
  size_t size_;
};

void multiply_naive(size_t size, blocked_matrix_view &result, blocked_matrix_view &a, blocked_matrix_view &b) {
  for (size_t i = 0; i < size; i++) {
    for (size_t j = 0; j < size; j++) {
      result.at(i, j) = 0;
    }
    for (size_t j = 0; j < size; j++) {
      for (size_t k = 0; k < size; k++) {
        result.at(i, j) += a.at(i, k) * b.at(k, j);
      }
    }
  }
}

}
}
}

#endif // COMPARISON_BENCHMARKS_BASE_MATRIX_DIV_CONQUER_H
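To show how the new header stands on its own, here is a minimal serial usage sketch. It is not part of the commit and assumes only that the benchmark_base include directory is on the include path; every name it uses (fill_block_lookup, blocked_matrix_view, fill_default_data, multiply_naive, MATRIX_SIZE) comes from the header above.

#include <memory>
#include "benchmark_base/matrix_div_conquer.h"

using namespace comparison_benchmarks::base;

int main() {
  const size_t size = matrix_div_conquer::MATRIX_SIZE;
  matrix_div_conquer::fill_block_lookup(size); // build the (row, column) -> offset table once

  std::unique_ptr<double[]> a_data{new double[size * size]};
  std::unique_ptr<double[]> b_data{new double[size * size]};
  std::unique_ptr<double[]> result_data{new double[size * size]};

  matrix_div_conquer::blocked_matrix_view a{a_data.get(), size};
  matrix_div_conquer::blocked_matrix_view b{b_data.get(), size};
  matrix_div_conquer::blocked_matrix_view result{result_data.get(), size};
  a.fill_default_data();
  b.fill_default_data();

  // Serial reference multiplication; at(row, column) resolves through BLOCK_LOOKUP.
  matrix_div_conquer::multiply_naive(size, result, a, b);
  return 0;
}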
lib/pls/include/pls/internal/scheduling/scheduler_impl.h  View file @ 8f47876d
...
...
@@ -154,7 +154,6 @@ template<typename Function>
void scheduler::spawn_internal(Function &&lambda) {
  if (thread_state::is_scheduler_active()) {
    thread_state &spawning_state = thread_state::get();
    scheduler &scheduler = spawning_state.get_scheduler();

    base_task *last_task = spawning_state.get_active_task();
    base_task *spawned_task = last_task->next_;
...
...
lib/pls/include/pls/internal/scheduling/strain_local_resource.h  View file @ 8f47876d
...
...
@@ -60,8 +60,7 @@ class strain_local_resource {
  };

  strain_local_resource(unsigned num_threads,
                        unsigned depth) : local_items_() {
    local_items_.reserve(num_threads);
                        unsigned depth) : local_items_(num_threads) {
    for (unsigned thread_id = 0; thread_id < num_threads; thread_id++) {
      local_items_[thread_id].reserve(depth);
      for (unsigned i = 0; i < depth; i++) {
...
...
lib/pls/include/pls/pls.h  View file @ 8f47876d
...
...
@@ -8,6 +8,7 @@
#include "pls/algorithms/reduce.h"
#include "pls/internal/scheduling/scheduler.h"
#include "pls/internal/scheduling/strain_local_resource.h"
#include "pls/internal/helpers/range.h"
#include "pls/internal/helpers/member_function.h"
...
...
@@ -28,6 +29,9 @@ static void serial(Function &&function) {
  scheduler::serial(std::forward<Function>(function));
}

// strain local resource support (rather low-level)
using internal::scheduling::strain_local_resource;

// general helpers that can be handy when using PLS
template<class C, typename R, typename ...ARGS>
using member_function = internal::helpers::member_function<C, R, ARGS...>;
...
...
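The new using-declaration makes pls::strain_local_resource available alongside spawn and sync. Below is a condensed sketch of the fork/join pattern the benchmark builds on. It is not part of the commit; it relies only on calls visible in this diff (pls::spawn, pls::sync, scheduler::perform_work, scheduler::terminate, strain_local_resource::get_item and get_strain_index) and assumes, as the benchmark's "using namespace pls;" usage suggests, that the scheduler type is reachable as pls::scheduler. The constants mirror the benchmark and are otherwise arbitrary.

#include "pls/pls.h"

constexpr int MAX_NUM_TASKS = 32;
constexpr int MAX_STACK_SIZE = 4096 * 2;

// Each recursion level asks the strain_local_resource for an item keyed by its
// depth, so concurrently running subtrees never hand out the same buffer slot.
void recurse(pls::strain_local_resource &local_indices, unsigned depth, unsigned max_depth) {
  if (depth >= max_depth) return;
  auto item = local_indices.get_item(depth);
  (void) item.get_strain_index(); // would index into per-depth, per-strain buffers

  pls::spawn([&]() { recurse(local_indices, depth + 1, max_depth); });
  pls::spawn([&]() { recurse(local_indices, depth + 1, max_depth); });
  pls::sync(); // join both children before returning
}

int main() {
  unsigned num_threads = 8;
  unsigned max_depth = 4;
  pls::strain_local_resource local_indices{num_threads, max_depth};
  pls::scheduler scheduler{num_threads, MAX_NUM_TASKS, MAX_STACK_SIZE};
  scheduler.perform_work([&]() { recurse(local_indices, 0, max_depth); });
  scheduler.terminate();
}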