Skip to content
Toggle navigation
P
Projects
G
Groups
S
Snippets
Help
las3_pub
/
predictable_parallel_patterns
This project
Loading...
Sign in
Toggle navigation
Go to a project
Project
Repository
Issues
0
Merge Requests
0
Pipelines
Wiki
Members
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Commit
7796022f
authored
5 years ago
by
FritzFlorian
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Fix matrix multiplication benchmark for new scheduler.
parent
01596ff3
master
Pipeline
#1393
failed with stages
in 26 seconds
Changes
4
Pipelines
1
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
51 additions
and
67 deletions
+51
-67
app/benchmark_matrix/main.cpp
+17
-35
lib/pls/include/pls/algorithms/for_each.h
+4
-6
lib/pls/include/pls/algorithms/for_each_impl.h
+18
-14
lib/pls/include/pls/internal/helpers/range.h
+12
-12
No files found.
app/benchmark_matrix/main.cpp
View file @
7796022f
#include "pls/internal/scheduling/scheduler.h"
#include "pls/internal/scheduling/parallel_result.h"
#include "pls/internal/scheduling/scheduler_memory.h"
#include "pls/internal/scheduling/static_scheduler_memory.h"
#include "pls/algorithms/for_each.h"
using
namespace
pls
::
internal
::
scheduling
;
...
...
@@ -15,17 +14,20 @@ class pls_matrix : public matrix::matrix<T, SIZE> {
public
:
pls_matrix
()
:
matrix
::
matrix
<
T
,
SIZE
>
()
{}
parallel_result
<
int
>
pls_multiply
(
const
matrix
::
matrix
<
T
,
SIZE
>
&
a
,
const
matrix
::
matrix
<
T
,
SIZE
>
&
b
)
{
return
pls
::
algorithm
::
for_each_range
(
0
,
SIZE
,
[
this
,
&
a
,
&
b
](
int
i
)
{
void
pls_multiply
(
const
matrix
::
matrix
<
T
,
SIZE
>
&
a
,
const
matrix
::
matrix
<
T
,
SIZE
>
&
b
)
{
pls
::
algorithm
::
for_each_range
(
0
,
SIZE
,
[
this
,
&
a
,
&
b
](
int
i
)
{
this
->
multiply_column
(
i
,
a
,
b
);
});
}
};
constexpr
size_t
MAX_NUM_THREADS
=
8
;
constexpr
size_t
MAX_NUM_TASKS
=
32
;
constexpr
size_t
MAX_NUM_CONTS
=
32
;
constexpr
size_t
MAX_CONT_SIZE
=
512
;
constexpr
int
MAX_NUM_THREADS
=
8
;
constexpr
int
MAX_NUM_TASKS
=
32
;
constexpr
int
MAX_STACK_SIZE
=
1024
*
1
;
static_scheduler_memory
<
MAX_NUM_THREADS
,
MAX_NUM_TASKS
,
MAX_STACK_SIZE
>
global_scheduler_memory
;
int
main
(
int
argc
,
char
**
argv
)
{
int
num_threads
;
...
...
@@ -40,40 +42,20 @@ int main(int argc, char **argv) {
pls_matrix
<
double
,
matrix
::
MATRIX_SIZE
>
b
;
pls_matrix
<
double
,
matrix
::
MATRIX_SIZE
>
result
;
static_scheduler_memory
<
MAX_NUM_THREADS
,
MAX_NUM_TASKS
,
MAX_NUM_CONTS
,
MAX_CONT_SIZE
>
static_scheduler_memory
;
scheduler
scheduler
{
static_scheduler_memory
,
(
unsigned
int
)
num_threads
};
for
(
int
i
=
0
;
i
<
matrix
::
WARMUP_ITERATIONS
;
i
++
)
{
scheduler
scheduler
{
global_scheduler_memory
,
(
unsigned
)
num_threads
};
scheduler
.
perform_work
([
&
]()
{
return
scheduler
::
par
([
&
]()
{
return
result
.
pls_multiply
(
a
,
b
);
},
[]()
{
return
parallel_result
<
int
>
{
0
};
}).
then
([
&
](
int
,
int
)
{
return
parallel_result
<
int
>
{
0
};
});
});
for
(
int
i
=
0
;
i
<
matrix
::
WARMUP_ITERATIONS
;
i
++
)
{
result
.
pls_multiply
(
a
,
b
);
}
});
for
(
int
i
=
0
;
i
<
matrix
::
NUM_ITERATIONS
;
i
++
)
{
scheduler
.
perform_work
([
&
]()
{
for
(
int
i
=
0
;
i
<
matrix
::
NUM_ITERATIONS
;
i
++
)
{
runner
.
start_iteration
();
return
scheduler
::
par
([
&
]()
{
return
result
.
pls_multiply
(
a
,
b
);
},
[]()
{
return
parallel_result
<
int
>
{
0
};
}).
then
([
&
](
int
,
int
)
{
result
.
pls_multiply
(
a
,
b
);
runner
.
end_iteration
();
return
parallel_result
<
int
>
{
0
};
});
});
}
});
runner
.
commit_results
(
true
);
}
This diff is collapsed.
Click to expand it.
lib/pls/include/pls/algorithms/for_each.h
View file @
7796022f
...
...
@@ -2,8 +2,6 @@
#ifndef PLS_PARALLEL_FOR_H
#define PLS_PARALLEL_FOR_H
#include "pls/internal/scheduling/parallel_result.h"
namespace
pls
{
namespace
algorithm
{
...
...
@@ -11,24 +9,24 @@ class fixed_strategy;
class
dynamic_strategy
;
template
<
typename
Function
,
typename
ExecutionStrategy
>
pls
::
internal
::
scheduling
::
parallel_result
<
int
>
for_each_range
(
unsigned
long
first
,
void
for_each_range
(
unsigned
long
first
,
unsigned
long
last
,
const
Function
&
function
,
ExecutionStrategy
&
execution_strategy
);
template
<
typename
Function
>
pls
::
internal
::
scheduling
::
parallel_result
<
int
>
for_each_range
(
unsigned
long
first
,
void
for_each_range
(
unsigned
long
first
,
unsigned
long
last
,
const
Function
&
function
);
template
<
typename
RandomIt
,
typename
Function
,
typename
ExecutionStrategy
>
pls
::
internal
::
scheduling
::
parallel_result
<
int
>
for_each
(
RandomIt
first
,
void
for_each
(
RandomIt
first
,
RandomIt
last
,
const
Function
&
function
,
ExecutionStrategy
execution_strategy
);
template
<
typename
RandomIt
,
typename
Function
>
pls
::
internal
::
scheduling
::
parallel_result
<
int
>
for_each
(
RandomIt
first
,
void
for_each
(
RandomIt
first
,
RandomIt
last
,
const
Function
&
function
);
...
...
This diff is collapsed.
Click to expand it.
lib/pls/include/pls/algorithms/for_each_impl.h
View file @
7796022f
...
...
@@ -11,7 +11,7 @@ namespace algorithm {
namespace
internal
{
template
<
typename
RandomIt
,
typename
Function
>
pls
::
internal
::
scheduling
::
parallel_result
<
int
>
for_each
(
const
RandomIt
first
,
void
for_each
(
const
RandomIt
first
,
const
RandomIt
last
,
const
Function
function
,
const
long
min_elements
)
{
...
...
@@ -23,25 +23,23 @@ pls::internal::scheduling::parallel_result<int> for_each(const RandomIt first,
for
(
auto
current
=
first
;
current
!=
last
;
current
++
)
{
function
(
*
current
);
}
return
parallel_result
<
int
>
{
0
};
}
else
{
// Cut in half recursively
const
long
middle_index
=
num_elements
/
2
;
return
scheduler
::
par
([
first
,
middle_index
,
last
,
function
,
min_elements
]
{
scheduler
::
spawn
([
first
,
middle_index
,
last
,
&
function
,
min_elements
]
{
return
internal
::
for_each
(
first
,
first
+
middle_index
,
function
,
min_elements
);
},
[
first
,
middle_index
,
last
,
function
,
min_elements
]
{
});
scheduler
::
spawn
([
first
,
middle_index
,
last
,
&
function
,
min_elements
]
{
return
internal
::
for_each
(
first
+
middle_index
,
last
,
function
,
min_elements
);
}).
then
([](
int
,
int
)
{
return
parallel_result
<
int
>
{
0
};
});
scheduler
::
sync
();
}
}
...
...
@@ -52,7 +50,7 @@ class dynamic_strategy {
explicit
dynamic_strategy
(
const
unsigned
int
tasks_per_thread
=
4
)
:
tasks_per_thread_
{
tasks_per_thread
}
{};
long
calculate_min_elements
(
long
num_elements
)
const
{
const
long
num_threads
=
pls
::
internal
::
scheduling
::
thread_state
::
get
().
scheduler_
->
num_threads
();
const
long
num_threads
=
pls
::
internal
::
scheduling
::
thread_state
::
get
().
get_scheduler
().
num_threads
();
return
num_elements
/
(
num_threads
*
tasks_per_thread_
);
}
private
:
...
...
@@ -71,21 +69,27 @@ class fixed_strategy {
};
template
<
typename
RandomIt
,
typename
Function
,
typename
ExecutionStrategy
>
pls
::
internal
::
scheduling
::
parallel_result
<
int
>
for_each
(
RandomIt
first
,
void
for_each
(
RandomIt
first
,
RandomIt
last
,
const
Function
&
function
,
ExecutionStrategy
execution_strategy
)
{
ExecutionStrategy
execution_strategy
)
{
long
num_elements
=
std
::
distance
(
first
,
last
);
return
internal
::
for_each
(
first
,
last
,
function
,
execution_strategy
.
calculate_min_elements
(
num_elements
));
return
internal
::
for_each
(
first
,
last
,
function
,
execution_strategy
.
calculate_min_elements
(
num_elements
)
);
}
template
<
typename
RandomIt
,
typename
Function
>
pls
::
internal
::
scheduling
::
parallel_result
<
int
>
for_each
(
RandomIt
first
,
RandomIt
last
,
const
Function
&
function
)
{
void
for_each
(
RandomIt
first
,
RandomIt
last
,
const
Function
&
function
)
{
return
for_each
(
first
,
last
,
function
,
dynamic_strategy
{
4
});
}
template
<
typename
Function
,
typename
ExecutionStrategy
>
pls
::
internal
::
scheduling
::
parallel_result
<
int
>
for_each_range
(
unsigned
long
first
,
void
for_each_range
(
unsigned
long
first
,
unsigned
long
last
,
const
Function
&
function
,
ExecutionStrategy
execution_strategy
)
{
...
...
@@ -94,7 +98,7 @@ pls::internal::scheduling::parallel_result<int> for_each_range(unsigned long fir
}
template
<
typename
Function
>
pls
::
internal
::
scheduling
::
parallel_result
<
int
>
for_each_range
(
unsigned
long
first
,
void
for_each_range
(
unsigned
long
first
,
unsigned
long
last
,
const
Function
&
function
)
{
auto
range
=
pls
::
internal
::
helpers
::
range
(
first
,
last
);
...
...
This diff is collapsed.
Click to expand it.
lib/pls/include/pls/internal/helpers/range.h
View file @
7796022f
...
...
@@ -112,7 +112,7 @@ struct basic_range {
:
r
(
rhs
.
r
),
index
(
rhs
.
index
)
{}
const_iterator_impl
(
basic_range
<
IntegerType
>
const
*
p_range
,
size_type
p_index
)
:
r
(
*
p_range
),
index
(
p_index
)
{}
:
r
(
p_range
),
index
(
p_index
)
{}
const_iterator_impl
&
operator
=
(
const
const_iterator_impl
&
rhs
)
{
r
=
rhs
.
r
;
...
...
@@ -121,7 +121,7 @@ struct basic_range {
}
bool
operator
==
(
const
const_iterator_impl
&
rhs
)
const
{
return
r
==
rhs
.
r
&&
index
==
rhs
.
index
;
return
*
r
==
*
(
rhs
.
r
)
&&
index
==
rhs
.
index
;
}
bool
operator
!=
(
const
const_iterator_impl
&
rhs
)
const
{
...
...
@@ -145,7 +145,7 @@ struct basic_range {
}
value_type
operator
*
()
const
{
return
r
.
m_first_element
+
r
.
m_step
*
index
;
return
r
->
m_first_element
+
r
->
m_step
*
index
;
}
// operator->
...
...
@@ -212,11 +212,11 @@ struct basic_range {
value_type
operator
[](
difference_type
offset
)
const
{
size_type
new_index
=
index
+
offset
;
return
r
.
m_first_element
+
r
.
m_step
*
new_index
;
return
r
->
m_first_element
+
r
->
m_step
*
new_index
;
}
private
:
basic_range
<
IntegerType
>
r
;
basic_range
<
IntegerType
>
const
*
r
;
size_type
index
;
};
...
...
@@ -236,7 +236,7 @@ struct basic_range {
:
r
(
rhs
.
r
),
index
(
rhs
.
index
)
{}
const_reverse_iterator_impl
(
basic_range
<
IntegerType
>
const
*
p_range
,
size_type
p_index
)
:
r
(
*
p_range
),
index
(
p_index
)
{}
:
r
(
p_range
),
index
(
p_index
)
{}
const_reverse_iterator_impl
&
operator
=
(
const
const_reverse_iterator_impl
&
rhs
)
{
r
=
rhs
.
r
;
...
...
@@ -245,7 +245,7 @@ struct basic_range {
}
bool
operator
==
(
const
const_reverse_iterator_impl
&
rhs
)
const
{
return
r
==
rhs
.
r
&&
index
==
rhs
.
index
;
return
*
r
==
*
(
rhs
.
r
)
&&
index
==
rhs
.
index
;
}
bool
operator
!=
(
const
const_reverse_iterator_impl
&
rhs
)
const
{
...
...
@@ -270,8 +270,8 @@ struct basic_range {
value_type
operator
*
()
const
{
size_type
reverse_index
=
(
r
.
m_element_count
-
1
)
-
index
;
return
r
.
m_first_element
+
r
.
m_step
*
reverse_index
;
=
(
r
->
m_element_count
-
1
)
-
index
;
return
r
->
m_first_element
+
r
->
m_step
*
reverse_index
;
}
// operator->
...
...
@@ -338,12 +338,12 @@ struct basic_range {
value_type
operator
[](
difference_type
offset
)
const
{
size_type
new_reverse_index
=
(
r
.
m_element_count
-
1
)
-
(
index
+
offset
);
return
r
.
m_first_element
+
r
.
m_step
*
new_reverse_index
;
=
(
r
->
m_element_count
-
1
)
-
(
index
+
offset
);
return
r
->
m_first_element
+
r
->
m_step
*
new_reverse_index
;
}
private
:
basic_range
<
IntegerType
>
r
;
basic_range
<
IntegerType
>
const
*
r
;
size_type
index
;
};
...
...
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment