Skip to content
Toggle navigation
P
Projects
G
Groups
S
Snippets
Help
FORMUS3IC_LAS3
/
embb
This project
Loading...
Sign in
Toggle navigation
Go to a project
Project
Repository
Issues
0
Merge Requests
0
Pipelines
Wiki
Members
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Commit
517d459b
authored
Apr 12, 2015
by
Tobias Fuchs
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
performance tests: finished refactoring of performance test framework
parent
f9beb8b6
Show whitespace changes
Inline
Side-by-side
Showing
18 changed files
with
367 additions
and
333 deletions
+367
-333
algorithms_cpp/perf/for_each_perf-inl.h
+10
-3
algorithms_cpp/perf/for_each_perf.h
+5
-7
algorithms_cpp/perf/main.cc
+24
-163
algorithms_cpp/perf/merge_sort_perf-inl.h
+8
-3
algorithms_cpp/perf/merge_sort_perf.h
+1
-1
algorithms_cpp/perf/quick_sort_perf-inl.h
+8
-3
algorithms_cpp/perf/quick_sort_perf.h
+1
-1
algorithms_cpp/perf/reduce_perf-inl.h
+10
-3
algorithms_cpp/perf/reduce_perf.h
+3
-3
algorithms_cpp/perf/scan_perf-inl.h
+10
-3
algorithms_cpp/perf/scan_perf.h
+1
-1
base_cpp/perf/include/embb/base/perf/call_args.h
+12
-35
base_cpp/perf/include/embb/base/perf/parallel_perf_test_unit.h
+35
-45
base_cpp/perf/include/embb/base/perf/perf.h
+67
-0
base_cpp/perf/include/embb/base/perf/performance_test.h
+4
-7
base_cpp/perf/include/embb/base/perf/serial_perf_test_unit.h
+139
-0
base_cpp/perf/include/embb/base/perf/speedup_test.h
+29
-33
base_cpp/perf/src/call_args.cc
+0
-22
No files found.
algorithms_cpp/perf/for_each_perf-inl.h
View file @
517d459b
...
...
@@ -79,9 +79,6 @@ ParallelForEach<T>::ParallelForEach(const embb::base::perf::CallArgs & args)
if
(
cargs
.
StressMode
()
==
CallArgs
::
RAM_STRESS
)
{
v
=
static_cast
<
T
*>
(
embb
::
base
::
Allocation
::
AllocateCacheAligned
(
vector_size
*
sizeof
(
T
)));
for
(
size_t
i
=
0
;
i
<
vector_size
;
i
++
)
{
v
[
i
]
=
static_cast
<
T
>
(
i
);
}
}
else
{
v
=
0
;
}
...
...
@@ -95,6 +92,16 @@ ParallelForEach<T>::~ParallelForEach() {
}
template
<
typename
T
>
void
ParallelForEach
<
T
>::
Pre
()
{
if
(
cargs
.
StressMode
()
==
CallArgs
::
RAM_STRESS
)
{
// Initialize input vector with incrementing values:
for
(
size_t
i
=
0
;
i
<
vector_size
;
i
++
)
{
v
[
i
]
=
static_cast
<
T
>
(
i
);
}
}
}
template
<
typename
T
>
void
ParallelForEach
<
T
>::
Run
(
unsigned
int
numThreads
)
{
if
(
cargs
.
StressMode
()
==
CallArgs
::
CPU_STRESS
)
{
// Computing input values, no memory access
...
...
algorithms_cpp/perf/for_each_perf.h
View file @
517d459b
...
...
@@ -34,15 +34,13 @@ namespace embb {
namespace
algorithms
{
namespace
perf
{
using
embb
::
base
::
perf
::
CallArgs
;
/**
* Operation performed in each loop iteration.
*/
template
<
typename
T
>
class
ForEachOp
{
public
:
explicit
ForEachOp
(
const
CallArgs
&
args
)
:
explicit
ForEachOp
(
const
embb
::
base
::
perf
::
CallArgs
&
args
)
:
load_factor
(
args
.
LoadFactor
())
{
}
void
operator
()(
T
&
val
)
const
{
T
x
=
val
;
...
...
@@ -54,13 +52,13 @@ public:
val
=
x
;
}
private
:
unsigned
in
t
load_factor
;
size_
t
load_factor
;
};
template
<
typename
T
>
class
ForEachFunctor
{
public
:
ForEachFunctor
(
const
CallArgs
&
args
)
:
ForEachFunctor
(
const
embb
::
base
::
perf
::
CallArgs
&
args
)
:
cargs
(
args
),
op
(
args
)
{
}
void
operator
()(
T
&
value
)
const
{
op
(
value
);
...
...
@@ -75,7 +73,7 @@ public:
return
*
this
;
}
private
:
const
CallArgs
&
cargs
;
const
embb
::
base
::
perf
::
CallArgs
&
cargs
;
ForEachOp
<
T
>
op
;
};
...
...
@@ -103,7 +101,7 @@ class ParallelForEach {
public
:
explicit
ParallelForEach
(
const
embb
::
base
::
perf
::
CallArgs
&
args
);
~
ParallelForEach
();
void
Pre
()
{
}
void
Pre
()
;
void
Run
(
unsigned
int
numThreads
);
void
Post
()
{
}
...
...
algorithms_cpp/perf/main.cc
View file @
517d459b
...
...
@@ -24,21 +24,8 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
#include <string>
#include <fstream>
#include <iostream>
#include <iomanip>
#include <partest/partest.h>
#include <embb/base/core_set.h>
#include <embb/base/thread.h>
#include <embb/base/c/log.h>
#include <embb/base/c/internal/thread_index.h>
#include <embb/tasks/tasks.h>
#include <embb/base/perf/call_args.h>
#include <embb/base/perf/timer.h>
#include <for_each_perf.h>
#include <reduce_perf.h>
#include <scan_perf.h>
...
...
@@ -46,159 +33,33 @@
#include <quick_sort_perf.h>
#include <merge_sort_perf.h>
#include <embb/base/perf/performance_test.h>
#include <embb/base/perf/perf.h>
#include <embb/base/perf/speedup_test.h>
using
namespace
embb
::
algorithms
::
perf
;
using
embb
::
algorithms
::
perf
::
SerialForEach
;
using
embb
::
algorithms
::
perf
::
SerialReduce
;
using
embb
::
algorithms
::
perf
::
SerialScan
;
using
embb
::
algorithms
::
perf
::
SerialCount
;
using
embb
::
algorithms
::
perf
::
SerialScan
;
using
embb
::
algorithms
::
perf
::
SerialMergeSort
;
using
embb
::
algorithms
::
perf
::
SerialQuickSort
;
using
embb
::
algorithms
::
perf
::
ParallelForEach
;
using
embb
::
algorithms
::
perf
::
ParallelReduce
;
using
embb
::
algorithms
::
perf
::
ParallelScan
;
using
embb
::
algorithms
::
perf
::
ParallelCount
;
using
embb
::
algorithms
::
perf
::
ParallelMergeSort
;
using
embb
::
algorithms
::
perf
::
ParallelQuickSort
;
using
embb
::
base
::
perf
::
Timer
;
using
embb
::
base
::
perf
::
CallArgs
;
using
embb
::
base
::
perf
::
PerformanceTest
;
#if 0
/**
 * Reports a single measurement as one comma-separated row.
 *
 * The row is written twice:
 *  - to std::cout, without the test name column;
 *  - appended to the file "performance_tests_result.csv", with the test
 *    name as first column.
 *
 * Columns after the (optional) name: vector size, element type name, load
 * factor, stress mode name, thread count, elapsed time, speedup.
 *
 * \param name     Name of the test, written to the CSV file only.
 * \param threads  Thread count column of the row.
 * \param args     Call arguments providing the configuration columns.
 * \param elapsed  Measured duration, written in fixed notation.
 * \param speedup  Speedup value, written with precision 3.
 */
void ReportResult(
  const std::string & name,
  unsigned int threads,
  const CallArgs & args,
  double elapsed,
  double speedup) {
  // Console row; the precision set for the speedup column stays active on
  // std::cout for subsequent output.
  std::cout << args.VectorSize() << ",";
  std::cout << args.ElementTypeName() << ",";
  std::cout << args.LoadFactor() << ",";
  std::cout << args.StressModeName() << ",";
  std::cout << threads << ",";
  std::cout << std::fixed << elapsed << ",";
  std::cout << std::setprecision(3) << speedup << std::endl;

  // CSV row, appended to the result file. The stream is opened anew on
  // every call; the result of opening it is not checked.
  std::ofstream csv(
    "performance_tests_result.csv",
    ::std::ofstream::out | ::std::ofstream::app);
  csv << name << ",";
  csv << args.VectorSize() << ",";
  csv << args.ElementTypeName() << ",";
  csv << args.LoadFactor() << ",";
  csv << args.StressModeName() << ",";
  csv << threads << ",";
  csv << std::fixed << elapsed << ",";
  csv << std::setprecision(3) << speedup << std::endl;
}
/**
 * Runs one serial/parallel test pair and reports every measurement via
 * ReportResult().
 *
 * Sequence:
 *  1. If args.ParallelBaseReference() is 0, a TestSerial instance is run once
 *     and reported with thread count 0 and speedup 1.0.
 *  2. The TestParallel instance is run repeatedly with an increasing number
 *     of threads (step 1 below 4 threads, step 4 from there on) up to and
 *     including args.MaxThreads(). The EMBB task node is initialized before
 *     and finalized after every single run.
 *
 * NOTE(review): when ParallelBaseReference() is 0, the serial duration stored
 * in baseDuration is overwritten by the run with threads == 1, so the
 * reported speedups are relative to the 1-thread parallel run. When it is
 * non-zero, neither the serial run nor a run with threads == 1 takes place,
 * so baseDuration stays 0 and every reported speedup is 0. Confirm whether
 * this is intended.
 *
 * \tparam TestSerial    Serial test type, constructible from CallArgs,
 *                       providing Run().
 * \tparam TestParallel  Parallel test type, constructible from CallArgs,
 *                       providing Pre(), Run(unsigned int) and Post().
 * \param args  Call arguments configuring the tests.
 * \param name  Test name, printed and forwarded to ReportResult().
 */
template< typename TestSerial, typename TestParallel >
void RunPerformanceTest(
const embb::base::perf::CallArgs & args,
const std::string & name) {
std::cout << "--- Running " << name << std::endl;
// Initialize new test instances:
TestParallel testParallel(args);
// Parallel runs:
unsigned int threads = 1;
// Base value to compute speedup; parallel execution
// with 1 thread or serial execution.
double baseDuration = 0;
// Whether to use serial or parallel execution using 1
// thread for speedup reference:
if (args.ParallelBaseReference() == 0) {
TestSerial testSerial(args);
// Serial run:
// (presumably the Timer starts measuring on construction - confirm)
Timer t;
testSerial.Run();
double elapsed = t.Elapsed();
// Thread count 0 marks the serial row; its speedup is 1.0 by definition.
ReportResult(
name,
0,
args,
elapsed,
1.0);
baseDuration = elapsed;
}
// Start the parallel runs at 1 + ParallelBaseReference() threads:
threads += args.ParallelBaseReference();
// Run executions with incrementing number of threads:
embb_internal_thread_index_set_max(args.MaxThreads());
while(threads <= args.MaxThreads()) {
// Set number of available threads to given limit:
// embb::base::Thread::SetThreadsMaxCount(threads);
embb_internal_thread_index_reset();
// Configure cores to be used by EMBB:
// (core ids 0 .. threads-1 are added to an initially empty-constructed set)
embb::base::CoreSet cores(false);
for (unsigned int coreId = 0; coreId < threads; ++coreId) {
cores.Add(coreId);
}
// Node limits are set to eight times their defaults, except priorities:
embb::tasks::Node::Initialize(
1, 1, cores,
MTAPI_NODE_MAX_TASKS_DEFAULT * 8,
MTAPI_NODE_MAX_GROUPS_DEFAULT * 8,
MTAPI_NODE_MAX_QUEUES_DEFAULT * 8,
MTAPI_NODE_QUEUE_LIMIT_DEFAULT * 8,
MTAPI_NODE_MAX_PRIORITIES_DEFAULT);
// Test setup:
testParallel.Pre();
// Initialize timer:
Timer t;
// Run the test body:
testParallel.Run(threads);
// Report duration:
double elapsed = t.Elapsed();
// The single-threaded parallel run replaces any previous base duration:
if (threads == 1) {
baseDuration = elapsed;
}
ReportResult(
name,
threads,
args,
elapsed,
static_cast<double>(baseDuration) / static_cast<double>(elapsed));
// Test teardown:
testParallel.Post();
// Thread count progression: 1, 2, 3, 4, 8, 12, ...
if (threads < 4) {
++threads;
} else {
threads += 4;
}
embb::tasks::Node::Finalize();
}
}
/**
 * Entry point of the algorithms performance test executable.
 *
 * Parses the command line into CallArgs, prints the resulting settings and
 * runs every serial/parallel test pair with element type float. An error
 * raised while parsing is printed to std::cerr; execution then continues
 * with the arguments as they are at that point.
 *
 * \return Always 0.
 */
int main(int argc, char * argv[]) {
  embb::base::perf::CallArgs args;

  // Command line parsing; a parse error is reported but not fatal:
  try {
    args.Parse(argc, argv);
  } catch (::std::runtime_error & parse_error) {
    ::std::cerr << parse_error.what() << ::std::endl;
  }

  // Show the effective test settings:
  args.Print(::std::cout);

  // One run per algorithm, serial implementation first in each pair:
  RunPerformanceTest<
    SerialForEach<float>, ParallelForEach<float> >(args, "ForEach");
  RunPerformanceTest<
    SerialReduce<float>, ParallelReduce<float> >(args, "Reduce");
  RunPerformanceTest<
    SerialScan<float>, ParallelScan<float> >(args, "Scan");
  RunPerformanceTest<
    SerialCount<float>, ParallelCount<float> >(args, "Count");
  RunPerformanceTest<
    SerialQuickSort<float>, ParallelQuickSort<float> >(args, "Quicksort");
  RunPerformanceTest<
    SerialMergeSort<float>, ParallelMergeSort<float> >(args, "Mergesort");

  return 0;
}
#endif
using
embb
::
base
::
perf
::
SpeedupTest
;
int
main
(
int
argc
,
char
*
argv
[])
{
// Parse command line arguments:
CallArgs
args
;
try
{
args
.
Parse
(
argc
,
argv
);
}
catch
(
::
std
::
runtime_error
&
re
)
{
::
std
::
cerr
<<
re
.
what
()
<<
::
std
::
endl
;
}
// Print test settings:
args
.
Print
(
::
std
::
cout
);
// Run tests:
PerformanceTest
<
SerialForEach
<
float
>
,
ParallelForEach
<
float
>
,
CallArgs
>
test
(
args
);
test
.
Run
();
test
.
PrintReport
(
std
::
cout
);
#define COMMA ,
return
0
;
PT_PERF_MAIN
(
"Algorithms"
)
{
PT_PERF_RUN
(
SpeedupTest
<
SerialForEach
<
float
>
COMMA
ParallelForEach
<
float
>
>
);
PT_PERF_RUN
(
SpeedupTest
<
SerialReduce
<
float
>
COMMA
ParallelReduce
<
float
>
>
);
PT_PERF_RUN
(
SpeedupTest
<
SerialScan
<
float
>
COMMA
ParallelScan
<
float
>
>
);
PT_PERF_RUN
(
SpeedupTest
<
SerialCount
<
float
>
COMMA
ParallelCount
<
float
>
>
);
PT_PERF_RUN
(
SpeedupTest
<
SerialMergeSort
<
float
>
COMMA
ParallelMergeSort
<
float
>
>
);
PT_PERF_RUN
(
SpeedupTest
<
SerialQuickSort
<
float
>
COMMA
ParallelQuickSort
<
float
>
>
);
}
algorithms_cpp/perf/merge_sort_perf-inl.h
View file @
517d459b
...
...
@@ -43,9 +43,6 @@ SerialMergeSort<T>::SerialMergeSort(const embb::base::perf::CallArgs & args)
:
cargs
(
args
),
vector_size
(
args
.
VectorSize
())
{
v
=
static_cast
<
T
*>
(
embb
::
base
::
Allocation
::
AllocateCacheAligned
(
vector_size
*
sizeof
(
T
)));
for
(
size_t
i
=
0
;
i
<
vector_size
;
i
++
)
{
v
[
i
]
=
static_cast
<
T
>
(
i
);
}
}
template
<
typename
T
>
...
...
@@ -74,6 +71,14 @@ ParallelMergeSort<T>::~ParallelMergeSort() {
}
template
<
typename
T
>
void
ParallelMergeSort
<
T
>::
Pre
()
{
// Initialize input vector with incrementing values:
for
(
size_t
i
=
0
;
i
<
vector_size
;
i
++
)
{
v
[
i
]
=
static_cast
<
T
>
(
i
);
}
}
template
<
typename
T
>
void
ParallelMergeSort
<
T
>::
Run
(
unsigned
int
numThreads
)
{
// Always reading input values from memory, no CPU-only test possible
// as mergesort sorts in-place.
...
...
algorithms_cpp/perf/merge_sort_perf.h
View file @
517d459b
...
...
@@ -59,7 +59,7 @@ class ParallelMergeSort {
public
:
explicit
ParallelMergeSort
(
const
embb
::
base
::
perf
::
CallArgs
&
args
);
~
ParallelMergeSort
();
void
Pre
()
{
}
void
Pre
()
;
void
Run
(
unsigned
int
numThreads
);
void
Post
()
{
}
...
...
algorithms_cpp/perf/quick_sort_perf-inl.h
View file @
517d459b
...
...
@@ -72,9 +72,6 @@ ParallelQuickSort<T>::ParallelQuickSort(const embb::base::perf::CallArgs & args)
:
cargs
(
args
),
vector_size
(
args
.
VectorSize
())
{
v
=
static_cast
<
T
*>
(
embb
::
base
::
Allocation
::
AllocateCacheAligned
(
vector_size
*
sizeof
(
T
)));
for
(
size_t
i
=
0
;
i
<
vector_size
;
i
++
)
{
v
[
i
]
=
static_cast
<
T
>
(
i
);
}
}
template
<
typename
T
>
...
...
@@ -83,6 +80,14 @@ ParallelQuickSort<T>::~ParallelQuickSort() {
}
template
<
typename
T
>
void
ParallelQuickSort
<
T
>::
Pre
()
{
// Initialize input vector with incrementing values:
for
(
size_t
i
=
0
;
i
<
vector_size
;
i
++
)
{
v
[
i
]
=
static_cast
<
T
>
(
i
);
}
}
template
<
typename
T
>
void
ParallelQuickSort
<
T
>::
Run
(
unsigned
int
numThreads
)
{
// Always reading input values from memory, no CPU-only test possible
// as quicksort sorts in-place.
...
...
algorithms_cpp/perf/quick_sort_perf.h
View file @
517d459b
...
...
@@ -59,7 +59,7 @@ class ParallelQuickSort {
public
:
explicit
ParallelQuickSort
(
const
embb
::
base
::
perf
::
CallArgs
&
args
);
~
ParallelQuickSort
();
void
Pre
()
{
}
void
Pre
()
;
void
Run
(
unsigned
int
numThreads
);
void
Post
()
{
}
...
...
algorithms_cpp/perf/reduce_perf-inl.h
View file @
517d459b
...
...
@@ -93,9 +93,6 @@ ParallelReduce<T>::ParallelReduce(
v
=
static_cast
<
T
*>
(
embb
::
base
::
Allocation
::
AllocateCacheAligned
(
vector_size
*
sizeof
(
T
)));
for
(
size_t
i
=
0
;
i
<
vector_size
;
i
++
)
{
v
[
i
]
=
(
T
)
i
;
}
}
else
{
v
=
0
;
...
...
@@ -110,6 +107,16 @@ ParallelReduce<T>::~ParallelReduce() {
}
template
<
typename
T
>
void
ParallelReduce
<
T
>::
Pre
()
{
if
(
cargs
.
StressMode
()
==
CallArgs
::
RAM_STRESS
)
{
// Initialize input vector with incrementing values:
for
(
size_t
i
=
0
;
i
<
vector_size
;
i
++
)
{
v
[
i
]
=
(
T
)
i
;
}
}
}
template
<
typename
T
>
void
ParallelReduce
<
T
>::
Run
(
unsigned
int
numThreads
)
{
TransformOp
<
T
>
op
(
static_cast
<
T
>
(
1
)
/
vector_size
,
cargs
);
if
(
cargs
.
StressMode
()
==
CallArgs
::
CPU_STRESS
)
{
...
...
algorithms_cpp/perf/reduce_perf.h
View file @
517d459b
...
...
@@ -51,8 +51,8 @@ public:
T
operator
()(
T
val
)
const
{
T
x
=
0
;
// Simulate more complex operation depending on
// load factor. Default load factor is 1.
for
(
size_t
i
=
0
;
i
<
load_factor
*
10
000
;
++
i
)
{
// load factor. Default load factor is 1
00
.
for
(
size_t
i
=
0
;
i
<
load_factor
*
10
;
++
i
)
{
x
=
(
val
+
static_cast
<
T
>
(
0
.
5
))
*
step_size
*
i
;
x
=
static_cast
<
T
>
(
4
.
0
/
(
1
.
0
+
x
*
x
/
load_factor
));
}
...
...
@@ -87,7 +87,7 @@ public:
explicit
ParallelReduce
(
const
embb
::
base
::
perf
::
CallArgs
&
args
);
~
ParallelReduce
();
void
Pre
()
{
}
void
Pre
()
;
void
Run
(
unsigned
int
numThreads
);
void
Post
()
{
}
...
...
algorithms_cpp/perf/scan_perf-inl.h
View file @
517d459b
...
...
@@ -101,9 +101,6 @@ ParallelScan<T>::ParallelScan(const embb::base::perf::CallArgs & args) :
if
(
cargs
.
StressMode
()
==
CallArgs
::
RAM_STRESS
)
{
in
=
(
T
*
)
Allocation
::
AllocateCacheAligned
(
vector_size
*
sizeof
(
T
));
for
(
size_t
i
=
0
;
i
<
vector_size
;
i
++
)
{
in
[
i
]
=
static_cast
<
T
>
(
1
);
}
}
else
{
in
=
0
;
...
...
@@ -120,6 +117,16 @@ ParallelScan<T>::~ParallelScan() {
}
template
<
typename
T
>
void
ParallelScan
<
T
>::
Pre
()
{
if
(
cargs
.
StressMode
()
==
CallArgs
::
RAM_STRESS
)
{
// Initialize input vector with 1's:
for
(
size_t
i
=
0
;
i
<
vector_size
;
i
++
)
{
in
[
i
]
=
static_cast
<
T
>
(
1
);
}
}
}
template
<
typename
T
>
void
ParallelScan
<
T
>::
Run
(
unsigned
int
numThreads
)
{
if
(
cargs
.
StressMode
()
==
CallArgs
::
CPU_STRESS
)
{
CpuStressScanOp
<
T
>
op
(
cargs
);
...
...
algorithms_cpp/perf/scan_perf.h
View file @
517d459b
...
...
@@ -110,7 +110,7 @@ class ParallelScan {
public
:
explicit
ParallelScan
(
const
embb
::
base
::
perf
::
CallArgs
&
args
);
~
ParallelScan
();
void
Pre
()
{
}
void
Pre
()
;
void
Run
(
unsigned
int
numThreads
);
void
Post
()
{
}
...
...
base_cpp/perf/include/embb/base/perf/call_args.h
View file @
517d459b
...
...
@@ -36,33 +36,30 @@ namespace base {
namespace
perf
{
class
CallArgs
{
public
:
typedef
enum
{
UNDEFINED_SCALAR_TYPE
=
0
,
FLOAT
,
DOUBLE
}
ScalarType
;
public
:
typedef
enum
{
UNDEFINED_STRESS_TYPE
=
0
,
RAM_STRESS
,
CPU_STRESS
}
StressType
;
public
:
public
:
inline
CallArgs
()
:
element_type
(
CallArgs
::
FLOAT
),
stress_type
(
CallArgs
::
RAM_STRESS
),
max_threads
(
embb
::
base
::
CoreSet
::
CountAvailable
()),
vector_size
(
10000000
),
load_factor
(
100
),
parallel_base_ref
(
0
),
load_factor
(
10
),
counter_scale
(
0
)
{
}
inline
CallArgs
(
const
CallArgs
&
other
)
:
stress_type
(
other
.
stress_type
),
max_threads
(
other
.
max_threads
),
vector_size
(
other
.
vector_size
),
load_factor
(
other
.
load_factor
),
counter_scale
(
other
.
counter_scale
)
{
}
inline
CallArgs
(
int
argc
,
char
*
argv
[])
{
Parse
(
argc
,
argv
);
}
...
...
@@ -79,18 +76,6 @@ public:
return
counter_scale
;
}
inline
ScalarType
ElementType
()
const
{
return
element_type
;
}
inline
::
std
::
string
ElementTypeName
()
const
{
return
((
ElementType
()
==
UNDEFINED_SCALAR_TYPE
)
?
"undefined"
:
((
ElementType
()
==
FLOAT
)
?
"float"
:
"double"
));
}
inline
StressType
StressMode
()
const
{
return
stress_type
;
}
...
...
@@ -109,20 +94,12 @@ public:
return
load_factor
;
}
inline
unsigned
int
ParallelBaseReference
()
const
{
return
parallel_base_ref
;;
}
private
:
ScalarType
element_type
;
private
:
StressType
stress_type
;
size_t
max_threads
;
size_t
vector_size
;
size_t
load_factor
;
unsigned
int
parallel_base_ref
;
unsigned
int
counter_scale
;
};
}
// namespace perf
...
...
base_cpp/perf/include/embb/base/perf/perf_test_unit.h
→
base_cpp/perf/include/embb/base/perf/parallel_perf_test_unit.h
View file @
517d459b
...
...
@@ -24,14 +24,15 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef EMBB_BASE_PERF_PERF_TEST_UNIT_H_
#define EMBB_BASE_PERF_PERF_TEST_UNIT_H_
#ifndef EMBB_BASE_PERF_P
ARALLEL_P
ERF_TEST_UNIT_H_
#define EMBB_BASE_PERF_P
ARALLEL_P
ERF_TEST_UNIT_H_
#include <cmath>
#include <vector>
#include <partest/partest.h>
#include <partest/test_unit.h>
#include <embb/base/perf/timer.h>
#include <embb/base/perf/call_args.h>
#include <embb/tasks/tasks.h>
#include <embb/base/c/thread.h>
#include <embb/base/c/internal/thread_index.h>
...
...
@@ -63,56 +64,46 @@ namespace perf {
* \notthreadsafe
* \ingroup CPP_BASE_PERF
*/
template
<
typename
F
,
class
TestParams
>
class
PerfTestUnit
:
public
partest
::
TestUnit
{
template
<
typename
ParallelF
>
class
P
arallelP
erfTestUnit
:
public
partest
::
TestUnit
{
public
:
/**
* Constructs PerfTestUnit and sets up partest::TestUnit with Functor \c F.
*/
explicit
PerfTestUnit
(
const
TestParams
&
params
)
:
partest
::
TestUnit
(
"PTU"
),
params_
(
params
),
duration_
(
0
)
{
func
=
new
F
(
params_
);
// Add(&PerfTestUnit<F, TestParams>::Run, this);
explicit
ParallelPerfTestUnit
(
const
CallArgs
&
params
)
:
partest
::
TestUnit
(
"ParallelPerfTestUnit"
),
params_
(
params
)
{
func_
=
new
ParallelF
(
params_
);
Add
(
&
ParallelPerfTestUnit
<
ParallelF
>::
Run
,
this
);
}
/**
* Destructs PerfTestUnit
* Destructs P
arallelP
erfTestUnit
*/
~
PerfTestUnit
()
{
delete
func
;
~
Parallel
PerfTestUnit
()
{
delete
func
_
;
}
/**
* Returns duration of this unit in microseconds.
* \return Duration of this unit in microseconds.
* Durations of single runs of this unit in microseconds.
* \return Vector of durations of single runs of this unit
* ordered by number of threads, in microseconds.
*/
double
GetDuration
()
const
{
return
duration_
;
}
#if 0
/**
* Returns thread count of this unit.
* \return Thread count of this unit.
*/
size_t GetThreadCount() const { return thread_count_; }
const
std
::
vector
<
std
::
pair
<
unsigned
int
,
double
>
>
&
GetDurations
()
const
{
return
durations_
;
}
private
:
/**
* Returns iteration count of this unit.
* \return Iteration count of this unit.
* Run performance test
*/
size_t GetIterationCount() const { return iteration_count_; }
#endif
private
:
void
Run
()
{
for
(
unsigned
int
num_threads
=
1
;
num_threads
<
params_
.
MaxThreads
();)
{
func
->
Pre
();
for
(
unsigned
int
num_threads
=
1
;
num_threads
<=
params_
.
MaxThreads
();)
{
func_
->
Pre
();
Tic
();
func
->
Run
(
num_threads
);
Toc
(
);
func
->
Post
();
func_
->
Run
(
num_threads
);
Toc
(
num_threads
);
func_
->
Post
();
if
(
num_threads
<
4
)
{
num_threads
++
;
}
else
{
...
...
@@ -149,26 +140,25 @@ class PerfTestUnit : public partest::TestUnit {
/**
* Stops timer and resets EMBB
*/
void
Toc
()
{
void
Toc
(
unsigned
int
num_threads
)
{
// stop timer
duration_
=
timer_
.
Elapsed
();
durations_
.
push_back
(
std
::
make_pair
(
num_threads
,
timer_
.
Elapsed
()));
embb
::
tasks
::
Node
::
Finalize
();
}
const
TestParams
&
params_
;
double
duration_
;
//size_t thread_count_;
//size_t iteration_count_;
const
CallArgs
params_
;
std
::
vector
<
std
::
pair
<
unsigned
int
,
double
>
>
durations_
;
Timer
timer_
;
F
*
func
;
ParallelF
*
func_
;
// prohibit copy and assignment
P
erfTestUnit
(
const
PerfTestUnit
&
other
);
P
erfTestUnit
&
operator
=
(
const
PerfTestUnit
&
other
);
P
arallelPerfTestUnit
(
const
Parallel
PerfTestUnit
&
other
);
P
arallelPerfTestUnit
&
operator
=
(
const
ParallelPerfTestUnit
&
other
);
};
}
// perf
}
// base
}
// embb
#endif
/* EMBB_BASE_PERF_PERF_TEST_UNIT_H_ */
#endif
// EMBB_BASE_PERF_PARALLEL_PERF_TEST_UNIT_H_
base_cpp/perf/include/embb/base/perf/perf.h
0 → 100644
View file @
517d459b
/*
* Copyright (c) 2014, Siemens AG. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef EMBB_BASE_CPP_PERF_PERF_H_
#define EMBB_BASE_CPP_PERF_PERF_H_
#include <embb/base/perf/call_args.h>
#include <string>
#include <fstream>
#include <iostream>
#include <iomanip>
/**
 * Defines the entry point of a performance test executable.
 *
 * Expands to:
 *  - the helper templates used by PT_PERF_RUN,
 *  - a main() function that prints \c component, parses the command line
 *    into embb::base::perf::CallArgs (a parse error is printed to std::cerr
 *    and execution continues), prints the settings and calls
 *    PartestRunPerformanceTests(),
 *  - the head of the definition of PartestRunPerformanceTests(); the macro
 *    invocation must be followed by its body in braces, typically a
 *    sequence of PT_PERF_RUN statements.
 *
 * Usage:
 * \code
 *   PT_PERF_MAIN("Component") {
 *     PT_PERF_RUN(SomeTest);
 *   }
 * \endcode
 *
 * \param component  Expression streamed to std::cout as heading.
 */
#define PT_PERF_MAIN(component) \
template <class Test> \
void PartestRunPerformanceTest(Test & test) { \
  test.Run(); \
  test.PrintReport(std::cout); \
} \
template <class Test> \
void PartestCreateAndRunPerformanceTest( \
  const embb::base::perf::CallArgs & perf_test_params) { \
  /* Named local instead of a temporary: a temporary must not be bound */ \
  /* to the non-const reference of PartestRunPerformanceTest.          */ \
  Test test(perf_test_params); \
  PartestRunPerformanceTest(test); \
} \
void PartestRunPerformanceTests( \
  embb::base::perf::CallArgs & perf_test_params); \
int main(int argc, char** argv) { \
  std::cout << component << ::std::endl; \
  embb::base::perf::CallArgs perf_test_params; \
  try { \
    perf_test_params.Parse(argc, argv); \
  } \
  catch (const ::std::runtime_error & re) { \
    ::std::cerr << re.what() << ::std::endl; \
  } \
  perf_test_params.Print(::std::cout); \
  PartestRunPerformanceTests(perf_test_params); \
  return 0; \
} \
void PartestRunPerformanceTests( \
  embb::base::perf::CallArgs & perf_test_params)

/**
 * Runs a single performance test inside the body opened by PT_PERF_MAIN.
 *
 * Expands to one expression that prints a start message, constructs an
 * instance of \c PT_PERF_TEST from \c perf_test_params, runs it, prints its
 * report to std::cout, and prints a completion message.
 *
 * A type name containing a top-level comma (e.g. a template with two
 * arguments) has to hide that comma from the preprocessor, for example
 * behind a macro that expands to a comma.
 *
 * \param PT_PERF_TEST  Test type, constructible from
 *                      embb::base::perf::CallArgs and providing Run() and
 *                      PrintReport(std::ostream &).
 */
#define PT_PERF_RUN(PT_PERF_TEST) \
( \
  (std::cout << "Running " << #PT_PERF_TEST << " ..." << std::endl), \
  PartestCreateAndRunPerformanceTest<PT_PERF_TEST>(perf_test_params), \
  (std::cout << "Running " << #PT_PERF_TEST << " ..." << " done" << std::endl) \
)
#endif // EMBB_BASE_CPP_PERF_PERF_H_
\ No newline at end of file
base_cpp/perf/include/embb/base/perf/performance_test.h
View file @
517d459b
...
...
@@ -51,9 +51,6 @@ class PerformanceTest : public partest::TestCase {
*/
explicit
PerformanceTest
(
const
TestParams
&
params
)
:
partest
::
TestCase
(),
params_
(
params
)
{
// maximum one thread per available core
size_t
threads
=
std
::
min
<
size_t
>
(
params
.
MaxThreads
(),
embb
::
base
::
CoreSet
::
CountAvailable
());
unit_
=
&
CreateUnit
<
PerfTestUnit
<
ParallelF
,
TestParams
>
>
(
params_
);
}
...
...
@@ -82,8 +79,8 @@ class PerformanceTest : public partest::TestCase {
PerformanceTest
&
operator
=
(
const
PerformanceTest
&
other
);
};
}
/* perf */
}
/* base */
}
/* embb */
}
// perf
}
// base
}
// embb
#endif
/* EMBB_BASE_PERF_PERFORMANCE_TEST_H_ */
#endif
// EMBB_BASE_PERF_PERFORMANCE_TEST_H_
base_cpp/perf/include/embb/base/perf/serial_perf_test_unit.h
0 → 100644
View file @
517d459b
/*
* Copyright (c) 2014, Siemens AG. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef EMBB_BASE_PERF_SERIAL_PERF_TEST_UNIT_H_
#define EMBB_BASE_PERF_SERIAL_PERF_TEST_UNIT_H_
#include <cmath>
#include <vector>
#include <partest/partest.h>
#include <partest/test_unit.h>
#include <embb/base/perf/timer.h>
#include <embb/base/perf/call_args.h>
#include <embb/tasks/tasks.h>
#include <embb/base/c/thread.h>
#include <embb/base/c/internal/thread_index.h>
#define THIS_DOMAIN_ID 1
#define THIS_NODE_ID 1
namespace
embb
{
namespace
base
{
namespace
perf
{
/**
* \defgroup CPP_BASE_PERF Performance Tests
*
* Performance Test Framework
*
* \ingroup CPP_BASE
*/
/**
 * Serial Performance Test Unit
 *
 * Test unit that owns one instance of the serial functor \c SerialF,
 * constructed from the call arguments, and registers its Run() method with
 * the partest::TestUnit base class. A run calls the functor's Pre(), Run()
 * and Post() methods once each and measures the duration of Run() only.
 *
 * \tparam SerialF  Functor type, constructible from CallArgs and providing
 *                  Pre(), Run() and Post().
 *
 * \notthreadsafe
 * \ingroup CPP_BASE_PERF
 */
template<typename SerialF>
class SerialPerfTestUnit : public partest::TestUnit {
 public:
  /**
   * Constructs the unit, creates the functor from a copy of \c params and
   * registers Run() as test method of this unit.
   *
   * \param params  Call arguments; copied into this unit.
   */
  explicit SerialPerfTestUnit(const CallArgs & params)
      : partest::TestUnit("SerialPerfTestUnit"),
        params_(params),
        duration_(0),
        func_(new SerialF(params_)) {
    Add(&SerialPerfTestUnit<SerialF>::Run, this);
  }

  /**
   * Destructs SerialPerfTestUnit and the functor it owns.
   */
  ~SerialPerfTestUnit() {
    delete func_;
  }

  /**
   * Duration of the most recent run of this unit.
   * \return Value of Timer::Elapsed() taken directly after the functor's
   *         Run() returned, or 0 if the unit has not been run yet
   *         (microseconds according to the original documentation; the
   *         unit is defined by Timer).
   */
  double GetDuration() const {
    return duration_;
  }

 private:
  /**
   * Run performance test: setup, timed serial run, teardown.
   */
  void Run() {
    func_->Pre();
    // Fresh timer, so only the functor's Run() is measured:
    timer_ = Timer();
    func_->Run();
    duration_ = timer_.Elapsed();
    func_->Post();
  }

  /// Copy of the call arguments the functor was created from.
  const CallArgs params_;
  /// Duration of the most recent run.
  double duration_;
  /// Timer, re-created at the start of every measurement.
  Timer timer_;
  /// Functor under test, owned by this unit.
  SerialF * func_;

  // prohibit copy and assignment
  SerialPerfTestUnit(const SerialPerfTestUnit & other);
  SerialPerfTestUnit & operator=(const SerialPerfTestUnit & other);
};
}
// perf
}
// base
}
// embb
#endif // EMBB_BASE_PERF_SERIAL_PERF_TEST_UNIT_H_
base_cpp/perf/include/embb/base/perf/speedup_test.h
View file @
517d459b
...
...
@@ -31,7 +31,9 @@
#include <iomanip>
#include <partest/partest.h>
#include <embb/base/perf/timer.h>
#include <embb/base/perf/perf_test_unit.h>
#include <embb/base/perf/call_args.h>
#include <embb/base/perf/parallel_perf_test_unit.h>
#include <embb/base/perf/serial_perf_test_unit.h>
namespace
embb
{
namespace
base
{
...
...
@@ -49,32 +51,18 @@ namespace perf {
* \notthreadsafe
* \ingroup CPP_BASE_PERF
*/
template
<
typename
ParallelF
,
typename
Seria
lF
>
template
<
typename
SerialF
,
typename
Paralle
lF
>
class
SpeedupTest
:
public
partest
::
TestCase
{
public
:
/**
* Constructs SpeedupTest and creates test units.
*/
explicit
SpeedupTest
(
size_t
max_thread_count
=
partest
::
TestSuite
::
GetDefaultNumThreads
(),
size_t
iteration_count
=
partest
::
TestSuite
::
GetDefaultNumIterations
())
:
partest
::
TestCase
()
{
/* maximum one thread per available core */
size_t
threads
=
std
::
min
<
size_t
>
(
max_thread_count
,
embb
::
base
::
CoreSet
::
CountAvailable
());
std
::
cout
<<
"Test configuration ------------------------------------"
<<
std
::
endl
;
std
::
cout
<<
" Num threads: "
<<
threads
<<
std
::
endl
;
std
::
cout
<<
" Iterations: "
<<
iteration_count
<<
std
::
endl
;
/* create unit for serial version */
ser_unit_
=
&
CreateUnit
<
PerfTestUnit
<
SerialF
>
>
(
0
,
iteration_count
);
/* create log2(threads)+1 units for parallel version */
for
(
size_t
i
=
1
;
i
<=
threads
;
i
=
i
*
2
)
{
par_units_
.
push_back
(
&
CreateUnit
<
PerfTestUnit
<
ParallelF
>
>
(
i
,
iteration_count
));
}
explicit
SpeedupTest
(
const
embb
::
base
::
perf
::
CallArgs
&
params
)
:
partest
::
TestCase
(),
params_
(
params
)
{
// create unit for serial version
ser_unit_
=
&
CreateUnit
<
SerialPerfTestUnit
<
SerialF
>
,
CallArgs
>
(
params_
);
// create unit for parallel version
par_unit_
=
&
CreateUnit
<
ParallelPerfTestUnit
<
ParallelF
>
,
CallArgs
>
(
params_
);
}
/**
...
...
@@ -87,31 +75,39 @@ class SpeedupTest : public partest::TestCase {
* Prints the durations of all units in comma separated format.
*/
void
PrintReport
(
std
::
ostream
&
ostr
)
{
/* print sample row for sequential run (degree 0): */
double
serial_duration
=
ser_unit_
->
GetDuration
();
// print sample row for sequential run (degree 0):
ostr
<<
"0,"
<<
std
::
fixed
<<
std
::
setprecision
(
2
)
<<
ser_unit_
->
GetDuration
()
<<
std
::
endl
;
/* print sample rows for parallel runs (degree > 0): */
for
(
int
i
=
0
;
i
<
par_units_
.
size
();
++
i
)
{
ostr
<<
std
::
fixed
<<
par_units_
[
i
]
->
GetThreadCount
()
<<
serial_duration
<<
","
<<
std
::
fixed
<<
1
.
0
<<
std
::
endl
;
// print sample rows for parallel runs (degree > 0):
std
::
vector
<
std
::
pair
<
unsigned
int
,
double
>
>
durations
=
par_unit_
->
GetDurations
();
for
(
unsigned
int
i
=
0
;
i
<
durations
.
size
();
++
i
)
{
ostr
<<
std
::
fixed
<<
durations
[
i
].
first
<<
","
<<
std
::
fixed
<<
std
::
setprecision
(
2
)
<<
par_units_
[
i
]
->
GetDuration
()
<<
durations
[
i
].
second
<<
","
<<
std
::
fixed
<<
serial_duration
/
durations
[
i
].
second
<<
std
::
endl
;
}
}
private
:
std
::
vector
<
PerfTestUnit
<
ParallelF
>
*>
par_units_
;
PerfTestUnit
<
SerialF
>
*
ser_unit_
;
const
CallArgs
&
params_
;
ParallelPerfTestUnit
<
ParallelF
>
*
par_unit_
;
SerialPerfTestUnit
<
SerialF
>
*
ser_unit_
;
/* prohibit copy and assignment */
SpeedupTest
(
const
SpeedupTest
&
other
);
SpeedupTest
&
operator
=
(
const
SpeedupTest
&
other
);
};
}
/* perf */
}
/* base */
}
/* embb */
}
// perf
}
// base
}
// embb
#endif
/* EMBB_BASE_PERF_SPEEDUP_TEST_H_ */
base_cpp/perf/src/call_args.cc
View file @
517d459b
...
...
@@ -37,17 +37,6 @@ void CallArgs::Parse(int argc, char * argv[]) {
counter_scale
=
scale_param
;
}
}
// Element type:
if
(
std
::
string
(
argv
[
paramIndex
])
==
"-e"
)
{
element_type
=
UNDEFINED_SCALAR_TYPE
;
::
std
::
string
type
=
argv
[
paramIndex
+
1
];
if
(
type
==
"float"
)
{
element_type
=
FLOAT
;
}
else
if
(
type
==
"double"
)
{
element_type
=
DOUBLE
;
}
}
// Stress type:
if
(
std
::
string
(
argv
[
paramIndex
])
==
"-s"
)
{
stress_type
=
UNDEFINED_STRESS_TYPE
;
...
...
@@ -64,15 +53,6 @@ void CallArgs::Parse(int argc, char * argv[]) {
load_factor
=
static_cast
<
size_t
>
(
atoi
(
argv
[
paramIndex
+
1
]));
}
// Additional test parameter:
if
(
std
::
string
(
argv
[
paramIndex
])
==
"-p"
)
{
parallel_base_ref
=
atoi
(
argv
[
paramIndex
+
1
]);
}
// Sanitizing and error handling:
if
(
element_type
==
UNDEFINED_SCALAR_TYPE
)
{
throw
::
std
::
runtime_error
(
"Invalid setting for element type (-e int|float|double)"
);
}
if
(
stress_type
==
UNDEFINED_STRESS_TYPE
)
{
throw
::
std
::
runtime_error
(
"Invalid setting for stress test type (-s ram|cpu)"
);
...
...
@@ -88,9 +68,7 @@ void CallArgs::Print(std::ostream & os) {
os
<<
"Max. threads: (-t) "
<<
MaxThreads
()
<<
std
::
endl
<<
"Vector size: (-n) "
<<
VectorSize
()
<<
std
::
endl
<<
"Load factor: (-l) "
<<
LoadFactor
()
<<
std
::
endl
<<
"Element type: (-e) "
<<
ElementTypeName
()
<<
std
::
endl
<<
"Stress mode: (-s) "
<<
StressModeName
()
<<
std
::
endl
<<
"Serial base ref: (-p) "
<<
ParallelBaseReference
()
<<
std
::
endl
<<
"Time sampling: (-f) "
<<
embb
::
base
::
perf
::
Timer
::
TimerName
()
<<
std
::
endl
;
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment