Commit 517d459b by Tobias Fuchs

performance tests: finished refactoring of performance test framework

parent f9beb8b6
......@@ -79,9 +79,6 @@ ParallelForEach<T>::ParallelForEach(const embb::base::perf::CallArgs & args)
if (cargs.StressMode() == CallArgs::RAM_STRESS) {
v = static_cast<T *>(embb::base::Allocation::AllocateCacheAligned(
vector_size * sizeof(T)));
for (size_t i = 0; i < vector_size; i++) {
v[i] = static_cast<T>(i);
}
} else {
v = 0;
}
......@@ -95,6 +92,16 @@ ParallelForEach<T>::~ParallelForEach() {
}
/**
 * Test setup hook.
 *
 * In RAM stress mode the input vector is (re)filled with the ascending
 * values 0, 1, ..., vector_size - 1. In every other stress mode nothing
 * is written.
 */
template<typename T>
void ParallelForEach<T>::Pre() {
  if (cargs.StressMode() != CallArgs::RAM_STRESS) {
    return;
  }
  for (size_t pos = 0; pos != vector_size; ++pos) {
    v[pos] = static_cast<T>(pos);
  }
}
template<typename T>
void ParallelForEach<T>::Run(unsigned int numThreads) {
if (cargs.StressMode() == CallArgs::CPU_STRESS) {
// Computing input values, no memory access
......
......@@ -34,15 +34,13 @@ namespace embb {
namespace algorithms {
namespace perf {
using embb::base::perf::CallArgs;
/**
* Operation performed in each loop iteration.
*/
template<typename T>
class ForEachOp {
public:
explicit ForEachOp(const CallArgs & args) :
explicit ForEachOp(const embb::base::perf::CallArgs & args) :
load_factor(args.LoadFactor()) { }
void operator()(T & val) const {
T x = val;
......@@ -54,13 +52,13 @@ public:
val = x;
}
private:
unsigned int load_factor;
size_t load_factor;
};
template<typename T>
class ForEachFunctor {
public:
ForEachFunctor(const CallArgs & args) :
ForEachFunctor(const embb::base::perf::CallArgs & args) :
cargs(args), op(args) { }
void operator()(T & value) const {
op(value);
......@@ -75,7 +73,7 @@ public:
return *this;
}
private:
const CallArgs & cargs;
const embb::base::perf::CallArgs & cargs;
ForEachOp<T> op;
};
......@@ -103,7 +101,7 @@ class ParallelForEach {
public:
explicit ParallelForEach(const embb::base::perf::CallArgs & args);
~ParallelForEach();
void Pre() { }
void Pre();
void Run(unsigned int numThreads);
void Post() { }
......
......@@ -24,21 +24,8 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
#include <string>
#include <fstream>
#include <iostream>
#include <iomanip>
#include <partest/partest.h>
#include <embb/base/core_set.h>
#include <embb/base/thread.h>
#include <embb/base/c/log.h>
#include <embb/base/c/internal/thread_index.h>
#include <embb/tasks/tasks.h>
#include <embb/base/perf/call_args.h>
#include <embb/base/perf/timer.h>
#include <for_each_perf.h>
#include <reduce_perf.h>
#include <scan_perf.h>
......@@ -46,159 +33,33 @@
#include <quick_sort_perf.h>
#include <merge_sort_perf.h>
#include <embb/base/perf/performance_test.h>
#include <embb/base/perf/perf.h>
#include <embb/base/perf/speedup_test.h>
using namespace embb::algorithms::perf;
using embb::algorithms::perf::SerialForEach;
using embb::algorithms::perf::SerialReduce;
using embb::algorithms::perf::SerialScan;
using embb::algorithms::perf::SerialCount;
using embb::algorithms::perf::SerialScan;
using embb::algorithms::perf::SerialMergeSort;
using embb::algorithms::perf::SerialQuickSort;
using embb::algorithms::perf::ParallelForEach;
using embb::algorithms::perf::ParallelReduce;
using embb::algorithms::perf::ParallelScan;
using embb::algorithms::perf::ParallelCount;
using embb::algorithms::perf::ParallelMergeSort;
using embb::algorithms::perf::ParallelQuickSort;
using embb::base::perf::Timer;
using embb::base::perf::CallArgs;
using embb::base::perf::PerformanceTest;
#if 0
/**
 * Writes the comma separated fields shared by the console row and the
 * result file row, followed by a line break.
 *
 * The formatting state of \c os (flags and precision) is restored before
 * returning, so that \c std::fixed and \c std::setprecision(3) do not leak
 * into later output on the same stream. Without this, the elapsed time
 * would be printed with a different precision from the second call on.
 *
 * \param os       Stream receiving the row
 * \param threads  Number of threads used for the measured run
 * \param args     Call arguments describing the test configuration
 * \param elapsed  Measured duration of the run
 * \param speedup  Speedup relative to the reference run
 */
static void WriteResultFields(
  std::ostream & os,
  unsigned int threads,
  const CallArgs & args,
  double elapsed,
  double speedup) {
  const std::ios_base::fmtflags saved_flags = os.flags();
  const std::streamsize saved_precision = os.precision();
  os << args.VectorSize() << ","
     << args.ElementTypeName() << ","
     << args.LoadFactor() << ","
     << args.StressModeName() << ","
     << threads << ","
     << std::fixed << elapsed << ","
     << std::setprecision(3) << speedup
     << std::endl;
  os.flags(saved_flags);
  os.precision(saved_precision);
}

/**
 * Reports the result of a single measurement.
 *
 * One row is printed to \c std::cout (without the test name) and one row,
 * prefixed with the test name, is appended to the file
 * "performance_tests_result.csv" in the current working directory.
 *
 * \param name     Name of the test, first column of the file row
 * \param threads  Number of threads used for the measured run
 * \param args     Call arguments describing the test configuration
 * \param elapsed  Measured duration of the run
 * \param speedup  Speedup relative to the reference run
 */
void ReportResult(
  const std::string & name,
  unsigned int threads,
  const CallArgs & args,
  double elapsed,
  double speedup) {
  WriteResultFields(std::cout, threads, args, elapsed, speedup);

  const std::string filename = "performance_tests_result.csv";
  std::ofstream file(
    filename.c_str(), ::std::ofstream::out | ::std::ofstream::app);
  if (!file.is_open()) {
    // Do not silently drop the row if the result file is not writable.
    std::cerr << "Could not open " << filename << " for appending"
              << std::endl;
    return;
  }
  file << name << ",";
  WriteResultFields(file, threads, args, elapsed, speedup);
}
using embb::base::perf::SpeedupTest;
template< typename TestSerial, typename TestParallel >
void RunPerformanceTest(
const embb::base::perf::CallArgs & args,
const std::string & name) {
std::cout << "--- Running " << name << std::endl;
// Initialize new test instances:
TestParallel testParallel(args);
// Parallel runs:
unsigned int threads = 1;
// Base value to compute speedup; parallel execution
// with 1 thread or serial execution.
double baseDuration = 0;
#define COMMA ,
// Whether to use serial or parallel execution using 1
// thread for speedup reference:
if (args.ParallelBaseReference() == 0) {
TestSerial testSerial(args);
// Serial run:
Timer t;
testSerial.Run();
double elapsed = t.Elapsed();
ReportResult(
name,
0,
args,
elapsed,
1.0);
baseDuration = elapsed;
}
threads += args.ParallelBaseReference();
// Run executions with incrementing number of threads:
embb_internal_thread_index_set_max(args.MaxThreads());
while(threads <= args.MaxThreads()) {
// Set number of available threads to given limit:
// embb::base::Thread::SetThreadsMaxCount(threads);
embb_internal_thread_index_reset();
// Configure cores to be used by EMBB:
embb::base::CoreSet cores(false);
for (unsigned int coreId = 0; coreId < threads; ++coreId) {
cores.Add(coreId);
}
embb::tasks::Node::Initialize(
1, 1, cores,
MTAPI_NODE_MAX_TASKS_DEFAULT * 8,
MTAPI_NODE_MAX_GROUPS_DEFAULT * 8,
MTAPI_NODE_MAX_QUEUES_DEFAULT * 8,
MTAPI_NODE_QUEUE_LIMIT_DEFAULT * 8,
MTAPI_NODE_MAX_PRIORITIES_DEFAULT);
// Test setup:
testParallel.Pre();
// Initialize timer:
Timer t;
// Run the test body:
testParallel.Run(threads);
// Report duration:
double elapsed = t.Elapsed();
if (threads == 1) {
baseDuration = elapsed;
}
ReportResult(
name,
threads,
args,
elapsed,
static_cast<double>(baseDuration) / static_cast<double>(elapsed));
// Test teardown:
testParallel.Post();
if (threads < 4) {
++threads;
} else {
threads += 4;
}
embb::tasks::Node::Finalize();
}
PT_PERF_MAIN("Algorithms") {
PT_PERF_RUN(SpeedupTest< SerialForEach<float> COMMA ParallelForEach<float> >);
PT_PERF_RUN(SpeedupTest< SerialReduce<float> COMMA ParallelReduce<float> >);
PT_PERF_RUN(SpeedupTest< SerialScan<float> COMMA ParallelScan<float> >);
PT_PERF_RUN(SpeedupTest< SerialCount<float> COMMA ParallelCount<float> >);
PT_PERF_RUN(SpeedupTest< SerialMergeSort<float> COMMA ParallelMergeSort<float> >);
PT_PERF_RUN(SpeedupTest< SerialQuickSort<float> COMMA ParallelQuickSort<float> >);
}
/**
 * Entry point: parses the command line, prints the resulting settings and
 * runs the serial/parallel comparison for every algorithm.
 *
 * \return 0 on success, 1 if the command line arguments were rejected
 */
int main(int argc, char * argv[]) {
  // Parse command line arguments:
  embb::base::perf::CallArgs args;
  try {
    args.Parse(argc, argv);
  } catch (const ::std::runtime_error & re) {
    // Do not run any test with settings that Parse() rejected.
    ::std::cerr << re.what() << ::std::endl;
    return 1;
  }
  // Print test settings:
  args.Print(::std::cout);
  // Run tests:
  RunPerformanceTest< SerialForEach<float>, ParallelForEach<float> >(args, "ForEach");
  RunPerformanceTest< SerialReduce<float>, ParallelReduce<float> >(args, "Reduce");
  RunPerformanceTest< SerialScan<float>, ParallelScan<float> >(args, "Scan");
  RunPerformanceTest< SerialCount<float>, ParallelCount<float> >(args, "Count");
  RunPerformanceTest< SerialQuickSort<float>, ParallelQuickSort<float> >(args, "Quicksort");
  RunPerformanceTest< SerialMergeSort<float>, ParallelMergeSort<float> >(args, "Mergesort");
  return 0;
}
#endif
/**
 * Entry point: parses the command line, prints the resulting settings,
 * runs the ForEach performance test and prints its report to std::cout.
 *
 * \return 0 on success, 1 if the command line arguments were rejected
 */
int main(int argc, char * argv[]) {
  // Parse command line arguments:
  CallArgs args;
  try {
    args.Parse(argc, argv);
  }
  catch (const ::std::runtime_error & re) {
    // Do not run the test with settings that Parse() rejected.
    ::std::cerr << re.what() << ::std::endl;
    return 1;
  }
  // Print test settings:
  args.Print(::std::cout);
  // Run tests:
  PerformanceTest< SerialForEach<float>, ParallelForEach<float>, CallArgs >
    test(args);
  test.Run();
  test.PrintReport(std::cout);
  return 0;
}
\ No newline at end of file
......@@ -43,9 +43,6 @@ SerialMergeSort<T>::SerialMergeSort(const embb::base::perf::CallArgs & args)
: cargs(args), vector_size(args.VectorSize()) {
v = static_cast<T *>(embb::base::Allocation::AllocateCacheAligned(
vector_size * sizeof(T)));
for (size_t i = 0; i < vector_size; i++) {
v[i] = static_cast<T>(i);
}
}
template<typename T>
......@@ -74,6 +71,14 @@ ParallelMergeSort<T>::~ParallelMergeSort() {
}
/**
 * Test setup hook: (re)fills the input vector with the ascending values
 * 0, 1, ..., vector_size - 1.
 */
template<typename T>
void ParallelMergeSort<T>::Pre() {
  size_t pos = 0;
  while (pos < vector_size) {
    v[pos] = static_cast<T>(pos);
    ++pos;
  }
}
template<typename T>
void ParallelMergeSort<T>::Run(unsigned int numThreads) {
// Always reading input values from memory, no CPU-only test possible
// as mergesort sorts in-place.
......
......@@ -59,7 +59,7 @@ class ParallelMergeSort {
public:
explicit ParallelMergeSort(const embb::base::perf::CallArgs & args);
~ParallelMergeSort();
void Pre() { }
void Pre();
void Run(unsigned int numThreads);
void Post() { }
......
......@@ -72,9 +72,6 @@ ParallelQuickSort<T>::ParallelQuickSort(const embb::base::perf::CallArgs & args)
: cargs(args), vector_size(args.VectorSize()) {
v = static_cast<T *>(embb::base::Allocation::AllocateCacheAligned(
vector_size * sizeof(T)));
for (size_t i = 0; i < vector_size; i++) {
v[i] = static_cast<T>(i);
}
}
template<typename T>
......@@ -83,6 +80,14 @@ ParallelQuickSort<T>::~ParallelQuickSort() {
}
/**
 * Test setup hook: (re)fills the input vector with the ascending values
 * 0, 1, ..., vector_size - 1.
 */
template<typename T>
void ParallelQuickSort<T>::Pre() {
  for (size_t pos = 0; pos != vector_size; ++pos) {
    v[pos] = static_cast<T>(pos);
  }
}
template<typename T>
void ParallelQuickSort<T>::Run(unsigned int numThreads) {
// Always reading input values from memory, no CPU-only test possible
// as quicksort sorts in-place.
......
......@@ -59,7 +59,7 @@ class ParallelQuickSort {
public:
explicit ParallelQuickSort(const embb::base::perf::CallArgs & args);
~ParallelQuickSort();
void Pre() { }
void Pre();
void Run(unsigned int numThreads);
void Post() { }
......
......@@ -93,9 +93,6 @@ ParallelReduce<T>::ParallelReduce(
v = static_cast<T*>(
embb::base::Allocation::AllocateCacheAligned(
vector_size * sizeof(T)));
for (size_t i = 0; i < vector_size; i++) {
v[i] = (T)i;
}
}
else {
v = 0;
......@@ -110,6 +107,16 @@ ParallelReduce<T>::~ParallelReduce() {
}
/**
 * Test setup hook.
 *
 * In RAM stress mode the input vector is (re)filled with the ascending
 * values 0, 1, ..., vector_size - 1. In every other stress mode nothing
 * is written.
 */
template<typename T>
void ParallelReduce<T>::Pre() {
  if (cargs.StressMode() == CallArgs::RAM_STRESS) {
    // Initialize input vector with incrementing values:
    for (size_t i = 0; i < vector_size; i++) {
      // Named cast, consistent with the Pre() hooks of the other
      // algorithm tests.
      v[i] = static_cast<T>(i);
    }
  }
}
template<typename T>
void ParallelReduce<T>::Run(unsigned int numThreads) {
TransformOp<T> op(static_cast<T>(1) / vector_size, cargs);
if (cargs.StressMode() == CallArgs::CPU_STRESS) {
......
......@@ -51,8 +51,8 @@ public:
T operator()(T val) const {
T x = 0;
// Simulate more complex operation depending on
// load factor. Default load factor is 1.
for (size_t i = 0; i < load_factor * 10000; ++i) {
// load factor. Default load factor is 100.
for (size_t i = 0; i < load_factor * 10; ++i) {
x = (val + static_cast<T>(0.5)) * step_size * i;
x = static_cast<T>(4.0 / (1.0 + x * x / load_factor));
}
......@@ -87,7 +87,7 @@ public:
explicit ParallelReduce(
const embb::base::perf::CallArgs & args);
~ParallelReduce();
void Pre() { }
void Pre();
void Run(unsigned int numThreads);
void Post() { }
......
......@@ -101,9 +101,6 @@ ParallelScan<T>::ParallelScan(const embb::base::perf::CallArgs & args) :
if (cargs.StressMode() == CallArgs::RAM_STRESS) {
in = (T *) Allocation::AllocateCacheAligned(
vector_size * sizeof(T));
for (size_t i = 0; i < vector_size; i++) {
in[i] = static_cast<T>(1);
}
}
else {
in = 0;
......@@ -120,6 +117,16 @@ ParallelScan<T>::~ParallelScan() {
}
/**
 * Test setup hook.
 *
 * In RAM stress mode every element of the input vector is (re)set to 1.
 * In every other stress mode nothing is written.
 */
template<typename T>
void ParallelScan<T>::Pre() {
  if (cargs.StressMode() != CallArgs::RAM_STRESS) {
    return;
  }
  for (size_t pos = 0; pos != vector_size; ++pos) {
    in[pos] = static_cast<T>(1);
  }
}
template<typename T>
void ParallelScan<T>::Run(unsigned int numThreads) {
if (cargs.StressMode() == CallArgs::CPU_STRESS) {
CpuStressScanOp<T> op(cargs);
......
......@@ -110,7 +110,7 @@ class ParallelScan {
public:
explicit ParallelScan(const embb::base::perf::CallArgs & args);
~ParallelScan();
void Pre() { }
void Pre();
void Run(unsigned int numThreads);
void Post() { }
......
......@@ -36,33 +36,30 @@ namespace base {
namespace perf {
class CallArgs {
public:
typedef enum {
UNDEFINED_SCALAR_TYPE = 0,
FLOAT,
DOUBLE
} ScalarType;
public:
typedef enum {
UNDEFINED_STRESS_TYPE = 0,
RAM_STRESS,
CPU_STRESS
} StressType;
public:
public:
inline CallArgs() :
element_type(CallArgs::FLOAT),
stress_type(CallArgs::RAM_STRESS),
max_threads(embb::base::CoreSet::CountAvailable()),
vector_size(10000000),
load_factor(100),
parallel_base_ref(0),
load_factor(10),
counter_scale(0) {
}
inline CallArgs(const CallArgs & other) :
stress_type(other.stress_type),
max_threads(other.max_threads),
vector_size(other.vector_size),
load_factor(other.load_factor),
counter_scale(other.counter_scale) {
}
inline CallArgs(int argc, char * argv[]) {
Parse(argc, argv);
}
......@@ -79,18 +76,6 @@ public:
return counter_scale;
}
inline ScalarType ElementType() const {
return element_type;
}
inline ::std::string ElementTypeName() const {
return ((ElementType() == UNDEFINED_SCALAR_TYPE)
? "undefined"
: ((ElementType() == FLOAT)
? "float"
: "double"));
}
inline StressType StressMode() const {
return stress_type;
}
......@@ -108,21 +93,13 @@ public:
inline size_t LoadFactor() const {
return load_factor;
}
inline unsigned int ParallelBaseReference() const {
return parallel_base_ref;;
}
private:
ScalarType element_type;
private:
StressType stress_type;
size_t max_threads;
size_t vector_size;
size_t load_factor;
unsigned int parallel_base_ref;
unsigned int counter_scale;
};
} // namespace perf
......
......@@ -24,14 +24,15 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef EMBB_BASE_PERF_PERF_TEST_UNIT_H_
#define EMBB_BASE_PERF_PERF_TEST_UNIT_H_
#ifndef EMBB_BASE_PERF_PARALLEL_PERF_TEST_UNIT_H_
#define EMBB_BASE_PERF_PARALLEL_PERF_TEST_UNIT_H_
#include <cmath>
#include <vector>
#include <partest/partest.h>
#include <partest/test_unit.h>
#include <embb/base/perf/timer.h>
#include <embb/base/perf/call_args.h>
#include <embb/tasks/tasks.h>
#include <embb/base/c/thread.h>
#include <embb/base/c/internal/thread_index.h>
......@@ -63,62 +64,52 @@ namespace perf {
* \notthreadsafe
* \ingroup CPP_BASE_PERF
*/
template<typename F, class TestParams >
class PerfTestUnit : public partest::TestUnit {
template<typename ParallelF>
class ParallelPerfTestUnit : public partest::TestUnit {
public:
/**
* Constructs PerfTestUnit and sets up partest::TestUnit with Functor \c F.
*/
explicit PerfTestUnit(const TestParams & params)
: partest::TestUnit("PTU"),
params_(params),
duration_(0) {
func = new F(params_);
// Add(&PerfTestUnit<F, TestParams>::Run, this);
explicit ParallelPerfTestUnit(const CallArgs & params) :
partest::TestUnit("ParallelPerfTestUnit"),
params_(params) {
func_ = new ParallelF(params_);
Add(&ParallelPerfTestUnit<ParallelF>::Run, this);
}
/**
* Destructs PerfTestUnit
* Destructs ParallelPerfTestUnit
*/
~PerfTestUnit() {
delete func;
~ParallelPerfTestUnit() {
delete func_;
}
/**
* Returns duration of this unit in microseconds.
* \return Duration of this unit in microseconds.
* Durations of single runs of this unit in microseconds.
* \return Vector of durations of single runs of this unit
* ordered by number of threads, in microseconds.
*/
double GetDuration() const { return duration_; }
#if 0
/**
* Returns thread count of this unit.
* \return Thread count of this unit.
*/
size_t GetThreadCount() const { return thread_count_; }
/**
* Returns iteration count of this unit.
* \return Iteration count of this unit.
*/
size_t GetIterationCount() const { return iteration_count_; }
#endif
const std::vector< std::pair<unsigned int, double> > & GetDurations() const {
return durations_;
}
private:
void Run() {
for (unsigned int num_threads = 1;
num_threads < params_.MaxThreads();) {
func->Pre();
Tic();
func->Run(num_threads);
Toc();
func->Post();
if (num_threads < 4) {
num_threads++;
} else {
num_threads += 4;
}
}
/**
* Run performance test
*/
void Run() {
for (unsigned int num_threads = 1; num_threads <= params_.MaxThreads();) {
func_->Pre();
Tic();
func_->Run(num_threads);
Toc(num_threads);
func_->Post();
if (num_threads < 4) {
num_threads++;
} else {
num_threads += 4;
}
}
}
/**
......@@ -149,26 +140,25 @@ class PerfTestUnit : public partest::TestUnit {
/**
* Stops timer and resets EMBB
*/
void Toc() {
void Toc(unsigned int num_threads) {
// stop timer
duration_ = timer_.Elapsed();
durations_.push_back(
std::make_pair(num_threads, timer_.Elapsed()));
embb::tasks::Node::Finalize();
}
const TestParams & params_;
double duration_;
//size_t thread_count_;
//size_t iteration_count_;
const CallArgs params_;
std::vector< std::pair<unsigned int, double> > durations_;
Timer timer_;
F *func;
ParallelF * func_;
// prohibit copy and assignment
PerfTestUnit(const PerfTestUnit &other);
PerfTestUnit& operator=(const PerfTestUnit &other);
ParallelPerfTestUnit(const ParallelPerfTestUnit &other);
ParallelPerfTestUnit& operator=(const ParallelPerfTestUnit & other);
};
} // perf
} // base
} // embb
#endif /* EMBB_BASE_PERF_PERF_TEST_UNIT_H_ */
#endif // EMBB_BASE_PERF_PARALLEL_PERF_TEST_UNIT_H_
/*
* Copyright (c) 2014, Siemens AG. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef EMBB_BASE_CPP_PERF_PERF_H_
#define EMBB_BASE_CPP_PERF_PERF_H_
#include <embb/base/perf/call_args.h>
#include <string>
#include <fstream>
#include <iostream>
#include <iomanip>
/**
 * Defines the entry point of a performance test executable.
 *
 * The macro expands to
 *  - the helper template PartestRunPerformanceTest(), which runs a test
 *    and prints its report to std::cout,
 *  - a main() function that prints \c component, parses the command line
 *    into a CallArgs instance named \c perf_test_params, prints the
 *    settings and calls PartestRunPerformanceTests(),
 *  - the head of the definition of PartestRunPerformanceTests(); the
 *    braces following the macro invocation form its body.
 *
 * If CallArgs::Parse() throws, the message is written to std::cerr and
 * main() returns 1 without running any test, instead of running the
 * tests with settings that were rejected.
 */
#define PT_PERF_MAIN(component) \
template <class Test> \
void PartestRunPerformanceTest(Test & test) { \
  test.Run(); \
  test.PrintReport(std::cout); \
} \
void PartestRunPerformanceTests( \
  embb::base::perf::CallArgs & perf_test_params); \
int main(int argc, char** argv) { \
  std::cout << component << ::std::endl; \
  embb::base::perf::CallArgs perf_test_params; \
  try { \
    perf_test_params.Parse(argc, argv); \
  } \
  catch (const ::std::runtime_error & re) { \
    ::std::cerr << re.what() << ::std::endl; \
    return 1; \
  } \
  perf_test_params.Print(::std::cout); \
  PartestRunPerformanceTests(perf_test_params); \
  return 0; \
} \
void PartestRunPerformanceTests( \
  embb::base::perf::CallArgs & perf_test_params)
/**
 * Runs one performance test inside the body that follows PT_PERF_MAIN.
 *
 * Constructs a test of type \c PT_PERF_TEST from \c perf_test_params,
 * passes it to PartestRunPerformanceTest() and prints a progress line to
 * std::cout before and after the run.
 *
 * The test is created as a named local object: the helper takes its
 * argument by non-const reference, which cannot bind to a temporary in
 * standard C++. The macro expands to a single statement and has to be
 * followed by a semicolon.
 */
#define PT_PERF_RUN(PT_PERF_TEST) \
  do { \
    std::cout << "Running " << #PT_PERF_TEST << " ..." << std::endl; \
    PT_PERF_TEST pt_perf_test_instance_(perf_test_params); \
    PartestRunPerformanceTest(pt_perf_test_instance_); \
    std::cout << "Running " << #PT_PERF_TEST << " ..." << " done" << std::endl; \
  } while (0)
#endif // EMBB_BASE_CPP_PERF_PERF_H_
\ No newline at end of file
......@@ -50,10 +50,7 @@ class PerformanceTest : public partest::TestCase {
* Constructs PerformanceTest.
*/
explicit PerformanceTest(const TestParams & params)
: partest::TestCase(), params_(params) {
// maximum one thread per available core
size_t threads = std::min<size_t>(params.MaxThreads(),
embb::base::CoreSet::CountAvailable());
: partest::TestCase(), params_(params) {
unit_ = &CreateUnit< PerfTestUnit<ParallelF, TestParams> >(params_);
}
......@@ -82,8 +79,8 @@ class PerformanceTest : public partest::TestCase {
PerformanceTest& operator=(const PerformanceTest &other);
};
} /* perf */
} /* base */
} /* embb */
} // perf
} // base
} // embb
#endif /* EMBB_BASE_PERF_PERFORMANCE_TEST_H_ */
#endif // EMBB_BASE_PERF_PERFORMANCE_TEST_H_
/*
* Copyright (c) 2014, Siemens AG. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef EMBB_BASE_PERF_SERIAL_PERF_TEST_UNIT_H_
#define EMBB_BASE_PERF_SERIAL_PERF_TEST_UNIT_H_
#include <cmath>
#include <vector>
#include <partest/partest.h>
#include <partest/test_unit.h>
#include <embb/base/perf/timer.h>
#include <embb/base/perf/call_args.h>
#include <embb/tasks/tasks.h>
#include <embb/base/c/thread.h>
#include <embb/base/c/internal/thread_index.h>
#define THIS_DOMAIN_ID 1
#define THIS_NODE_ID 1
namespace embb {
namespace base {
namespace perf {
/**
* \defgroup CPP_BASE_PERF Performance Tests
*
* Performance Test Framework
*
* \ingroup CPP_BASE
*/
/**
 * Serial Performance Test Unit
 *
 * Test unit measuring a single serial execution of a functor. The functor
 * type \c SerialF is constructed from the call arguments and has to
 * provide the methods \c Pre(), \c Run() and \c Post().
 *
 * \notthreadsafe
 * \ingroup CPP_BASE_PERF
 */
template<typename SerialF>
class SerialPerfTestUnit : public partest::TestUnit {
 public:
  /**
   * Constructs the unit, creates the functor from a copy of \c params and
   * registers Run() with partest::TestUnit.
   */
  explicit SerialPerfTestUnit(const CallArgs & params)
    : partest::TestUnit("SerialPerfTestUnit"),
      params_(params),
      duration_(0),
      func_(new SerialF(params_)) {
    Add(&SerialPerfTestUnit<SerialF>::Run, this);
  }

  /**
   * Destructs SerialPerfTestUnit and the functor it owns.
   */
  ~SerialPerfTestUnit() {
    delete func_;
  }

  /**
   * Duration of the most recent run of this unit.
   * \return Value obtained from Timer::Elapsed() for the last run,
   *         0 if the unit has not been run yet.
   */
  double GetDuration() const {
    return duration_;
  }

 private:
  /**
   * Runs the performance test once. Only the functor's Run() call lies
   * between Tic() and Toc(); Pre() and Post() are not measured.
   */
  void Run() {
    func_->Pre();
    Tic();
    func_->Run();
    Toc();
    func_->Post();
  }

  /**
   * Starts the measurement by replacing the timer with a new one.
   */
  void Tic() {
    timer_ = Timer();
  }

  /**
   * Ends the measurement and stores the elapsed time.
   */
  void Toc() {
    duration_ = timer_.Elapsed();
  }

  // Private copy of the call arguments the functor is created from
  const CallArgs params_;
  // Elapsed time of the last run
  double duration_;
  Timer timer_;
  // Functor under test, owned by this unit
  SerialF * func_;

  // prohibit copy and assignment
  SerialPerfTestUnit(const SerialPerfTestUnit & other);
  SerialPerfTestUnit& operator=(const SerialPerfTestUnit & other);
};
} // perf
} // base
} // embb
#endif // EMBB_BASE_PERF_SERIAL_PERF_TEST_UNIT_H_
......@@ -31,7 +31,9 @@
#include <iomanip>
#include <partest/partest.h>
#include <embb/base/perf/timer.h>
#include <embb/base/perf/perf_test_unit.h>
#include <embb/base/perf/call_args.h>
#include <embb/base/perf/parallel_perf_test_unit.h>
#include <embb/base/perf/serial_perf_test_unit.h>
namespace embb {
namespace base {
......@@ -49,32 +51,18 @@ namespace perf {
* \notthreadsafe
* \ingroup CPP_BASE_PERF
*/
template<typename ParallelF, typename SerialF>
template<typename SerialF, typename ParallelF>
class SpeedupTest : public partest::TestCase {
public:
/**
* Constructs SpeedupTest and creates test units.
*/
explicit SpeedupTest(
size_t max_thread_count = partest::TestSuite::GetDefaultNumThreads(),
size_t iteration_count = partest::TestSuite::GetDefaultNumIterations()) :
partest::TestCase() {
/* maximum one thread per available core */
size_t threads = std::min<size_t>(
max_thread_count,
embb::base::CoreSet::CountAvailable());
std::cout << "Test configuration ------------------------------------" << std::endl;
std::cout << " Num threads: " << threads << std::endl;
std::cout << " Iterations: " << iteration_count << std::endl;
/* create unit for serial version */
ser_unit_ = &CreateUnit< PerfTestUnit<SerialF> >(0, iteration_count);
/* create log2(threads)+1 units for parallel version */
for (size_t i = 1; i <= threads; i = i * 2) {
par_units_.push_back(
&CreateUnit< PerfTestUnit<ParallelF> >(i, iteration_count));
}
explicit SpeedupTest(const embb::base::perf::CallArgs & params)
: partest::TestCase(), params_(params) {
// create unit for serial version
ser_unit_ = &CreateUnit< SerialPerfTestUnit<SerialF>, CallArgs >(params_);
// create unit for parallel version
par_unit_ = &CreateUnit< ParallelPerfTestUnit<ParallelF>, CallArgs >(params_);
}
/**
......@@ -87,31 +75,39 @@ class SpeedupTest : public partest::TestCase {
* Prints the durations of all units in comma separated format.
*/
void PrintReport(std::ostream & ostr) {
/* print sample row for sequential run (degree 0): */
double serial_duration = ser_unit_->GetDuration();
// print sample row for sequential run (degree 0):
ostr << "0,"
<< std::fixed << std::setprecision(2)
<< ser_unit_->GetDuration() << std::endl;
/* print sample rows for parallel runs (degree > 0): */
for (int i = 0; i < par_units_.size(); ++i) {
ostr << std::fixed << par_units_[i]->GetThreadCount()
<< serial_duration << ","
<< std::fixed << 1.0
<< std::endl;
// print sample rows for parallel runs (degree > 0):
std::vector < std::pair< unsigned int, double > > durations =
par_unit_->GetDurations();
for (unsigned int i = 0; i < durations.size(); ++i) {
ostr << std::fixed << durations[i].first
<< ","
<< std::fixed << std::setprecision(2)
<< par_units_[i]->GetDuration()
<< durations[i].second
<< ","
<< std::fixed << serial_duration / durations[i].second
<< std::endl;
}
}
private:
std::vector<PerfTestUnit<ParallelF> *> par_units_;
PerfTestUnit<SerialF> *ser_unit_;
const CallArgs & params_;
ParallelPerfTestUnit<ParallelF> * par_unit_;
SerialPerfTestUnit<SerialF> * ser_unit_;
/* prohibit copy and assignment */
SpeedupTest(const SpeedupTest &other);
SpeedupTest& operator=(const SpeedupTest &other);
};
} /* perf */
} /* base */
} /* embb */
} // perf
} // base
} // embb
#endif /* EMBB_BASE_PERF_SPEEDUP_TEST_H_ */
......@@ -37,17 +37,6 @@ void CallArgs::Parse(int argc, char * argv[]) {
counter_scale = scale_param;
}
}
// Element type:
if (std::string(argv[paramIndex]) == "-e") {
element_type = UNDEFINED_SCALAR_TYPE;
::std::string type = argv[paramIndex + 1];
if (type == "float") {
element_type = FLOAT;
}
else if (type == "double") {
element_type = DOUBLE;
}
}
// Stress type:
if (std::string(argv[paramIndex]) == "-s") {
stress_type = UNDEFINED_STRESS_TYPE;
......@@ -64,15 +53,6 @@ void CallArgs::Parse(int argc, char * argv[]) {
load_factor = static_cast<size_t>(
atoi(argv[paramIndex + 1]));
}
// Additional test parameter:
if (std::string(argv[paramIndex]) == "-p") {
parallel_base_ref = atoi(argv[paramIndex + 1]);
}
// Sanitizing and error handling:
if (element_type == UNDEFINED_SCALAR_TYPE) {
throw ::std::runtime_error(
"Invalid setting for element type (-e int|float|double)");
}
if (stress_type == UNDEFINED_STRESS_TYPE) {
throw ::std::runtime_error(
"Invalid setting for stress test type (-s ram|cpu)");
......@@ -88,9 +68,7 @@ void CallArgs::Print(std::ostream & os) {
os << "Max. threads: (-t) " << MaxThreads() << std::endl
<< "Vector size: (-n) " << VectorSize() << std::endl
<< "Load factor: (-l) " << LoadFactor() << std::endl
<< "Element type: (-e) " << ElementTypeName() << std::endl
<< "Stress mode: (-s) " << StressModeName() << std::endl
<< "Serial base ref: (-p) " << ParallelBaseReference() << std::endl
<< "Time sampling: (-f) " << embb::base::perf::Timer::TimerName()
<< std::endl;
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment