Commit 517d459b by Tobias Fuchs

performance tests: finished refactoring of performance test framework

parent f9beb8b6
...@@ -79,9 +79,6 @@ ParallelForEach<T>::ParallelForEach(const embb::base::perf::CallArgs & args) ...@@ -79,9 +79,6 @@ ParallelForEach<T>::ParallelForEach(const embb::base::perf::CallArgs & args)
if (cargs.StressMode() == CallArgs::RAM_STRESS) { if (cargs.StressMode() == CallArgs::RAM_STRESS) {
v = static_cast<T *>(embb::base::Allocation::AllocateCacheAligned( v = static_cast<T *>(embb::base::Allocation::AllocateCacheAligned(
vector_size * sizeof(T))); vector_size * sizeof(T)));
for (size_t i = 0; i < vector_size; i++) {
v[i] = static_cast<T>(i);
}
} else { } else {
v = 0; v = 0;
} }
...@@ -95,6 +92,16 @@ ParallelForEach<T>::~ParallelForEach() { ...@@ -95,6 +92,16 @@ ParallelForEach<T>::~ParallelForEach() {
} }
template<typename T> template<typename T>
void ParallelForEach<T>::Pre() {
  // Test setup hook. The input buffer is only filled when the test is
  // configured for RAM stress; in every other stress mode this is a no-op.
  if (cargs.StressMode() != CallArgs::RAM_STRESS) {
    return;
  }
  // Fill the input vector with the sequence 0, 1, 2, ...
  for (size_t idx = 0; idx < vector_size; ++idx) {
    v[idx] = static_cast<T>(idx);
  }
}
template<typename T>
void ParallelForEach<T>::Run(unsigned int numThreads) { void ParallelForEach<T>::Run(unsigned int numThreads) {
if (cargs.StressMode() == CallArgs::CPU_STRESS) { if (cargs.StressMode() == CallArgs::CPU_STRESS) {
// Computing input values, no memory access // Computing input values, no memory access
......
...@@ -34,15 +34,13 @@ namespace embb { ...@@ -34,15 +34,13 @@ namespace embb {
namespace algorithms { namespace algorithms {
namespace perf { namespace perf {
using embb::base::perf::CallArgs;
/** /**
* Operation performed in each loop iteration. * Operation performed in each loop iteration.
*/ */
template<typename T> template<typename T>
class ForEachOp { class ForEachOp {
public: public:
explicit ForEachOp(const CallArgs & args) : explicit ForEachOp(const embb::base::perf::CallArgs & args) :
load_factor(args.LoadFactor()) { } load_factor(args.LoadFactor()) { }
void operator()(T & val) const { void operator()(T & val) const {
T x = val; T x = val;
...@@ -54,13 +52,13 @@ public: ...@@ -54,13 +52,13 @@ public:
val = x; val = x;
} }
private: private:
unsigned int load_factor; size_t load_factor;
}; };
template<typename T> template<typename T>
class ForEachFunctor { class ForEachFunctor {
public: public:
ForEachFunctor(const CallArgs & args) : ForEachFunctor(const embb::base::perf::CallArgs & args) :
cargs(args), op(args) { } cargs(args), op(args) { }
void operator()(T & value) const { void operator()(T & value) const {
op(value); op(value);
...@@ -75,7 +73,7 @@ public: ...@@ -75,7 +73,7 @@ public:
return *this; return *this;
} }
private: private:
const CallArgs & cargs; const embb::base::perf::CallArgs & cargs;
ForEachOp<T> op; ForEachOp<T> op;
}; };
...@@ -103,7 +101,7 @@ class ParallelForEach { ...@@ -103,7 +101,7 @@ class ParallelForEach {
public: public:
explicit ParallelForEach(const embb::base::perf::CallArgs & args); explicit ParallelForEach(const embb::base::perf::CallArgs & args);
~ParallelForEach(); ~ParallelForEach();
void Pre() { } void Pre();
void Run(unsigned int numThreads); void Run(unsigned int numThreads);
void Post() { } void Post() { }
......
...@@ -24,21 +24,8 @@ ...@@ -24,21 +24,8 @@
* POSSIBILITY OF SUCH DAMAGE. * POSSIBILITY OF SUCH DAMAGE.
*/ */
#include <string>
#include <fstream>
#include <iostream>
#include <iomanip>
#include <partest/partest.h> #include <partest/partest.h>
#include <embb/base/core_set.h>
#include <embb/base/thread.h>
#include <embb/base/c/log.h>
#include <embb/base/c/internal/thread_index.h>
#include <embb/tasks/tasks.h>
#include <embb/base/perf/call_args.h>
#include <embb/base/perf/timer.h>
#include <for_each_perf.h> #include <for_each_perf.h>
#include <reduce_perf.h> #include <reduce_perf.h>
#include <scan_perf.h> #include <scan_perf.h>
...@@ -46,159 +33,33 @@ ...@@ -46,159 +33,33 @@
#include <quick_sort_perf.h> #include <quick_sort_perf.h>
#include <merge_sort_perf.h> #include <merge_sort_perf.h>
#include <embb/base/perf/performance_test.h> #include <embb/base/perf/perf.h>
#include <embb/base/perf/speedup_test.h>
using namespace embb::algorithms::perf; using embb::algorithms::perf::SerialForEach;
using embb::algorithms::perf::SerialReduce;
using embb::algorithms::perf::SerialScan;
using embb::algorithms::perf::SerialCount;
using embb::algorithms::perf::SerialScan;
using embb::algorithms::perf::SerialMergeSort;
using embb::algorithms::perf::SerialQuickSort;
using embb::algorithms::perf::ParallelForEach;
using embb::algorithms::perf::ParallelReduce;
using embb::algorithms::perf::ParallelScan;
using embb::algorithms::perf::ParallelCount;
using embb::algorithms::perf::ParallelMergeSort;
using embb::algorithms::perf::ParallelQuickSort;
using embb::base::perf::Timer; using embb::base::perf::Timer;
using embb::base::perf::CallArgs; using embb::base::perf::CallArgs;
using embb::base::perf::PerformanceTest; using embb::base::perf::SpeedupTest;
#if 0
void ReportResult(
const std::string & name,
unsigned int threads,
const CallArgs & args,
double elapsed,
double speedup) {
std::cout
<< args.VectorSize() << ","
<< args.ElementTypeName() << ","
<< args.LoadFactor() << ","
<< args.StressModeName() << ","
<< threads << ","
<< std::fixed << elapsed << ","
<< std::setprecision(3) << speedup
<< std::endl;
std::ofstream file;
std::string filename = "performance_tests_result.csv";
file.open(filename.c_str(), ::std::ofstream::out | ::std::ofstream::app);
file
<< name << ","
<< args.VectorSize() << ","
<< args.ElementTypeName() << ","
<< args.LoadFactor() << ","
<< args.StressModeName() << ","
<< threads << ","
<< std::fixed << elapsed << ","
<< std::setprecision(3) << speedup
<< std::endl;
}
template< typename TestSerial, typename TestParallel >
void RunPerformanceTest(
const embb::base::perf::CallArgs & args,
const std::string & name) {
std::cout << "--- Running " << name << std::endl;
// Initialize new test instances:
TestParallel testParallel(args);
// Parallel runs:
unsigned int threads = 1;
// Base value to compute speedup; parallel execution
// with 1 thread or serial execution.
double baseDuration = 0;
// Whether to use serial or parallel execution using 1
// thread for speedup reference:
if (args.ParallelBaseReference() == 0) {
TestSerial testSerial(args);
// Serial run:
Timer t;
testSerial.Run();
double elapsed = t.Elapsed();
ReportResult(
name,
0,
args,
elapsed,
1.0);
baseDuration = elapsed;
}
threads += args.ParallelBaseReference();
// Run executions with incrementing number of threads:
embb_internal_thread_index_set_max(args.MaxThreads());
while(threads <= args.MaxThreads()) {
// Set number of available threads to given limit:
// embb::base::Thread::SetThreadsMaxCount(threads);
embb_internal_thread_index_reset();
// Configure cores to be used by EMBB:
embb::base::CoreSet cores(false);
for (unsigned int coreId = 0; coreId < threads; ++coreId) {
cores.Add(coreId);
}
embb::tasks::Node::Initialize(
1, 1, cores,
MTAPI_NODE_MAX_TASKS_DEFAULT * 8,
MTAPI_NODE_MAX_GROUPS_DEFAULT * 8,
MTAPI_NODE_MAX_QUEUES_DEFAULT * 8,
MTAPI_NODE_QUEUE_LIMIT_DEFAULT * 8,
MTAPI_NODE_MAX_PRIORITIES_DEFAULT);
// Test setup:
testParallel.Pre();
// Initialize timer:
Timer t;
// Run the test body:
testParallel.Run(threads);
// Report duration:
double elapsed = t.Elapsed();
if (threads == 1) {
baseDuration = elapsed;
}
ReportResult(
name,
threads,
args,
elapsed,
static_cast<double>(baseDuration) / static_cast<double>(elapsed));
// Test teardown:
testParallel.Post();
if (threads < 4) {
++threads;
} else {
threads += 4;
}
embb::tasks::Node::Finalize();
}
}
int main(int argc, char * argv[]) {
// Parse command line arguments:
embb::base::perf::CallArgs args;
try {
args.Parse(argc, argv);
} catch (::std::runtime_error & re) {
::std::cerr << re.what() << ::std::endl;
}
// Print test settings:
args.Print(::std::cout);
// Run tests:
RunPerformanceTest< SerialForEach<float>, ParallelForEach<float> >(args, "ForEach");
RunPerformanceTest< SerialReduce<float>, ParallelReduce<float> >(args, "Reduce");
RunPerformanceTest< SerialScan<float>, ParallelScan<float> >(args, "Scan");
RunPerformanceTest< SerialCount<float>, ParallelCount<float> >(args, "Count");
RunPerformanceTest< SerialQuickSort<float>, ParallelQuickSort<float> >(args, "Quicksort");
RunPerformanceTest< SerialMergeSort<float>, ParallelMergeSort<float> >(args, "Mergesort");
return 0;
}
#endif
int main(int argc, char * argv[]) { #define COMMA ,
// Parse command line arguments:
CallArgs args;
try {
args.Parse(argc, argv);
}
catch (::std::runtime_error & re) {
::std::cerr << re.what() << ::std::endl;
}
// Print test settings:
args.Print(::std::cout);
// Run tests:
PerformanceTest< SerialForEach<float>, ParallelForEach<float>, CallArgs >
test(args);
test.Run();
test.PrintReport(std::cout);
return 0; PT_PERF_MAIN("Algorithms") {
PT_PERF_RUN(SpeedupTest< SerialForEach<float> COMMA ParallelForEach<float> >);
PT_PERF_RUN(SpeedupTest< SerialReduce<float> COMMA ParallelReduce<float> >);
PT_PERF_RUN(SpeedupTest< SerialScan<float> COMMA ParallelScan<float> >);
PT_PERF_RUN(SpeedupTest< SerialCount<float> COMMA ParallelCount<float> >);
PT_PERF_RUN(SpeedupTest< SerialMergeSort<float> COMMA ParallelMergeSort<float> >);
PT_PERF_RUN(SpeedupTest< SerialQuickSort<float> COMMA ParallelQuickSort<float> >);
} }
...@@ -43,9 +43,6 @@ SerialMergeSort<T>::SerialMergeSort(const embb::base::perf::CallArgs & args) ...@@ -43,9 +43,6 @@ SerialMergeSort<T>::SerialMergeSort(const embb::base::perf::CallArgs & args)
: cargs(args), vector_size(args.VectorSize()) { : cargs(args), vector_size(args.VectorSize()) {
v = static_cast<T *>(embb::base::Allocation::AllocateCacheAligned( v = static_cast<T *>(embb::base::Allocation::AllocateCacheAligned(
vector_size * sizeof(T))); vector_size * sizeof(T)));
for (size_t i = 0; i < vector_size; i++) {
v[i] = static_cast<T>(i);
}
} }
template<typename T> template<typename T>
...@@ -74,6 +71,14 @@ ParallelMergeSort<T>::~ParallelMergeSort() { ...@@ -74,6 +71,14 @@ ParallelMergeSort<T>::~ParallelMergeSort() {
} }
template<typename T> template<typename T>
void ParallelMergeSort<T>::Pre() {
  // Test setup hook: (re)fill the input vector with the ascending sequence
  // 0, 1, 2, ... regardless of the configured stress mode.
  size_t idx = 0;
  while (idx < vector_size) {
    v[idx] = static_cast<T>(idx);
    ++idx;
  }
}
template<typename T>
void ParallelMergeSort<T>::Run(unsigned int numThreads) { void ParallelMergeSort<T>::Run(unsigned int numThreads) {
// Always reading input values from memory, no CPU-only test possible // Always reading input values from memory, no CPU-only test possible
// as mergesort sorts in-place. // as mergesort sorts in-place.
......
...@@ -59,7 +59,7 @@ class ParallelMergeSort { ...@@ -59,7 +59,7 @@ class ParallelMergeSort {
public: public:
explicit ParallelMergeSort(const embb::base::perf::CallArgs & args); explicit ParallelMergeSort(const embb::base::perf::CallArgs & args);
~ParallelMergeSort(); ~ParallelMergeSort();
void Pre() { } void Pre();
void Run(unsigned int numThreads); void Run(unsigned int numThreads);
void Post() { } void Post() { }
......
...@@ -72,9 +72,6 @@ ParallelQuickSort<T>::ParallelQuickSort(const embb::base::perf::CallArgs & args) ...@@ -72,9 +72,6 @@ ParallelQuickSort<T>::ParallelQuickSort(const embb::base::perf::CallArgs & args)
: cargs(args), vector_size(args.VectorSize()) { : cargs(args), vector_size(args.VectorSize()) {
v = static_cast<T *>(embb::base::Allocation::AllocateCacheAligned( v = static_cast<T *>(embb::base::Allocation::AllocateCacheAligned(
vector_size * sizeof(T))); vector_size * sizeof(T)));
for (size_t i = 0; i < vector_size; i++) {
v[i] = static_cast<T>(i);
}
} }
template<typename T> template<typename T>
...@@ -83,6 +80,14 @@ ParallelQuickSort<T>::~ParallelQuickSort() { ...@@ -83,6 +80,14 @@ ParallelQuickSort<T>::~ParallelQuickSort() {
} }
template<typename T> template<typename T>
void ParallelQuickSort<T>::Pre() {
  // Test setup hook: (re)fill the input vector with the ascending sequence
  // 0, 1, 2, ... regardless of the configured stress mode.
  size_t idx = 0;
  while (idx < vector_size) {
    v[idx] = static_cast<T>(idx);
    ++idx;
  }
}
template<typename T>
void ParallelQuickSort<T>::Run(unsigned int numThreads) { void ParallelQuickSort<T>::Run(unsigned int numThreads) {
// Always reading input values from memory, no CPU-only test possible // Always reading input values from memory, no CPU-only test possible
// as quicksort sorts in-place. // as quicksort sorts in-place.
......
...@@ -59,7 +59,7 @@ class ParallelQuickSort { ...@@ -59,7 +59,7 @@ class ParallelQuickSort {
public: public:
explicit ParallelQuickSort(const embb::base::perf::CallArgs & args); explicit ParallelQuickSort(const embb::base::perf::CallArgs & args);
~ParallelQuickSort(); ~ParallelQuickSort();
void Pre() { } void Pre();
void Run(unsigned int numThreads); void Run(unsigned int numThreads);
void Post() { } void Post() { }
......
...@@ -93,9 +93,6 @@ ParallelReduce<T>::ParallelReduce( ...@@ -93,9 +93,6 @@ ParallelReduce<T>::ParallelReduce(
v = static_cast<T*>( v = static_cast<T*>(
embb::base::Allocation::AllocateCacheAligned( embb::base::Allocation::AllocateCacheAligned(
vector_size * sizeof(T))); vector_size * sizeof(T)));
for (size_t i = 0; i < vector_size; i++) {
v[i] = (T)i;
}
} }
else { else {
v = 0; v = 0;
...@@ -110,6 +107,16 @@ ParallelReduce<T>::~ParallelReduce() { ...@@ -110,6 +107,16 @@ ParallelReduce<T>::~ParallelReduce() {
} }
template<typename T> template<typename T>
void ParallelReduce<T>::Pre() {
  // Test setup hook. The input buffer is only filled when the test is
  // configured for RAM stress; in every other stress mode this is a no-op.
  if (cargs.StressMode() == CallArgs::RAM_STRESS) {
    // Initialize input vector with incrementing values. A named cast is
    // used instead of a C-style cast, matching the other Pre() hooks.
    for (size_t i = 0; i < vector_size; i++) {
      v[i] = static_cast<T>(i);
    }
  }
}
template<typename T>
void ParallelReduce<T>::Run(unsigned int numThreads) { void ParallelReduce<T>::Run(unsigned int numThreads) {
TransformOp<T> op(static_cast<T>(1) / vector_size, cargs); TransformOp<T> op(static_cast<T>(1) / vector_size, cargs);
if (cargs.StressMode() == CallArgs::CPU_STRESS) { if (cargs.StressMode() == CallArgs::CPU_STRESS) {
......
...@@ -51,8 +51,8 @@ public: ...@@ -51,8 +51,8 @@ public:
T operator()(T val) const { T operator()(T val) const {
T x = 0; T x = 0;
// Simulate more complex operation depending on // Simulate more complex operation depending on
// load factor. Default load factor is 1. // load factor. Default load factor is 10.
for (size_t i = 0; i < load_factor * 10000; ++i) { for (size_t i = 0; i < load_factor * 10; ++i) {
x = (val + static_cast<T>(0.5)) * step_size * i; x = (val + static_cast<T>(0.5)) * step_size * i;
x = static_cast<T>(4.0 / (1.0 + x * x / load_factor)); x = static_cast<T>(4.0 / (1.0 + x * x / load_factor));
} }
...@@ -87,7 +87,7 @@ public: ...@@ -87,7 +87,7 @@ public:
explicit ParallelReduce( explicit ParallelReduce(
const embb::base::perf::CallArgs & args); const embb::base::perf::CallArgs & args);
~ParallelReduce(); ~ParallelReduce();
void Pre() { } void Pre();
void Run(unsigned int numThreads); void Run(unsigned int numThreads);
void Post() { } void Post() { }
......
...@@ -101,9 +101,6 @@ ParallelScan<T>::ParallelScan(const embb::base::perf::CallArgs & args) : ...@@ -101,9 +101,6 @@ ParallelScan<T>::ParallelScan(const embb::base::perf::CallArgs & args) :
if (cargs.StressMode() == CallArgs::RAM_STRESS) { if (cargs.StressMode() == CallArgs::RAM_STRESS) {
in = (T *) Allocation::AllocateCacheAligned( in = (T *) Allocation::AllocateCacheAligned(
vector_size * sizeof(T)); vector_size * sizeof(T));
for (size_t i = 0; i < vector_size; i++) {
in[i] = static_cast<T>(1);
}
} }
else { else {
in = 0; in = 0;
...@@ -120,6 +117,16 @@ ParallelScan<T>::~ParallelScan() { ...@@ -120,6 +117,16 @@ ParallelScan<T>::~ParallelScan() {
} }
template<typename T> template<typename T>
void ParallelScan<T>::Pre() {
  // Test setup hook. The input buffer is only filled when the test is
  // configured for RAM stress; in every other stress mode this is a no-op.
  if (cargs.StressMode() != CallArgs::RAM_STRESS) {
    return;
  }
  // Set every input element to 1.
  for (size_t idx = 0; idx < vector_size; ++idx) {
    in[idx] = static_cast<T>(1);
  }
}
template<typename T>
void ParallelScan<T>::Run(unsigned int numThreads) { void ParallelScan<T>::Run(unsigned int numThreads) {
if (cargs.StressMode() == CallArgs::CPU_STRESS) { if (cargs.StressMode() == CallArgs::CPU_STRESS) {
CpuStressScanOp<T> op(cargs); CpuStressScanOp<T> op(cargs);
......
...@@ -110,7 +110,7 @@ class ParallelScan { ...@@ -110,7 +110,7 @@ class ParallelScan {
public: public:
explicit ParallelScan(const embb::base::perf::CallArgs & args); explicit ParallelScan(const embb::base::perf::CallArgs & args);
~ParallelScan(); ~ParallelScan();
void Pre() { } void Pre();
void Run(unsigned int numThreads); void Run(unsigned int numThreads);
void Post() { } void Post() { }
......
...@@ -36,33 +36,30 @@ namespace base { ...@@ -36,33 +36,30 @@ namespace base {
namespace perf { namespace perf {
class CallArgs { class CallArgs {
public:
public:
typedef enum {
UNDEFINED_SCALAR_TYPE = 0,
FLOAT,
DOUBLE
} ScalarType;
typedef enum { typedef enum {
UNDEFINED_STRESS_TYPE = 0, UNDEFINED_STRESS_TYPE = 0,
RAM_STRESS, RAM_STRESS,
CPU_STRESS CPU_STRESS
} StressType; } StressType;
public: public:
inline CallArgs() : inline CallArgs() :
element_type(CallArgs::FLOAT),
stress_type(CallArgs::RAM_STRESS), stress_type(CallArgs::RAM_STRESS),
max_threads(embb::base::CoreSet::CountAvailable()), max_threads(embb::base::CoreSet::CountAvailable()),
vector_size(10000000), vector_size(10000000),
load_factor(100), load_factor(10),
parallel_base_ref(0),
counter_scale(0) { counter_scale(0) {
} }
// Copy constructor: copies each of the five configuration fields
// (stress type, maximum thread count, vector size, load factor and
// counter scale) from \c other.
inline CallArgs(const CallArgs & other) :
stress_type(other.stress_type),
max_threads(other.max_threads),
vector_size(other.vector_size),
load_factor(other.load_factor),
counter_scale(other.counter_scale) {
}
inline CallArgs(int argc, char * argv[]) { inline CallArgs(int argc, char * argv[]) {
Parse(argc, argv); Parse(argc, argv);
} }
...@@ -79,18 +76,6 @@ public: ...@@ -79,18 +76,6 @@ public:
return counter_scale; return counter_scale;
} }
inline ScalarType ElementType() const {
return element_type;
}
inline ::std::string ElementTypeName() const {
return ((ElementType() == UNDEFINED_SCALAR_TYPE)
? "undefined"
: ((ElementType() == FLOAT)
? "float"
: "double"));
}
inline StressType StressMode() const { inline StressType StressMode() const {
return stress_type; return stress_type;
} }
...@@ -109,20 +94,12 @@ public: ...@@ -109,20 +94,12 @@ public:
return load_factor; return load_factor;
} }
inline unsigned int ParallelBaseReference() const { private:
return parallel_base_ref;;
}
private:
ScalarType element_type;
StressType stress_type; StressType stress_type;
size_t max_threads; size_t max_threads;
size_t vector_size; size_t vector_size;
size_t load_factor; size_t load_factor;
unsigned int parallel_base_ref;
unsigned int counter_scale; unsigned int counter_scale;
}; };
} // namespace perf } // namespace perf
......
...@@ -24,14 +24,15 @@ ...@@ -24,14 +24,15 @@
* POSSIBILITY OF SUCH DAMAGE. * POSSIBILITY OF SUCH DAMAGE.
*/ */
#ifndef EMBB_BASE_PERF_PERF_TEST_UNIT_H_ #ifndef EMBB_BASE_PERF_PARALLEL_PERF_TEST_UNIT_H_
#define EMBB_BASE_PERF_PERF_TEST_UNIT_H_ #define EMBB_BASE_PERF_PARALLEL_PERF_TEST_UNIT_H_
#include <cmath> #include <cmath>
#include <vector> #include <vector>
#include <partest/partest.h> #include <partest/partest.h>
#include <partest/test_unit.h> #include <partest/test_unit.h>
#include <embb/base/perf/timer.h> #include <embb/base/perf/timer.h>
#include <embb/base/perf/call_args.h>
#include <embb/tasks/tasks.h> #include <embb/tasks/tasks.h>
#include <embb/base/c/thread.h> #include <embb/base/c/thread.h>
#include <embb/base/c/internal/thread_index.h> #include <embb/base/c/internal/thread_index.h>
...@@ -63,56 +64,46 @@ namespace perf { ...@@ -63,56 +64,46 @@ namespace perf {
* \notthreadsafe * \notthreadsafe
* \ingroup CPP_BASE_PERF * \ingroup CPP_BASE_PERF
*/ */
template<typename F, class TestParams > template<typename ParallelF>
class PerfTestUnit : public partest::TestUnit { class ParallelPerfTestUnit : public partest::TestUnit {
public: public:
/** /**
* Constructs PerfTestUnit and sets up partest::TestUnit with Functor \c F. * Constructs PerfTestUnit and sets up partest::TestUnit with Functor \c F.
*/ */
explicit PerfTestUnit(const TestParams & params) explicit ParallelPerfTestUnit(const CallArgs & params) :
: partest::TestUnit("PTU"), partest::TestUnit("ParallelPerfTestUnit"),
params_(params), params_(params) {
duration_(0) { func_ = new ParallelF(params_);
func = new F(params_); Add(&ParallelPerfTestUnit<ParallelF>::Run, this);
// Add(&PerfTestUnit<F, TestParams>::Run, this);
} }
/** /**
* Destructs PerfTestUnit * Destructs ParallelPerfTestUnit
*/ */
~PerfTestUnit() { ~ParallelPerfTestUnit() {
delete func; delete func_;
} }
/** /**
* Returns duration of this unit in microseconds. * Durations of single runs of this unit in microseconds.
* \return Duration of this unit in microseconds. * \return Vector of durations of single runs of this unit
* ordered by number of threads, in microseconds.
*/ */
double GetDuration() const { return duration_; } const std::vector< std::pair<unsigned int, double> > & GetDurations() const {
return durations_;
#if 0 }
/**
* Returns thread count of this unit.
* \return Thread count of this unit.
*/
size_t GetThreadCount() const { return thread_count_; }
private:
/** /**
* Returns iteration count of this unit. * Run performance test
* \return Iteration count of this unit.
*/ */
size_t GetIterationCount() const { return iteration_count_; }
#endif
private:
void Run() { void Run() {
for (unsigned int num_threads = 1; for (unsigned int num_threads = 1; num_threads <= params_.MaxThreads();) {
num_threads < params_.MaxThreads();) { func_->Pre();
func->Pre();
Tic(); Tic();
func->Run(num_threads); func_->Run(num_threads);
Toc(); Toc(num_threads);
func->Post(); func_->Post();
if (num_threads < 4) { if (num_threads < 4) {
num_threads++; num_threads++;
} else { } else {
...@@ -149,26 +140,25 @@ class PerfTestUnit : public partest::TestUnit { ...@@ -149,26 +140,25 @@ class PerfTestUnit : public partest::TestUnit {
/** /**
* Stops timer and resets EMBB * Stops timer and resets EMBB
*/ */
void Toc() { void Toc(unsigned int num_threads) {
// stop timer // stop timer
duration_ = timer_.Elapsed(); durations_.push_back(
std::make_pair(num_threads, timer_.Elapsed()));
embb::tasks::Node::Finalize(); embb::tasks::Node::Finalize();
} }
const TestParams & params_; const CallArgs params_;
double duration_; std::vector< std::pair<unsigned int, double> > durations_;
//size_t thread_count_;
//size_t iteration_count_;
Timer timer_; Timer timer_;
F *func; ParallelF * func_;
// prohibit copy and assignment // prohibit copy and assignment
PerfTestUnit(const PerfTestUnit &other); ParallelPerfTestUnit(const ParallelPerfTestUnit &other);
PerfTestUnit& operator=(const PerfTestUnit &other); ParallelPerfTestUnit& operator=(const ParallelPerfTestUnit & other);
}; };
} // perf } // perf
} // base } // base
} // embb } // embb
#endif /* EMBB_BASE_PERF_PERF_TEST_UNIT_H_ */ #endif // EMBB_BASE_PERF_PARALLEL_PERF_TEST_UNIT_H_
/*
* Copyright (c) 2014, Siemens AG. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef EMBB_BASE_CPP_PERF_PERF_H_
#define EMBB_BASE_CPP_PERF_PERF_H_
#include <embb/base/perf/call_args.h>
#include <string>
#include <fstream>
#include <iostream>
#include <iomanip>
/**
 * Defines the entry point of a performance test executable.
 *
 * The macro expands to three things, in this order:
 *  - a helper template PartestRunPerformanceTest() that calls Run() on the
 *    given test and prints its report to std::cout,
 *  - a main() that prints \c component, parses the command line into a
 *    CallArgs instance, prints the resulting settings and then calls
 *    PartestRunPerformanceTests(). A std::runtime_error thrown by Parse()
 *    is written to std::cerr and execution continues,
 *  - the signature of PartestRunPerformanceTests(); the block that follows
 *    the macro invocation becomes its body, with the parsed arguments
 *    available as \c perf_test_params.
 *
 * NOTE(review): std::runtime_error is named here but <stdexcept> is not
 * included by this header directly; presumably it is pulled in through
 * call_args.h -- confirm.
 */
#define PT_PERF_MAIN(component) \
template <class Test> \
void PartestRunPerformanceTest(Test & test) { \
test.Run(); \
test.PrintReport(std::cout); \
} \
void PartestRunPerformanceTests( \
embb::base::perf::CallArgs & perf_test_params); \
int main(int argc, char** argv) { \
std::cout << component << ::std::endl; \
embb::base::perf::CallArgs perf_test_params; \
try { \
perf_test_params.Parse(argc, argv); \
} \
catch (::std::runtime_error & re) { \
::std::cerr << re.what() << ::std::endl; \
} \
perf_test_params.Print(::std::cout); \
PartestRunPerformanceTests(perf_test_params); \
} \
void PartestRunPerformanceTests( \
embb::base::perf::CallArgs & perf_test_params)
/**
 * Runs one performance test inside the block following PT_PERF_MAIN.
 *
 * \c PT_PERF_TEST is the test type; it is constructed from
 * \c perf_test_params, executed via PartestRunPerformanceTest() and
 * destroyed after the "done" message has been printed. Progress messages
 * are written to std::cout before and after the run.
 *
 * The test is created as a named local object because
 * PartestRunPerformanceTest() takes its argument by non-const reference,
 * which cannot bind to a temporary in standard C++. The macro therefore
 * expands to a statement and must be used as one: PT_PERF_RUN(Type);
 */
#define PT_PERF_RUN(PT_PERF_TEST) \
do { \
  std::cout << "Running " << #PT_PERF_TEST << " ..." << std::endl; \
  PT_PERF_TEST partest_perf_test_instance(perf_test_params); \
  PartestRunPerformanceTest<PT_PERF_TEST>(partest_perf_test_instance); \
  std::cout << "Running " << #PT_PERF_TEST << " ..." << " done" << std::endl; \
} while (0)
#endif // EMBB_BASE_CPP_PERF_PERF_H_
\ No newline at end of file
...@@ -51,9 +51,6 @@ class PerformanceTest : public partest::TestCase { ...@@ -51,9 +51,6 @@ class PerformanceTest : public partest::TestCase {
*/ */
explicit PerformanceTest(const TestParams & params) explicit PerformanceTest(const TestParams & params)
: partest::TestCase(), params_(params) { : partest::TestCase(), params_(params) {
// maximum one thread per available core
size_t threads = std::min<size_t>(params.MaxThreads(),
embb::base::CoreSet::CountAvailable());
unit_ = &CreateUnit< PerfTestUnit<ParallelF, TestParams> >(params_); unit_ = &CreateUnit< PerfTestUnit<ParallelF, TestParams> >(params_);
} }
...@@ -82,8 +79,8 @@ class PerformanceTest : public partest::TestCase { ...@@ -82,8 +79,8 @@ class PerformanceTest : public partest::TestCase {
PerformanceTest& operator=(const PerformanceTest &other); PerformanceTest& operator=(const PerformanceTest &other);
}; };
} /* perf */ } // perf
} /* base */ } // base
} /* embb */ } // embb
#endif /* EMBB_BASE_PERF_PERFORMANCE_TEST_H_ */ #endif // EMBB_BASE_PERF_PERFORMANCE_TEST_H_
/*
* Copyright (c) 2014, Siemens AG. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef EMBB_BASE_PERF_SERIAL_PERF_TEST_UNIT_H_
#define EMBB_BASE_PERF_SERIAL_PERF_TEST_UNIT_H_
#include <cmath>
#include <vector>
#include <partest/partest.h>
#include <partest/test_unit.h>
#include <embb/base/perf/timer.h>
#include <embb/base/perf/call_args.h>
#include <embb/tasks/tasks.h>
#include <embb/base/c/thread.h>
#include <embb/base/c/internal/thread_index.h>
#define THIS_DOMAIN_ID 1
#define THIS_NODE_ID 1
namespace embb {
namespace base {
namespace perf {
/**
* \defgroup CPP_BASE_PERF Performance Tests
*
* Performance Test Framework
*
* \ingroup CPP_BASE
*/
/**
 * Serial Performance Test Unit
 *
 * Test unit that times a single run of a serial functor. The functor type
 * \c SerialF is constructed from the call arguments and has to provide
 * \c Pre(), \c Run() and \c Post(). Only the call to \c Run() lies between
 * starting and reading the timer.
 *
 * \notthreadsafe
 * \ingroup CPP_BASE_PERF
 */
template<typename SerialF>
class SerialPerfTestUnit : public partest::TestUnit {
 public:
  /**
   * Constructs the unit, creates the functor \c SerialF from a copy of
   * \c params and registers the timed run with partest::TestUnit.
   */
  explicit SerialPerfTestUnit(const CallArgs & params)
  : partest::TestUnit("SerialPerfTestUnit"),
    params_(params),
    duration_(0),
    func_(new SerialF(params_)) {
    Add(&SerialPerfTestUnit::Run, this);
  }

  /**
   * Destructs SerialPerfTestUnit and releases the functor.
   */
  ~SerialPerfTestUnit() {
    delete func_;
  }

  /**
   * Duration of the serial run.
   * \return Value of Timer::Elapsed() taken right after the functor's
   *         Run() returned; 0 if the unit has not been run yet.
   *         NOTE(review): the parallel unit documents its durations as
   *         microseconds -- confirm the unit of Timer::Elapsed().
   */
  double GetDuration() const {
    return duration_;
  }

 private:
  /**
   * Executes the functor once: setup, timed run, teardown.
   */
  void Run() {
    func_->Pre();
    StartTimer();
    func_->Run();
    StopTimer();
    func_->Post();
  }

  /**
   * Restarts the measurement by replacing the timer with a fresh one.
   */
  void StartTimer() {
    timer_ = Timer();
  }

  /**
   * Stores the time elapsed on the timer as the duration of this unit.
   */
  void StopTimer() {
    duration_ = timer_.Elapsed();
  }

  // Private copy of the call arguments handed to the functor.
  const CallArgs params_;
  // Result of the most recent timed run.
  double duration_;
  Timer timer_;
  // Functor under test, owned by this unit.
  SerialF * func_;

  // prohibit copy and assignment
  SerialPerfTestUnit(const SerialPerfTestUnit & other);
  SerialPerfTestUnit& operator=(const SerialPerfTestUnit & other);
};
} // perf
} // base
} // embb
#endif // EMBB_BASE_PERF_SERIAL_PERF_TEST_UNIT_H_
...@@ -31,7 +31,9 @@ ...@@ -31,7 +31,9 @@
#include <iomanip> #include <iomanip>
#include <partest/partest.h> #include <partest/partest.h>
#include <embb/base/perf/timer.h> #include <embb/base/perf/timer.h>
#include <embb/base/perf/perf_test_unit.h> #include <embb/base/perf/call_args.h>
#include <embb/base/perf/parallel_perf_test_unit.h>
#include <embb/base/perf/serial_perf_test_unit.h>
namespace embb { namespace embb {
namespace base { namespace base {
...@@ -49,32 +51,18 @@ namespace perf { ...@@ -49,32 +51,18 @@ namespace perf {
* \notthreadsafe * \notthreadsafe
* \ingroup CPP_BASE_PERF * \ingroup CPP_BASE_PERF
*/ */
template<typename ParallelF, typename SerialF> template<typename SerialF, typename ParallelF>
class SpeedupTest : public partest::TestCase { class SpeedupTest : public partest::TestCase {
public: public:
/** /**
* Constructs SpeedupTest and creates test units. * Constructs SpeedupTest and creates test units.
*/ */
explicit SpeedupTest( explicit SpeedupTest(const embb::base::perf::CallArgs & params)
size_t max_thread_count = partest::TestSuite::GetDefaultNumThreads(), : partest::TestCase(), params_(params) {
size_t iteration_count = partest::TestSuite::GetDefaultNumIterations()) : // create unit for serial version
partest::TestCase() { ser_unit_ = &CreateUnit< SerialPerfTestUnit<SerialF>, CallArgs >(params_);
/* maximum one thread per available core */ // create unit for parallel version
size_t threads = std::min<size_t>( par_unit_ = &CreateUnit< ParallelPerfTestUnit<ParallelF>, CallArgs >(params_);
max_thread_count,
embb::base::CoreSet::CountAvailable());
std::cout << "Test configuration ------------------------------------" << std::endl;
std::cout << " Num threads: " << threads << std::endl;
std::cout << " Iterations: " << iteration_count << std::endl;
/* create unit for serial version */
ser_unit_ = &CreateUnit< PerfTestUnit<SerialF> >(0, iteration_count);
/* create log2(threads)+1 units for parallel version */
for (size_t i = 1; i <= threads; i = i * 2) {
par_units_.push_back(
&CreateUnit< PerfTestUnit<ParallelF> >(i, iteration_count));
}
} }
/** /**
...@@ -87,31 +75,39 @@ class SpeedupTest : public partest::TestCase { ...@@ -87,31 +75,39 @@ class SpeedupTest : public partest::TestCase {
* Prints the durations of all units in comma separated format. * Prints the durations of all units in comma separated format.
*/ */
void PrintReport(std::ostream & ostr) { void PrintReport(std::ostream & ostr) {
/* print sample row for sequential run (degree 0): */ double serial_duration = ser_unit_->GetDuration();
// print sample row for sequential run (degree 0):
ostr << "0," ostr << "0,"
<< std::fixed << std::setprecision(2) << std::fixed << std::setprecision(2)
<< ser_unit_->GetDuration() << std::endl; << serial_duration << ","
/* print sample rows for parallel runs (degree > 0): */ << std::fixed << 1.0
for (int i = 0; i < par_units_.size(); ++i) { << std::endl;
ostr << std::fixed << par_units_[i]->GetThreadCount() // print sample rows for parallel runs (degree > 0):
std::vector < std::pair< unsigned int, double > > durations =
par_unit_->GetDurations();
for (unsigned int i = 0; i < durations.size(); ++i) {
ostr << std::fixed << durations[i].first
<< "," << ","
<< std::fixed << std::setprecision(2) << std::fixed << std::setprecision(2)
<< par_units_[i]->GetDuration() << durations[i].second
<< ","
<< std::fixed << serial_duration / durations[i].second
<< std::endl; << std::endl;
} }
} }
private: private:
std::vector<PerfTestUnit<ParallelF> *> par_units_; const CallArgs & params_;
PerfTestUnit<SerialF> *ser_unit_; ParallelPerfTestUnit<ParallelF> * par_unit_;
SerialPerfTestUnit<SerialF> * ser_unit_;
/* prohibit copy and assignment */ /* prohibit copy and assignment */
SpeedupTest(const SpeedupTest &other); SpeedupTest(const SpeedupTest &other);
SpeedupTest& operator=(const SpeedupTest &other); SpeedupTest& operator=(const SpeedupTest &other);
}; };
} /* perf */ } // perf
} /* base */ } // base
} /* embb */ } // embb
#endif /* EMBB_BASE_PERF_SPEEDUP_TEST_H_ */ #endif /* EMBB_BASE_PERF_SPEEDUP_TEST_H_ */
...@@ -37,17 +37,6 @@ void CallArgs::Parse(int argc, char * argv[]) { ...@@ -37,17 +37,6 @@ void CallArgs::Parse(int argc, char * argv[]) {
counter_scale = scale_param; counter_scale = scale_param;
} }
} }
// Element type:
if (std::string(argv[paramIndex]) == "-e") {
element_type = UNDEFINED_SCALAR_TYPE;
::std::string type = argv[paramIndex + 1];
if (type == "float") {
element_type = FLOAT;
}
else if (type == "double") {
element_type = DOUBLE;
}
}
// Stress type: // Stress type:
if (std::string(argv[paramIndex]) == "-s") { if (std::string(argv[paramIndex]) == "-s") {
stress_type = UNDEFINED_STRESS_TYPE; stress_type = UNDEFINED_STRESS_TYPE;
...@@ -64,15 +53,6 @@ void CallArgs::Parse(int argc, char * argv[]) { ...@@ -64,15 +53,6 @@ void CallArgs::Parse(int argc, char * argv[]) {
load_factor = static_cast<size_t>( load_factor = static_cast<size_t>(
atoi(argv[paramIndex + 1])); atoi(argv[paramIndex + 1]));
} }
// Additional test parameter:
if (std::string(argv[paramIndex]) == "-p") {
parallel_base_ref = atoi(argv[paramIndex + 1]);
}
// Sanitizing and error handling:
if (element_type == UNDEFINED_SCALAR_TYPE) {
throw ::std::runtime_error(
"Invalid setting for element type (-e int|float|double)");
}
if (stress_type == UNDEFINED_STRESS_TYPE) { if (stress_type == UNDEFINED_STRESS_TYPE) {
throw ::std::runtime_error( throw ::std::runtime_error(
"Invalid setting for stress test type (-s ram|cpu)"); "Invalid setting for stress test type (-s ram|cpu)");
...@@ -88,9 +68,7 @@ void CallArgs::Print(std::ostream & os) { ...@@ -88,9 +68,7 @@ void CallArgs::Print(std::ostream & os) {
os << "Max. threads: (-t) " << MaxThreads() << std::endl os << "Max. threads: (-t) " << MaxThreads() << std::endl
<< "Vector size: (-n) " << VectorSize() << std::endl << "Vector size: (-n) " << VectorSize() << std::endl
<< "Load factor: (-l) " << LoadFactor() << std::endl << "Load factor: (-l) " << LoadFactor() << std::endl
<< "Element type: (-e) " << ElementTypeName() << std::endl
<< "Stress mode: (-s) " << StressModeName() << std::endl << "Stress mode: (-s) " << StressModeName() << std::endl
<< "Serial base ref: (-p) " << ParallelBaseReference() << std::endl
<< "Time sampling: (-f) " << embb::base::perf::Timer::TimerName() << "Time sampling: (-f) " << embb::base::perf::Timer::TimerName()
<< std::endl; << std::endl;
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment