diff --git a/algorithms_cpp/perf/for_each_perf-inl.h b/algorithms_cpp/perf/for_each_perf-inl.h index 0933d98..614d0b1 100644 --- a/algorithms_cpp/perf/for_each_perf-inl.h +++ b/algorithms_cpp/perf/for_each_perf-inl.h @@ -79,9 +79,6 @@ ParallelForEach::ParallelForEach(const embb::base::perf::CallArgs & args) if (cargs.StressMode() == CallArgs::RAM_STRESS) { v = static_cast(embb::base::Allocation::AllocateCacheAligned( vector_size * sizeof(T))); - for (size_t i = 0; i < vector_size; i++) { - v[i] = static_cast(i); - } } else { v = 0; } @@ -95,6 +92,16 @@ ParallelForEach::~ParallelForEach() { } template +void ParallelForEach::Pre() { + if (cargs.StressMode() == CallArgs::RAM_STRESS) { + // Initialize input vector with incrementing values: + for (size_t i = 0; i < vector_size; i++) { + v[i] = static_cast(i); + } + } +} + +template void ParallelForEach::Run(unsigned int numThreads) { if (cargs.StressMode() == CallArgs::CPU_STRESS) { // Computing input values, no memory access diff --git a/algorithms_cpp/perf/for_each_perf.h b/algorithms_cpp/perf/for_each_perf.h index 86d6ca4..56fb3c7 100644 --- a/algorithms_cpp/perf/for_each_perf.h +++ b/algorithms_cpp/perf/for_each_perf.h @@ -34,15 +34,13 @@ namespace embb { namespace algorithms { namespace perf { -using embb::base::perf::CallArgs; - /** * Operation performed in each loop iteration. 
*/ template class ForEachOp { public: - explicit ForEachOp(const CallArgs & args) : + explicit ForEachOp(const embb::base::perf::CallArgs & args) : load_factor(args.LoadFactor()) { } void operator()(T & val) const { T x = val; @@ -54,13 +52,13 @@ public: val = x; } private: - unsigned int load_factor; + size_t load_factor; }; template class ForEachFunctor { public: - ForEachFunctor(const CallArgs & args) : + ForEachFunctor(const embb::base::perf::CallArgs & args) : cargs(args), op(args) { } void operator()(T & value) const { op(value); @@ -75,7 +73,7 @@ public: return *this; } private: - const CallArgs & cargs; + const embb::base::perf::CallArgs & cargs; ForEachOp op; }; @@ -103,7 +101,7 @@ class ParallelForEach { public: explicit ParallelForEach(const embb::base::perf::CallArgs & args); ~ParallelForEach(); - void Pre() { } + void Pre(); void Run(unsigned int numThreads); void Post() { } diff --git a/algorithms_cpp/perf/main.cc b/algorithms_cpp/perf/main.cc index ad7309b..d92be20 100644 --- a/algorithms_cpp/perf/main.cc +++ b/algorithms_cpp/perf/main.cc @@ -24,21 +24,8 @@ * POSSIBILITY OF SUCH DAMAGE. 
*/ -#include -#include -#include -#include #include -#include -#include -#include -#include -#include - -#include -#include - #include #include #include @@ -46,159 +33,33 @@ #include #include -#include +#include +#include -using namespace embb::algorithms::perf; +using embb::algorithms::perf::SerialForEach; +using embb::algorithms::perf::SerialReduce; +using embb::algorithms::perf::SerialScan; +using embb::algorithms::perf::SerialCount; +using embb::algorithms::perf::SerialScan; +using embb::algorithms::perf::SerialMergeSort; +using embb::algorithms::perf::SerialQuickSort; +using embb::algorithms::perf::ParallelForEach; +using embb::algorithms::perf::ParallelReduce; +using embb::algorithms::perf::ParallelScan; +using embb::algorithms::perf::ParallelCount; +using embb::algorithms::perf::ParallelMergeSort; +using embb::algorithms::perf::ParallelQuickSort; using embb::base::perf::Timer; using embb::base::perf::CallArgs; -using embb::base::perf::PerformanceTest; - -#if 0 - -void ReportResult( - const std::string & name, - unsigned int threads, - const CallArgs & args, - double elapsed, - double speedup) { - std::cout - << args.VectorSize() << "," - << args.ElementTypeName() << "," - << args.LoadFactor() << "," - << args.StressModeName() << "," - << threads << "," - << std::fixed << elapsed << "," - << std::setprecision(3) << speedup - << std::endl; - std::ofstream file; - std::string filename = "performance_tests_result.csv"; - file.open(filename.c_str(), ::std::ofstream::out | ::std::ofstream::app); - file - << name << "," - << args.VectorSize() << "," - << args.ElementTypeName() << "," - << args.LoadFactor() << "," - << args.StressModeName() << "," - << threads << "," - << std::fixed << elapsed << "," - << std::setprecision(3) << speedup - << std::endl; -} +using embb::base::perf::SpeedupTest; -template< typename TestSerial, typename TestParallel > -void RunPerformanceTest( - const embb::base::perf::CallArgs & args, - const std::string & name) { - std::cout << "--- 
Running " << name << std::endl; - // Initialize new test instances: - TestParallel testParallel(args); - // Parallel runs: - unsigned int threads = 1; - // Base value to compute speedup; parallel execution - // with 1 thread or serial execution. - double baseDuration = 0; +#define COMMA , - // Whether to use serial or parallel exeuction using 1 - // thread for speedup reference: - if (args.ParallelBaseReference() == 0) { - TestSerial testSerial(args); - // Serial run: - Timer t; - testSerial.Run(); - double elapsed = t.Elapsed(); - ReportResult( - name, - 0, - args, - elapsed, - 1.0); - baseDuration = elapsed; - } - threads += args.ParallelBaseReference(); - // Run executions with incrementing number of threads: - embb_internal_thread_index_set_max(args.MaxThreads()); - while(threads <= args.MaxThreads()) { - // Set number of available threads to given limit: - // embb::base::Thread::SetThreadsMaxCount(threads); - embb_internal_thread_index_reset(); - // Configure cores to be used by EMBB: - embb::base::CoreSet cores(false); - for (unsigned int coreId = 0; coreId < threads; ++coreId) { - cores.Add(coreId); - } - embb::tasks::Node::Initialize( - 1, 1, cores, - MTAPI_NODE_MAX_TASKS_DEFAULT * 8, - MTAPI_NODE_MAX_GROUPS_DEFAULT * 8, - MTAPI_NODE_MAX_QUEUES_DEFAULT * 8, - MTAPI_NODE_QUEUE_LIMIT_DEFAULT * 8, - MTAPI_NODE_MAX_PRIORITIES_DEFAULT); - // Test setup: - testParallel.Pre(); - // Initialize timer: - Timer t; - // Run the test body: - testParallel.Run(threads); - // Report duration: - double elapsed = t.Elapsed(); - if (threads == 1) { - baseDuration = elapsed; - } - ReportResult( - name, - threads, - args, - elapsed, - static_cast(baseDuration) / static_cast(elapsed)); - // Test teardown: - testParallel.Post(); - if (threads < 4) { - ++threads; - } else { - threads += 4; - } - embb::tasks::Node::Finalize(); - } +PT_PERF_MAIN("Algorithms") { + PT_PERF_RUN(SpeedupTest< SerialForEach COMMA ParallelForEach >); + PT_PERF_RUN(SpeedupTest< SerialReduce COMMA 
ParallelReduce >); + PT_PERF_RUN(SpeedupTest< SerialScan COMMA ParallelScan >); + PT_PERF_RUN(SpeedupTest< SerialCount COMMA ParallelCount >); + PT_PERF_RUN(SpeedupTest< SerialMergeSort COMMA ParallelMergeSort >); + PT_PERF_RUN(SpeedupTest< SerialQuickSort COMMA ParallelQuickSort >); } - -int main(int argc, char * argv[]) { - // Parse command line arguments: - embb::base::perf::CallArgs args; - try { - args.Parse(argc, argv); - } catch (::std::runtime_error & re) { - ::std::cerr << re.what() << ::std::endl; - } - // Print test settings: - args.Print(::std::cout); - // Run tests: - RunPerformanceTest< SerialForEach, ParallelForEach >(args, "ForEach"); - RunPerformanceTest< SerialReduce, ParallelReduce >(args, "Reduce"); - RunPerformanceTest< SerialScan, ParallelScan >(args, "Scan"); - RunPerformanceTest< SerialCount, ParallelCount >(args, "Count"); - RunPerformanceTest< SerialQuickSort, ParallelQuickSort >(args, "Quicksort"); - RunPerformanceTest< SerialMergeSort, ParallelMergeSort >(args, "Mergesort"); - return 0; -} - -#endif - -int main(int argc, char * argv[]) { - // Parse command line arguments: - CallArgs args; - try { - args.Parse(argc, argv); - } - catch (::std::runtime_error & re) { - ::std::cerr << re.what() << ::std::endl; - } - // Print test settings: - args.Print(::std::cout); - // Run tests: - PerformanceTest< SerialForEach, ParallelForEach, CallArgs > - test(args); - test.Run(); - test.PrintReport(std::cout); - - return 0; -} \ No newline at end of file diff --git a/algorithms_cpp/perf/merge_sort_perf-inl.h b/algorithms_cpp/perf/merge_sort_perf-inl.h index 04e62f8..082ec6e 100644 --- a/algorithms_cpp/perf/merge_sort_perf-inl.h +++ b/algorithms_cpp/perf/merge_sort_perf-inl.h @@ -43,9 +43,6 @@ SerialMergeSort::SerialMergeSort(const embb::base::perf::CallArgs & args) : cargs(args), vector_size(args.VectorSize()) { v = static_cast(embb::base::Allocation::AllocateCacheAligned( vector_size * sizeof(T))); - for (size_t i = 0; i < vector_size; i++) { - v[i] = 
static_cast(i); - } } template @@ -74,6 +71,14 @@ ParallelMergeSort::~ParallelMergeSort() { } template +void ParallelMergeSort::Pre() { + // Initialize input vector with incrementing values: + for (size_t i = 0; i < vector_size; i++) { + v[i] = static_cast(i); + } +} + +template void ParallelMergeSort::Run(unsigned int numThreads) { // Always reading input values from memory, no CPU-only test possible // as mergesort sorts in-place. diff --git a/algorithms_cpp/perf/merge_sort_perf.h b/algorithms_cpp/perf/merge_sort_perf.h index f887097..a9ab541 100644 --- a/algorithms_cpp/perf/merge_sort_perf.h +++ b/algorithms_cpp/perf/merge_sort_perf.h @@ -59,7 +59,7 @@ class ParallelMergeSort { public: explicit ParallelMergeSort(const embb::base::perf::CallArgs & args); ~ParallelMergeSort(); - void Pre() { } + void Pre(); void Run(unsigned int numThreads); void Post() { } diff --git a/algorithms_cpp/perf/quick_sort_perf-inl.h b/algorithms_cpp/perf/quick_sort_perf-inl.h index c47fcb5..081594d 100644 --- a/algorithms_cpp/perf/quick_sort_perf-inl.h +++ b/algorithms_cpp/perf/quick_sort_perf-inl.h @@ -72,9 +72,6 @@ ParallelQuickSort::ParallelQuickSort(const embb::base::perf::CallArgs & args) : cargs(args), vector_size(args.VectorSize()) { v = static_cast(embb::base::Allocation::AllocateCacheAligned( vector_size * sizeof(T))); - for (size_t i = 0; i < vector_size; i++) { - v[i] = static_cast(i); - } } template @@ -83,6 +80,14 @@ ParallelQuickSort::~ParallelQuickSort() { } template +void ParallelQuickSort::Pre() { + // Initialize input vector with incrementing values: + for (size_t i = 0; i < vector_size; i++) { + v[i] = static_cast(i); + } +} + +template void ParallelQuickSort::Run(unsigned int numThreads) { // Always reading input values from memory, no CPU-only test possible // as quicksort sorts in-place. 
diff --git a/algorithms_cpp/perf/quick_sort_perf.h b/algorithms_cpp/perf/quick_sort_perf.h index 9ab8284..2e72c39 100644 --- a/algorithms_cpp/perf/quick_sort_perf.h +++ b/algorithms_cpp/perf/quick_sort_perf.h @@ -59,7 +59,7 @@ class ParallelQuickSort { public: explicit ParallelQuickSort(const embb::base::perf::CallArgs & args); ~ParallelQuickSort(); - void Pre() { } + void Pre(); void Run(unsigned int numThreads); void Post() { } diff --git a/algorithms_cpp/perf/reduce_perf-inl.h b/algorithms_cpp/perf/reduce_perf-inl.h index 2a7778c..3954c65 100644 --- a/algorithms_cpp/perf/reduce_perf-inl.h +++ b/algorithms_cpp/perf/reduce_perf-inl.h @@ -93,9 +93,6 @@ ParallelReduce::ParallelReduce( v = static_cast( embb::base::Allocation::AllocateCacheAligned( vector_size * sizeof(T))); - for (size_t i = 0; i < vector_size; i++) { - v[i] = (T)i; - } } else { v = 0; @@ -110,6 +107,16 @@ ParallelReduce::~ParallelReduce() { } template +void ParallelReduce::Pre() { + if (cargs.StressMode() == CallArgs::RAM_STRESS) { + // Initialize input vector with incrementing values: + for (size_t i = 0; i < vector_size; i++) { + v[i] = (T)i; + } + } +} + +template void ParallelReduce::Run(unsigned int numThreads) { TransformOp op(static_cast(1) / vector_size, cargs); if (cargs.StressMode() == CallArgs::CPU_STRESS) { diff --git a/algorithms_cpp/perf/reduce_perf.h b/algorithms_cpp/perf/reduce_perf.h index b6086d9..2b3335a 100644 --- a/algorithms_cpp/perf/reduce_perf.h +++ b/algorithms_cpp/perf/reduce_perf.h @@ -51,8 +51,8 @@ public: T operator()(T val) const { T x = 0; // Simulate more complex operation depending on - // load factor. Default load factor is 1. - for (size_t i = 0; i < load_factor * 10000; ++i) { + // load factor. Default load factor is 100. 
+ for (size_t i = 0; i < load_factor * 10; ++i) { x = (val + static_cast(0.5)) * step_size * i; x = static_cast(4.0 / (1.0 + x * x / load_factor)); } @@ -87,7 +87,7 @@ public: explicit ParallelReduce( const embb::base::perf::CallArgs & args); ~ParallelReduce(); - void Pre() { } + void Pre(); void Run(unsigned int numThreads); void Post() { } diff --git a/algorithms_cpp/perf/scan_perf-inl.h b/algorithms_cpp/perf/scan_perf-inl.h index 0cd3aa1..1d5aa60 100644 --- a/algorithms_cpp/perf/scan_perf-inl.h +++ b/algorithms_cpp/perf/scan_perf-inl.h @@ -101,9 +101,6 @@ ParallelScan::ParallelScan(const embb::base::perf::CallArgs & args) : if (cargs.StressMode() == CallArgs::RAM_STRESS) { in = (T *) Allocation::AllocateCacheAligned( vector_size * sizeof(T)); - for (size_t i = 0; i < vector_size; i++) { - in[i] = static_cast(1); - } } else { in = 0; @@ -120,6 +117,16 @@ ParallelScan::~ParallelScan() { } template +void ParallelScan::Pre() { + if (cargs.StressMode() == CallArgs::RAM_STRESS) { + // Initialize input vector with 1's: + for (size_t i = 0; i < vector_size; i++) { + in[i] = static_cast(1); + } + } +} + +template void ParallelScan::Run(unsigned int numThreads) { if (cargs.StressMode() == CallArgs::CPU_STRESS) { CpuStressScanOp op(cargs); diff --git a/algorithms_cpp/perf/scan_perf.h b/algorithms_cpp/perf/scan_perf.h index 5f7b4c9..16b5332 100644 --- a/algorithms_cpp/perf/scan_perf.h +++ b/algorithms_cpp/perf/scan_perf.h @@ -110,7 +110,7 @@ class ParallelScan { public: explicit ParallelScan(const embb::base::perf::CallArgs & args); ~ParallelScan(); - void Pre() { } + void Pre(); void Run(unsigned int numThreads); void Post() { } diff --git a/base_cpp/perf/include/embb/base/perf/call_args.h b/base_cpp/perf/include/embb/base/perf/call_args.h index 0dfa9a6..f1e2689 100644 --- a/base_cpp/perf/include/embb/base/perf/call_args.h +++ b/base_cpp/perf/include/embb/base/perf/call_args.h @@ -36,33 +36,30 @@ namespace base { namespace perf { class CallArgs { - -public: - - typedef 
enum { - UNDEFINED_SCALAR_TYPE = 0, - FLOAT, - DOUBLE - } ScalarType; - + public: typedef enum { UNDEFINED_STRESS_TYPE = 0, RAM_STRESS, CPU_STRESS } StressType; -public: - + public: inline CallArgs() : - element_type(CallArgs::FLOAT), stress_type(CallArgs::RAM_STRESS), max_threads(embb::base::CoreSet::CountAvailable()), vector_size(10000000), - load_factor(100), - parallel_base_ref(0), + load_factor(10), counter_scale(0) { } + inline CallArgs(const CallArgs & other) : + stress_type(other.stress_type), + max_threads(other.max_threads), + vector_size(other.vector_size), + load_factor(other.load_factor), + counter_scale(other.counter_scale) { + } + inline CallArgs(int argc, char * argv[]) { Parse(argc, argv); } @@ -79,18 +76,6 @@ public: return counter_scale; } - inline ScalarType ElementType() const { - return element_type; - } - - inline ::std::string ElementTypeName() const { - return ((ElementType() == UNDEFINED_SCALAR_TYPE) - ? "undefined" - : ((ElementType() == FLOAT) - ? "float" - : "double")); - } - inline StressType StressMode() const { return stress_type; } @@ -108,21 +93,13 @@ public: inline size_t LoadFactor() const { return load_factor; } - - inline unsigned int ParallelBaseReference() const { - return parallel_base_ref;; - } - -private: - - ScalarType element_type; + + private: StressType stress_type; size_t max_threads; size_t vector_size; size_t load_factor; - unsigned int parallel_base_ref; unsigned int counter_scale; - }; } // namespace perf diff --git a/base_cpp/perf/include/embb/base/perf/perf_test_unit.h b/base_cpp/perf/include/embb/base/perf/parallel_perf_test_unit.h similarity index 71% rename from base_cpp/perf/include/embb/base/perf/perf_test_unit.h rename to base_cpp/perf/include/embb/base/perf/parallel_perf_test_unit.h index e1aa6c5..5e27e40 100644 --- a/base_cpp/perf/include/embb/base/perf/perf_test_unit.h +++ b/base_cpp/perf/include/embb/base/perf/parallel_perf_test_unit.h @@ -24,14 +24,15 @@ * POSSIBILITY OF SUCH DAMAGE. 
*/ -#ifndef EMBB_BASE_PERF_PERF_TEST_UNIT_H_ -#define EMBB_BASE_PERF_PERF_TEST_UNIT_H_ +#ifndef EMBB_BASE_PERF_PARALLEL_PERF_TEST_UNIT_H_ +#define EMBB_BASE_PERF_PARALLEL_PERF_TEST_UNIT_H_ #include #include #include #include #include +#include #include #include #include @@ -63,62 +64,52 @@ namespace perf { * \notthreadsafe * \ingroup CPP_BASE_PERF */ -template -class PerfTestUnit : public partest::TestUnit { +template +class ParallelPerfTestUnit : public partest::TestUnit { public: /** * Constructs PerfTestUnit and sets up partest::TestUnit with Functor \c F. */ - explicit PerfTestUnit(const TestParams & params) - : partest::TestUnit("PTU"), - params_(params), - duration_(0) { - func = new F(params_); -// Add(&PerfTestUnit::Run, this); + explicit ParallelPerfTestUnit(const CallArgs & params) : + partest::TestUnit("ParallelPerfTestUnit"), + params_(params) { + func_ = new ParallelF(params_); + Add(&ParallelPerfTestUnit::Run, this); } /** - * Destructs PerfTestUnit + * Destructs ParallelPerfTestUnit */ - ~PerfTestUnit() { - delete func; + ~ParallelPerfTestUnit() { + delete func_; } /** - * Returns duration of this unit in microseconds. - * \return Duration of this unit in microseconds. + * Durations of single runs of this unit in microseconds. + * \return Vector of durations of single runs of this unit + * ordered by number of threads, in microseconds. */ - double GetDuration() const { return duration_; } - -#if 0 - /** - * Returns thread count of this unit. - * \return Thread count of this unit. - */ - size_t GetThreadCount() const { return thread_count_; } - - /** - * Returns iteration count of this unit. - * \return Iteration count of this unit. 
- */ - size_t GetIterationCount() const { return iteration_count_; } -#endif + const std::vector< std::pair > & GetDurations() const { + return durations_; + } private: - void Run() { - for (unsigned int num_threads = 1; - num_threads < params_.MaxThreads();) { - func->Pre(); - Tic(); - func->Run(num_threads); - Toc(); - func->Post(); - if (num_threads < 4) { - num_threads++; - } else { - num_threads += 4; - } - } + /** + * Run performance test + */ + void Run() { + for (unsigned int num_threads = 1; num_threads <= params_.MaxThreads();) { + func_->Pre(); + Tic(); + func_->Run(num_threads); + Toc(num_threads); + func_->Post(); + if (num_threads < 4) { + num_threads++; + } else { + num_threads += 4; + } + } } /** @@ -149,26 +140,25 @@ class PerfTestUnit : public partest::TestUnit { /** * Stops timer and resets EMBB */ - void Toc() { + void Toc(unsigned int num_threads) { // stop timer - duration_ = timer_.Elapsed(); + durations_.push_back( + std::make_pair(num_threads, timer_.Elapsed())); embb::tasks::Node::Finalize(); } - const TestParams & params_; - double duration_; -//size_t thread_count_; -//size_t iteration_count_; + const CallArgs params_; + std::vector< std::pair > durations_; Timer timer_; - F *func; + ParallelF * func_; // prohibit copy and assignment - PerfTestUnit(const PerfTestUnit &other); - PerfTestUnit& operator=(const PerfTestUnit &other); + ParallelPerfTestUnit(const ParallelPerfTestUnit &other); + ParallelPerfTestUnit& operator=(const ParallelPerfTestUnit & other); }; } // perf } // base } // embb -#endif /* EMBB_BASE_PERF_PERF_TEST_UNIT_H_ */ +#endif // EMBB_BASE_PERF_PARALLEL_PERF_TEST_UNIT_H_ diff --git a/base_cpp/perf/include/embb/base/perf/perf.h b/base_cpp/perf/include/embb/base/perf/perf.h new file mode 100644 index 0000000..23aeacb --- /dev/null +++ b/base_cpp/perf/include/embb/base/perf/perf.h @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2014, Siemens AG. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef EMBB_BASE_CPP_PERF_PERF_H_ +#define EMBB_BASE_CPP_PERF_PERF_H_ + +#include +#include +#include +#include +#include + +#define PT_PERF_MAIN(component) \ +template \ +void PartestRunPerformanceTest(Test & test) { \ + test.Run(); \ + test.PrintReport(std::cout); \ +} \ +void PartestRunPerformanceTests( \ + embb::base::perf::CallArgs & perf_test_params); \ +int main(int argc, char** argv) { \ + std::cout << component << ::std::endl; \ + embb::base::perf::CallArgs perf_test_params; \ + try { \ + perf_test_params.Parse(argc, argv); \ + } \ + catch (::std::runtime_error & re) { \ + ::std::cerr << re.what() << ::std::endl; \ + } \ + perf_test_params.Print(::std::cout); \ + PartestRunPerformanceTests(perf_test_params); \ +} \ +void PartestRunPerformanceTests( \ + embb::base::perf::CallArgs & perf_test_params) + +#define PT_PERF_RUN(PT_PERF_TEST) \ +( \ + (std::cout << "Running " << #PT_PERF_TEST << " ..." << std::endl), \ + PartestRunPerformanceTest(PT_PERF_TEST(perf_test_params)), \ + (std::cout << "Running " << #PT_PERF_TEST << " ..." << " done" << std::endl) \ +) + +#endif // EMBB_BASE_CPP_PERF_PERF_H_ \ No newline at end of file diff --git a/base_cpp/perf/include/embb/base/perf/performance_test.h b/base_cpp/perf/include/embb/base/perf/performance_test.h index 3bd15e9..8a7fea7 100644 --- a/base_cpp/perf/include/embb/base/perf/performance_test.h +++ b/base_cpp/perf/include/embb/base/perf/performance_test.h @@ -50,10 +50,7 @@ class PerformanceTest : public partest::TestCase { * Constructs PerformanceTest. 
*/ explicit PerformanceTest(const TestParams & params) - : partest::TestCase(), params_(params) { - // maximum one thread per available core - size_t threads = std::min(params.MaxThreads(), - embb::base::CoreSet::CountAvailable()); + : partest::TestCase(), params_(params) { unit_ = &CreateUnit< PerfTestUnit >(params_); } @@ -82,8 +79,8 @@ class PerformanceTest : public partest::TestCase { PerformanceTest& operator=(const PerformanceTest &other); }; -} /* perf */ -} /* base */ -} /* embb */ +} // perf +} // base +} // embb -#endif /* EMBB_BASE_PERF_PERFORMANCE_TEST_H_ */ +#endif // EMBB_BASE_PERF_PERFORMANCE_TEST_H_ diff --git a/base_cpp/perf/include/embb/base/perf/serial_perf_test_unit.h b/base_cpp/perf/include/embb/base/perf/serial_perf_test_unit.h new file mode 100644 index 0000000..d02c8dc --- /dev/null +++ b/base_cpp/perf/include/embb/base/perf/serial_perf_test_unit.h @@ -0,0 +1,139 @@ +/* + * Copyright (c) 2014, Siemens AG. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef EMBB_BASE_PERF_SERIAL_PERF_TEST_UNIT_H_ +#define EMBB_BASE_PERF_SERIAL_PERF_TEST_UNIT_H_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define THIS_DOMAIN_ID 1 +#define THIS_NODE_ID 1 + +namespace embb { +namespace base { +namespace perf { + +/** + * \defgroup CPP_BASE_PERF Performance Tests + * + * Performance Test Framework + * + * \ingroup CPP_BASE + */ +/** + * Performance Test Unit + * + * Base unit of any test (Speedup Test, Performance Test, ...). Takes a + * non-copyable Functor as template argument and executes it \c iteration_count + * times on \c thread_count worker threads. + * + * If \c thread_count equals 0, EMBB is not initialized and the Functor is + * executed without EMBB support. + * + * \notthreadsafe + * \ingroup CPP_BASE_PERF + */ +template +class SerialPerfTestUnit : public partest::TestUnit { + public: + /** + * Constructs PerfTestUnit and sets up partest::TestUnit with Functor \c F. + */ + explicit SerialPerfTestUnit(const CallArgs & params) : + partest::TestUnit("SerialPerfTestUnit"), + params_(params), + duration_(0) { + func_ = new SerialF(params_); + Add(&SerialPerfTestUnit::Run, this); + } + + /** + * Destructs SerialPerfTestUnit + */ + ~SerialPerfTestUnit() { + delete func_; + } + + /** + * Durations of single runs of this unit in microseconds. 
+ * \return Duration of the single serial run of this unit, + * in microseconds. + */ + double GetDuration() const { + return duration_; + } + + private: + /** + * Run performance test + */ + void Run() { + func_->Pre(); + Tic(); + func_->Run(); + Toc(); + func_->Post(); + } + + /** + * Starts timer; the serial run performs no EMBB setup. + */ + void Tic() { + // start timer + timer_ = Timer(); + } + + /** + * Reads the elapsed time from the timer and stores it as the duration + */ + void Toc() { + // stop timer + duration_ = timer_.Elapsed(); + } + + const CallArgs params_; + double duration_; + Timer timer_; + SerialF * func_; + + // prohibit copy and assignment + SerialPerfTestUnit(const SerialPerfTestUnit & other); + SerialPerfTestUnit& operator=(const SerialPerfTestUnit & other); +}; + +} // perf +} // base +} // embb + +#endif // EMBB_BASE_PERF_SERIAL_PERF_TEST_UNIT_H_ diff --git a/base_cpp/perf/include/embb/base/perf/speedup_test.h b/base_cpp/perf/include/embb/base/perf/speedup_test.h index 8db1eca..a4dbae2 100644 --- a/base_cpp/perf/include/embb/base/perf/speedup_test.h +++ b/base_cpp/perf/include/embb/base/perf/speedup_test.h @@ -31,7 +31,9 @@ #include #include #include -#include +#include +#include +#include namespace embb { namespace base { @@ -49,32 +51,18 @@ namespace perf { * \notthreadsafe * \ingroup CPP_BASE_PERF */ -template +template class SpeedupTest : public partest::TestCase { public: /** * Constructs SpeedupTest and creates test units. 
*/ - explicit SpeedupTest( - size_t max_thread_count = partest::TestSuite::GetDefaultNumThreads(), - size_t iteration_count = partest::TestSuite::GetDefaultNumIterations()) : - partest::TestCase() { - /* maximum one thread per available core */ - size_t threads = std::min( - max_thread_count, - embb::base::CoreSet::CountAvailable()); - - std::cout << "Test configuration ------------------------------------" << std::endl; - std::cout << " Num threads: " << threads << std::endl; - std::cout << " Iterations: " << iteration_count << std::endl; - - /* create unit for serial version */ - ser_unit_ = &CreateUnit< PerfTestUnit >(0, iteration_count); - /* create log2(threads)+1 units for parallel version */ - for (size_t i = 1; i <= threads; i = i * 2) { - par_units_.push_back( - &CreateUnit< PerfTestUnit >(i, iteration_count)); - } + explicit SpeedupTest(const embb::base::perf::CallArgs & params) + : partest::TestCase(), params_(params) { + // create unit for serial version + ser_unit_ = &CreateUnit< SerialPerfTestUnit, CallArgs >(params_); + // create unit for parallel version + par_unit_ = &CreateUnit< ParallelPerfTestUnit, CallArgs >(params_); } /** @@ -87,31 +75,39 @@ class SpeedupTest : public partest::TestCase { * Prints the durations of all units in comma separated format. 
*/ void PrintReport(std::ostream & ostr) { - /* print sample row for sequential run (degree 0): */ + double serial_duration = ser_unit_->GetDuration(); + // print sample row for sequential run (degree 0): ostr << "0," << std::fixed << std::setprecision(2) - << ser_unit_->GetDuration() << std::endl; - /* print sample rows for parallel runs (degree > 0): */ - for (int i = 0; i < par_units_.size(); ++i) { - ostr << std::fixed << par_units_[i]->GetThreadCount() + << serial_duration << "," + << std::fixed << 1.0 + << std::endl; + // print sample rows for parallel runs (degree > 0): + std::vector < std::pair< unsigned int, double > > durations = + par_unit_->GetDurations(); + for (unsigned int i = 0; i < durations.size(); ++i) { + ostr << std::fixed << durations[i].first << "," << std::fixed << std::setprecision(2) - << par_units_[i]->GetDuration() + << durations[i].second + << "," + << std::fixed << serial_duration / durations[i].second << std::endl; } } private: - std::vector *> par_units_; - PerfTestUnit *ser_unit_; + const CallArgs & params_; + ParallelPerfTestUnit * par_unit_; + SerialPerfTestUnit * ser_unit_; /* prohibit copy and assignment */ SpeedupTest(const SpeedupTest &other); SpeedupTest& operator=(const SpeedupTest &other); }; -} /* perf */ -} /* base */ -} /* embb */ +} // perf +} // base +} // embb #endif /* EMBB_BASE_PERF_SPEEDUP_TEST_H_ */ diff --git a/base_cpp/perf/src/call_args.cc b/base_cpp/perf/src/call_args.cc index e204a2a..3fcbb16 100644 --- a/base_cpp/perf/src/call_args.cc +++ b/base_cpp/perf/src/call_args.cc @@ -37,17 +37,6 @@ void CallArgs::Parse(int argc, char * argv[]) { counter_scale = scale_param; } } - // Element type: - if (std::string(argv[paramIndex]) == "-e") { - element_type = UNDEFINED_SCALAR_TYPE; - ::std::string type = argv[paramIndex + 1]; - if (type == "float") { - element_type = FLOAT; - } - else if (type == "double") { - element_type = DOUBLE; - } - } // Stress type: if (std::string(argv[paramIndex]) == "-s") { stress_type = 
UNDEFINED_STRESS_TYPE; @@ -64,15 +53,6 @@ void CallArgs::Parse(int argc, char * argv[]) { load_factor = static_cast( atoi(argv[paramIndex + 1])); } - // Additional test parameter: - if (std::string(argv[paramIndex]) == "-p") { - parallel_base_ref = atoi(argv[paramIndex + 1]); - } - // Sanitizing and error handling: - if (element_type == UNDEFINED_SCALAR_TYPE) { - throw ::std::runtime_error( - "Invalid setting for element type (-e int|float|double)"); - } if (stress_type == UNDEFINED_STRESS_TYPE) { throw ::std::runtime_error( "Invalid setting for stress test type (-s ram|cpu)"); @@ -88,9 +68,7 @@ void CallArgs::Print(std::ostream & os) { os << "Max. threads: (-t) " << MaxThreads() << std::endl << "Vector size: (-n) " << VectorSize() << std::endl << "Load factor: (-l) " << LoadFactor() << std::endl - << "Element type: (-e) " << ElementTypeName() << std::endl << "Stress mode: (-s) " << StressModeName() << std::endl - << "Serial base ref: (-p) " << ParallelBaseReference() << std::endl << "Time sampling: (-f) " << embb::base::perf::Timer::TimerName() << std::endl; }