diff --git a/algorithms_cpp/perf/for_each_perf-inl.h b/algorithms_cpp/perf/for_each_perf-inl.h index 4170cbe..0933d98 100644 --- a/algorithms_cpp/perf/for_each_perf-inl.h +++ b/algorithms_cpp/perf/for_each_perf-inl.h @@ -47,8 +47,7 @@ SerialForEach::SerialForEach(const embb::base::perf::CallArgs & args) for (size_t i = 0; i < vector_size; i++) { v[i] = static_cast(i); } - } - else { + } else { v = 0; } } @@ -67,8 +66,7 @@ void SerialForEach::Run() { T v = static_cast(i); op(v); } - } - else if (cargs.StressMode() == CallArgs::RAM_STRESS) { + } else if (cargs.StressMode() == CallArgs::RAM_STRESS) { for (size_t i = 0; i < vector_size; i++) { op(v[i]); } @@ -84,8 +82,7 @@ ParallelForEach::ParallelForEach(const embb::base::perf::CallArgs & args) for (size_t i = 0; i < vector_size; i++) { v[i] = static_cast(i); } - } - else { + } else { v = 0; } } diff --git a/algorithms_cpp/perf/main.cc b/algorithms_cpp/perf/main.cc index c582188..ad7309b 100644 --- a/algorithms_cpp/perf/main.cc +++ b/algorithms_cpp/perf/main.cc @@ -46,9 +46,14 @@ #include #include +#include + using namespace embb::algorithms::perf; using embb::base::perf::Timer; using embb::base::perf::CallArgs; +using embb::base::perf::PerformanceTest; + +#if 0 void ReportResult( const std::string & name, @@ -156,17 +161,6 @@ void RunPerformanceTest( } } -template -void RunPerformanceTests( - const CallArgs & args) { - RunPerformanceTest, ParallelForEach >(args, "ForEach"); - RunPerformanceTest, ParallelReduce >(args, "Reduce"); - RunPerformanceTest, ParallelScan >(args, "Scan"); - RunPerformanceTest, ParallelCount >(args, "Count"); - RunPerformanceTest, ParallelQuickSort >(args, "Quicksort"); - RunPerformanceTest, ParallelMergeSort >(args, "Mergesort"); -} - int main(int argc, char * argv[]) { // Parse command line arguments: embb::base::perf::CallArgs args; @@ -174,22 +168,37 @@ int main(int argc, char * argv[]) { args.Parse(argc, argv); } catch (::std::runtime_error & re) { ::std::cerr << re.what() << ::std::endl; + } + // Print test settings: + args.Print(::std::cout); + // Run tests: + RunPerformanceTest< SerialForEach, ParallelForEach >(args, "ForEach"); + RunPerformanceTest< SerialReduce, ParallelReduce >(args, "Reduce"); + RunPerformanceTest< SerialScan, ParallelScan >(args, "Scan"); + RunPerformanceTest< SerialCount, ParallelCount >(args, "Count"); + RunPerformanceTest< SerialQuickSort, ParallelQuickSort >(args, "Quicksort"); + RunPerformanceTest< SerialMergeSort, ParallelMergeSort >(args, "Mergesort"); + return 0; +} + +#endif + +int main(int argc, char * argv[]) { + // Parse command line arguments: + CallArgs args; + try { + args.Parse(argc, argv); } - if (args.ParallelBaseReference() == 1) { - embb_log_set_log_level(EMBB_LOG_LEVEL_TRACE); + catch (::std::runtime_error & re) { + ::std::cerr << re.what() << ::std::endl; } // Print test settings: args.Print(::std::cout); // Run tests: - switch (args.ElementType()) { - case CallArgs::FLOAT: - RunPerformanceTests(args); - break; - case CallArgs::DOUBLE: - RunPerformanceTests(args); - break; - case CallArgs::UNDEFINED_SCALAR_TYPE: - break; - } + PerformanceTest< SerialForEach, ParallelForEach, CallArgs > + test(args); + test.Run(); + test.PrintReport(std::cout); + return 0; -} +} \ No newline at end of file diff --git a/base_cpp/CMakeLists.txt b/base_cpp/CMakeLists.txt index b8f7d4b..93b130d 100644 --- a/base_cpp/CMakeLists.txt +++ b/base_cpp/CMakeLists.txt @@ -21,6 +21,7 @@ GroupSourcesMSVC(include/embb/base) GroupSourcesMSVC(src) if (BUILD_TESTS STREQUAL ON) GroupSourcesMSVC(test) + GroupSourcesMSVC(perf/include/embb/perf) endif() include_directories(${CMAKE_CURRENT_SOURCE_DIR}/include @@ -47,9 +48,14 @@ if (BUILD_TESTS STREQUAL ON) # Performance tests include_directories(perf/include ${CMAKE_CURRENT_BINARY_DIR}/../partest/include + ${CMAKE_CURRENT_SOURCE_DIR}/../tasks_cpp/include + ${CMAKE_CURRENT_BINARY_DIR}/../tasks_cpp/include ) - add_library (embb_base_cpp_perf ${EMBB_BASE_PERF_SOURCES}) - target_link_libraries(embb_base_cpp_perf partest embb_base_cpp embb_base_c + add_library (embb_base_cpp_perf + ${EMBB_BASE_PERF_HEADERS} + ${EMBB_BASE_PERF_SOURCES}) + target_link_libraries(embb_base_cpp_perf + partest embb_tasks_cpp embb_base_cpp embb_base_c ${compiler_libs}) install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/perf/include/embb DESTINATION include FILES_MATCHING PATTERN "*.h") diff --git a/base_cpp/perf/include/embb/base/perf/perf_test_unit.h b/base_cpp/perf/include/embb/base/perf/perf_test_unit.h index e7dfaf9..e1aa6c5 100644 --- a/base_cpp/perf/include/embb/base/perf/perf_test_unit.h +++ b/base_cpp/perf/include/embb/base/perf/perf_test_unit.h @@ -32,7 +32,7 @@ #include #include #include -#include +#include #include #include @@ -63,25 +63,18 @@ namespace perf { * \notthreadsafe * \ingroup CPP_BASE_PERF */ -template +template class PerfTestUnit : public partest::TestUnit { public: /** * Constructs PerfTestUnit and sets up partest::TestUnit with Functor \c F. */ - explicit PerfTestUnit( - size_t thread_count = partest::TestSuite::GetDefaultNumThreads(), - size_t iteration_count = partest::TestSuite::GetDefaultNumIterations()) : - partest::TestUnit("PTU"), duration_(0), thread_count_(thread_count), - iteration_count_(iteration_count) { - /* TODO: move creation and deletion of functor data (e.g. vector of doubles) - * to functor-specific Pre/Post methods to avoid memory shortage */ - /* TODO: create possibility to initialize memory in these functor-specific - * Pre/Post methods to avoid first-touch problem. */ - func = new F; - Pre(&PerfTestUnit::Tic, this); - Add(&F::Run, func, 1, iteration_count_); - Post(&PerfTestUnit::Toc, this); + explicit PerfTestUnit(const TestParams & params) + : partest::TestUnit("PTU"), + params_(params), + duration_(0) { + func = new F(params_); +// Add(&PerfTestUnit::Run, this); } /** @@ -97,6 +90,7 @@ class PerfTestUnit : public partest::TestUnit { */ double GetDuration() const { return duration_; } +#if 0 /** * Returns thread count of this unit. * \return Thread count of this unit. @@ -108,59 +102,73 @@ class PerfTestUnit : public partest::TestUnit { * \return Iteration count of this unit. */ size_t GetIterationCount() const { return iteration_count_; } +#endif private: + void Run() { + for (unsigned int num_threads = 1; + num_threads < params_.MaxThreads();) { + func->Pre(); + Tic(); + func->Run(num_threads); + Toc(); + func->Post(); + if (num_threads < 4) { + num_threads++; + } else { + num_threads += 4; + } + } + } + /** * Sets up EMBB and starts timer. */ void Tic() { - /* if thread_count equals 0, run without EMBB */ - if (thread_count_ > 0) { - /* initialize EMBB with thread_count worker threads */ - embb::base::CoreSet core_set_(false); - for (unsigned int i = 0; (i < embb::base::CoreSet::CountAvailable()) && - (i < thread_count_); i++) { - core_set_.Add(i); - } - embb::mtapi::Node::Initialize(THIS_DOMAIN_ID, THIS_NODE_ID, core_set_, - MTAPI_NODE_MAX_TASKS_DEFAULT, - MTAPI_NODE_MAX_GROUPS_DEFAULT, - MTAPI_NODE_MAX_QUEUES_DEFAULT, - MTAPI_NODE_QUEUE_LIMIT_DEFAULT, - MTAPI_NODE_MAX_PRIORITIES_DEFAULT); + // Set number of available threads to given limit: + embb_internal_thread_index_reset(); + // Configure cores to be used by EMBB: + embb::base::CoreSet cores(false); + for (unsigned int coreId = 0; + coreId < params_.MaxThreads(); + ++coreId) { + cores.Add(coreId); } - /* start timer */ + embb::tasks::Node::Initialize( + THIS_DOMAIN_ID, THIS_NODE_ID, + cores, + MTAPI_NODE_MAX_TASKS_DEFAULT, + MTAPI_NODE_MAX_GROUPS_DEFAULT, + MTAPI_NODE_MAX_QUEUES_DEFAULT, + MTAPI_NODE_QUEUE_LIMIT_DEFAULT, + MTAPI_NODE_MAX_PRIORITIES_DEFAULT); + // start timer timer_ = Timer(); } /** - * Stops timer and resets EMBB */ + * Stops timer and resets EMBB + */ void Toc() { - /* stop timer */ + // stop timer duration_ = timer_.Elapsed(); - /* execute EMBB Finalize (if EMBB was initialized) */ - if (thread_count_ > 0) { - embb::mtapi::Node::Finalize(); - /* reset internal thread count in EMBB. required in order to avoid - * lock-ups */ - /* TODO: Talk to TobFuchs about nicer implementation */ - embb_internal_thread_index_reset(); - } + embb::tasks::Node::Finalize(); } + const TestParams & params_; double duration_; - size_t thread_count_; - size_t iteration_count_; +//size_t thread_count_; +//size_t iteration_count_; Timer timer_; F *func; - /* prohibit copy and assignment */ + // prohibit copy and assignment PerfTestUnit(const PerfTestUnit &other); PerfTestUnit& operator=(const PerfTestUnit &other); }; -} /* perf */ -} /* base */ -} /* embb */ +} // perf +} // base +} // embb #endif /* EMBB_BASE_PERF_PERF_TEST_UNIT_H_ */ diff --git a/base_cpp/perf/include/embb/base/perf/performance_test.h b/base_cpp/perf/include/embb/base/perf/performance_test.h index aebfe19..3bd15e9 100644 --- a/base_cpp/perf/include/embb/base/perf/performance_test.h +++ b/base_cpp/perf/include/embb/base/perf/performance_test.h @@ -43,21 +43,18 @@ namespace perf { * \notthreadsafe * \ingroup CPP_BASE_PERF */ -template +template class PerformanceTest : public partest::TestCase { public: /** * Constructs PerformanceTest. */ - explicit PerformanceTest( - size_t thread_count = partest::TestSuite::GetDefaultNumThreads(), - size_t iteration_count = partest::TestSuite::GetDefaultNumIterations()) : - partest::TestCase() { - /* maximum one thread per available core */ - size_t threads = std::min(thread_count, + explicit PerformanceTest(const TestParams & params) + : partest::TestCase(), params_(params) { + // maximum one thread per available core + size_t threads = std::min(params.MaxThreads(), embb::base::CoreSet::CountAvailable()); - - unit = &CreateUnit< PerfTestUnit >(threads, iteration_count); + unit_ = &CreateUnit< PerfTestUnit >(params_); } /** @@ -70,14 +67,15 @@ class PerformanceTest : public partest::TestCase { /** * Prints the durations of all units in comma separated format. */ - void PrintReport(std::ostream &ostr) const { - /* print execution duration */ - ostr << "P" << unit->GetThreadCount << std::endl << unit->GetDuration() - << std::endl; + void PrintReport(std::ostream & ostr) const { + // print execution duration +// ostr << "P" << unit_->GetThreadCount() << std::endl +// << unit_->GetDuration() << std::endl; } private: - PerfTestUnit *unit; + const TestParams & params_; + PerfTestUnit * unit_; /* prohibit copy and assignment */ PerformanceTest(const PerformanceTest &other); diff --git a/base_cpp/perf/include/embb/base/perf/speedup_test.h b/base_cpp/perf/include/embb/base/perf/speedup_test.h index a856dd7..8db1eca 100644 --- a/base_cpp/perf/include/embb/base/perf/speedup_test.h +++ b/base_cpp/perf/include/embb/base/perf/speedup_test.h @@ -86,7 +86,7 @@ class SpeedupTest : public partest::TestCase { /** * Prints the durations of all units in comma separated format. */ - void PrintReport(std::ostream &ostr) { + void PrintReport(std::ostream & ostr) { /* print sample row for sequential run (degree 0): */ ostr << "0," << std::fixed << std::setprecision(2)