Commit f9beb8b6 by Tobias Fuchs

performance tests: refactoring performance test framework to partest component

parent ad189eb9
......@@ -47,8 +47,7 @@ SerialForEach<T>::SerialForEach(const embb::base::perf::CallArgs & args)
for (size_t i = 0; i < vector_size; i++) {
v[i] = static_cast<T>(i);
}
}
else {
} else {
v = 0;
}
}
......@@ -67,8 +66,7 @@ void SerialForEach<T>::Run() {
T v = static_cast<T>(i);
op(v);
}
}
else if (cargs.StressMode() == CallArgs::RAM_STRESS) {
} else if (cargs.StressMode() == CallArgs::RAM_STRESS) {
for (size_t i = 0; i < vector_size; i++) {
op(v[i]);
}
......@@ -84,8 +82,7 @@ ParallelForEach<T>::ParallelForEach(const embb::base::perf::CallArgs & args)
for (size_t i = 0; i < vector_size; i++) {
v[i] = static_cast<T>(i);
}
}
else {
} else {
v = 0;
}
}
......
......@@ -46,9 +46,14 @@
#include <quick_sort_perf.h>
#include <merge_sort_perf.h>
#include <embb/base/perf/performance_test.h>
using namespace embb::algorithms::perf;
using embb::base::perf::Timer;
using embb::base::perf::CallArgs;
using embb::base::perf::PerformanceTest;
#if 0
void ReportResult(
const std::string & name,
......@@ -156,17 +161,6 @@ void RunPerformanceTest(
}
}
template<typename EType>
void RunPerformanceTests(
const CallArgs & args) {
RunPerformanceTest<SerialForEach<EType>, ParallelForEach<EType> >(args, "ForEach");
RunPerformanceTest<SerialReduce<EType>, ParallelReduce<EType> >(args, "Reduce");
RunPerformanceTest<SerialScan<EType>, ParallelScan<EType> >(args, "Scan");
RunPerformanceTest<SerialCount<EType>, ParallelCount<EType> >(args, "Count");
RunPerformanceTest<SerialQuickSort<EType>, ParallelQuickSort<EType> >(args, "Quicksort");
RunPerformanceTest<SerialMergeSort<EType>, ParallelMergeSort<EType> >(args, "Mergesort");
}
int main(int argc, char * argv[]) {
// Parse command line arguments:
embb::base::perf::CallArgs args;
......@@ -175,21 +169,36 @@ int main(int argc, char * argv[]) {
} catch (::std::runtime_error & re) {
::std::cerr << re.what() << ::std::endl;
}
if (args.ParallelBaseReference() == 1) {
embb_log_set_log_level(EMBB_LOG_LEVEL_TRACE);
}
// Print test settings:
args.Print(::std::cout);
// Run tests:
switch (args.ElementType()) {
case CallArgs::FLOAT:
RunPerformanceTests<float>(args);
break;
case CallArgs::DOUBLE:
RunPerformanceTests<double>(args);
break;
case CallArgs::UNDEFINED_SCALAR_TYPE:
break;
RunPerformanceTest< SerialForEach<float>, ParallelForEach<float> >(args, "ForEach");
RunPerformanceTest< SerialReduce<float>, ParallelReduce<float> >(args, "Reduce");
RunPerformanceTest< SerialScan<float>, ParallelScan<float> >(args, "Scan");
RunPerformanceTest< SerialCount<float>, ParallelCount<float> >(args, "Count");
RunPerformanceTest< SerialQuickSort<float>, ParallelQuickSort<float> >(args, "Quicksort");
RunPerformanceTest< SerialMergeSort<float>, ParallelMergeSort<float> >(args, "Mergesort");
return 0;
}
#endif
/**
 * Entry point of the performance test executable.
 *
 * Parses the command line into a CallArgs object, prints the resulting
 * settings to standard output, then constructs a PerformanceTest for the
 * serial and parallel ForEach functors on element type float, runs it and
 * calls its PrintReport on standard output.
 *
 * \param argc  Number of command line arguments
 * \param argv  Command line argument strings
 * \return 0 in all cases, including after a reported parse error
 */
int main(int argc, char * argv[]) {
  // Parse command line arguments:
  CallArgs args;
  try {
    args.Parse(argc, argv);
  } catch (const ::std::runtime_error & re) {
    // A parse error is reported but is not fatal: execution continues with
    // whatever settings args holds at this point.
    ::std::cerr << re.what() << ::std::endl;
  }
  // Print test settings:
  args.Print(::std::cout);
  // Run tests:
  PerformanceTest< SerialForEach<float>, ParallelForEach<float>, CallArgs >
    test(args);
  test.Run();
  test.PrintReport(::std::cout);
  return 0;
}
\ No newline at end of file
......@@ -21,6 +21,7 @@ GroupSourcesMSVC(include/embb/base)
GroupSourcesMSVC(src)
if (BUILD_TESTS STREQUAL ON)
GroupSourcesMSVC(test)
GroupSourcesMSVC(perf/include/embb/perf)
endif()
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/include
......@@ -47,9 +48,14 @@ if (BUILD_TESTS STREQUAL ON)
# Performance tests
include_directories(perf/include
${CMAKE_CURRENT_BINARY_DIR}/../partest/include
${CMAKE_CURRENT_SOURCE_DIR}/../tasks_cpp/include
${CMAKE_CURRENT_BINARY_DIR}/../tasks_cpp/include
)
add_library (embb_base_cpp_perf ${EMBB_BASE_PERF_SOURCES})
target_link_libraries(embb_base_cpp_perf partest embb_base_cpp embb_base_c
add_library (embb_base_cpp_perf
${EMBB_BASE_PERF_HEADERS}
${EMBB_BASE_PERF_SOURCES})
target_link_libraries(embb_base_cpp_perf
partest embb_tasks_cpp embb_base_cpp embb_base_c
${compiler_libs})
install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/perf/include/embb
DESTINATION include FILES_MATCHING PATTERN "*.h")
......
......@@ -32,7 +32,7 @@
#include <partest/partest.h>
#include <partest/test_unit.h>
#include <embb/base/perf/timer.h>
#include <embb/mtapi/mtapi.h>
#include <embb/tasks/tasks.h>
#include <embb/base/c/thread.h>
#include <embb/base/c/internal/thread_index.h>
......@@ -63,25 +63,18 @@ namespace perf {
* \notthreadsafe
* \ingroup CPP_BASE_PERF
*/
template<typename F>
template<typename F, class TestParams >
class PerfTestUnit : public partest::TestUnit {
public:
/**
* Constructs PerfTestUnit and sets up partest::TestUnit with Functor \c F.
*/
explicit PerfTestUnit(
size_t thread_count = partest::TestSuite::GetDefaultNumThreads(),
size_t iteration_count = partest::TestSuite::GetDefaultNumIterations()) :
partest::TestUnit("PTU"), duration_(0), thread_count_(thread_count),
iteration_count_(iteration_count) {
/* TODO: move creation and deletion of functor data (e.g. vector of doubles)
* to functor-specific Pre/Post methods to avoid memory shortage */
/* TODO: create possibility to initialize memory in these functor-specific
* Pre/Post methods to avoid first-touch problem. */
func = new F;
Pre(&PerfTestUnit::Tic, this);
Add(&F::Run, func, 1, iteration_count_);
Post(&PerfTestUnit::Toc, this);
// Builds the unit around a functor of type F that is constructed from the
// given parameters; the duration starts at 0.
// NOTE(review): params_ is declared as `const TestParams &`, so the caller's
// TestParams object presumably has to outlive this unit -- confirm at the
// call sites.
explicit PerfTestUnit(const TestParams & params)
: partest::TestUnit("PTU"),
params_(params),
duration_(0) {
// The functor is allocated with new; where it is released is not visible
// in this part of the file.
func = new F(params_);
// The Add(...) call below is commented out, so the constructor itself only
// creates the functor.
// Add(&PerfTestUnit<F, TestParams>::Run, this);
}
/**
......@@ -97,6 +90,7 @@ class PerfTestUnit : public partest::TestUnit {
*/
double GetDuration() const { return duration_; }
#if 0
/**
* Returns thread count of this unit.
* \return Thread count of this unit.
......@@ -108,59 +102,73 @@ class PerfTestUnit : public partest::TestUnit {
* \return Iteration count of this unit.
*/
size_t GetIterationCount() const { return iteration_count_; }
#endif
private:
/**
 * Executes the functor once per thread count of a ramp that starts at 1,
 * grows by 1 while the count is below 4 and by 4 from then on. The ramp
 * ends as soon as the count is no longer strictly below
 * params_.MaxThreads(). Every execution is framed by the functor's
 * Pre() / Post() calls and by Tic() / Toc().
 */
void Run() {
  unsigned int thread_count = 1;
  while (thread_count < params_.MaxThreads()) {
    func->Pre();
    Tic();
    func->Run(thread_count);
    Toc();
    func->Post();
    // Ramp: 1, 2, 3, 4, then 8, 12, 16, ...
    thread_count += (thread_count < 4) ? 1u : 4u;
  }
}
/**
* Sets up EMBB and starts timer.
*/
void Tic() {
/* if thread_count equals 0, run without EMBB */
if (thread_count_ > 0) {
/* initialize EMBB with thread_count worker threads */
embb::base::CoreSet core_set_(false);
for (unsigned int i = 0; (i < embb::base::CoreSet::CountAvailable()) &&
(i < thread_count_); i++) {
core_set_.Add(i);
// Set number of available threads to given limit:
embb_internal_thread_index_reset();
// Configure cores to be used by EMBB:
embb::base::CoreSet cores(false);
for (unsigned int coreId = 0;
coreId < params_.MaxThreads();
++coreId) {
cores.Add(coreId);
}
embb::mtapi::Node::Initialize(THIS_DOMAIN_ID, THIS_NODE_ID, core_set_,
embb::tasks::Node::Initialize(
THIS_DOMAIN_ID, THIS_NODE_ID,
cores,
MTAPI_NODE_MAX_TASKS_DEFAULT,
MTAPI_NODE_MAX_GROUPS_DEFAULT,
MTAPI_NODE_MAX_QUEUES_DEFAULT,
MTAPI_NODE_QUEUE_LIMIT_DEFAULT,
MTAPI_NODE_MAX_PRIORITIES_DEFAULT);
}
/* start timer */
// start timer
timer_ = Timer();
}
/**
* Stops timer and resets EMBB */
* Stops timer and resets EMBB
*/
void Toc() {
/* stop timer */
// stop timer
duration_ = timer_.Elapsed();
/* execute EMBB Finalize (if EMBB was initialized) */
if (thread_count_ > 0) {
embb::mtapi::Node::Finalize();
/* reset internal thread count in EMBB. required in order to avoid
* lock-ups */
/* TODO: Talk to TobFuchs about nicer implementation */
embb_internal_thread_index_reset();
}
embb::tasks::Node::Finalize();
}
const TestParams & params_;
double duration_;
size_t thread_count_;
size_t iteration_count_;
//size_t thread_count_;
//size_t iteration_count_;
Timer timer_;
F *func;
/* prohibit copy and assignment */
// prohibit copy and assignment
PerfTestUnit(const PerfTestUnit &other);
PerfTestUnit& operator=(const PerfTestUnit &other);
};
} /* perf */
} /* base */
} /* embb */
} // perf
} // base
} // embb
#endif /* EMBB_BASE_PERF_PERF_TEST_UNIT_H_ */
......@@ -43,21 +43,18 @@ namespace perf {
* \notthreadsafe
* \ingroup CPP_BASE_PERF
*/
template<typename ParallelF>
template<typename SerialF, typename ParallelF, class TestParams>
class PerformanceTest : public partest::TestCase {
public:
/**
* Constructs PerformanceTest.
*/
explicit PerformanceTest(
size_t thread_count = partest::TestSuite::GetDefaultNumThreads(),
size_t iteration_count = partest::TestSuite::GetDefaultNumIterations()) :
partest::TestCase() {
/* maximum one thread per available core */
size_t threads = std::min<size_t>(thread_count,
explicit PerformanceTest(const TestParams & params)
: partest::TestCase(), params_(params) {
// maximum one thread per available core
size_t threads = std::min<size_t>(params.MaxThreads(),
embb::base::CoreSet::CountAvailable());
unit = &CreateUnit< PerfTestUnit<ParallelF> >(threads, iteration_count);
unit_ = &CreateUnit< PerfTestUnit<ParallelF, TestParams> >(params_);
}
/**
......@@ -70,14 +67,15 @@ class PerformanceTest : public partest::TestCase {
/**
* Prints the durations of all units in comma separated format.
*/
void PrintReport(std::ostream &ostr) const {
/* print execution duration */
ostr << "P" << unit->GetThreadCount << std::endl << unit->GetDuration()
<< std::endl;
void PrintReport(std::ostream & ostr) const {
// Currently a no-op: the report output below is commented out, so nothing
// is written to ostr.
// print execution duration
// ostr << "P" << unit_->GetThreadCount() << std::endl
// << unit_->GetDuration() << std::endl;
}
private:
PerfTestUnit<ParallelF> *unit;
const TestParams & params_;
PerfTestUnit<ParallelF, TestParams> * unit_;
/* prohibit copy and assignment */
PerformanceTest(const PerformanceTest &other);
......
......@@ -86,7 +86,7 @@ class SpeedupTest : public partest::TestCase {
/**
* Prints the durations of all units in comma separated format.
*/
void PrintReport(std::ostream &ostr) {
void PrintReport(std::ostream & ostr) {
/* print sample row for sequential run (degree 0): */
ostr << "0,"
<< std::fixed << std::setprecision(2)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment